From: Nick Burch Date: Mon, 20 Sep 2010 14:26:49 +0000 (+0000) Subject: Work inspired by bug #48018 - get HWPF lists more consistent in read vs write, and... X-Git-Tag: REL_3_7_BETA3~10 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=8b2f2bcff633d330eef903ab9babe7129ed1d96e;p=poi.git Work inspired by bug #48018 - get HWPF lists more consistent in read vs write, and preserve order as apparently that matters. Includes a fair number of list related unit tests, but not for everything git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@998943 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index f1b71436e1..d3eaf3f08e 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 48018 - Improve HWPF handling of lists in documents read and then saved, by preserving order better 49820 - Fix HWPF paragraph levels, so that outline levels can be properly fetched 47271 - Avoid infinite loops on broken HWPF documents with a corrupt CHP style with a parent of itself 49936 - Handle HWPF documents with problematic HeaderStories better diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java index de8e7afccd..d6392af776 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java @@ -91,7 +91,7 @@ public final class ListLevel System.arraycopy(buf, offset, _rgbxchNums, 0, RGBXCH_NUMS_SIZE); offset += RGBXCH_NUMS_SIZE; - _ixchFollow = buf[offset++]; + _ixchFollow = buf[offset++]; _dxaSpace = LittleEndian.getInt(buf, offset); offset += LittleEndian.INT_SIZE; _dxaIndent = LittleEndian.getInt(buf, offset); @@ -216,10 +216,10 @@ public final class ListLevel LittleEndian.putShort(buf, offset, _reserved); offset += LittleEndian.SHORT_SIZE; - System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx); - offset += _cbGrpprlChpx; System.arraycopy(_grpprlPapx, 0, buf, offset, _cbGrpprlPapx); offset += _cbGrpprlPapx; + System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx); + offset += _cbGrpprlChpx; if (_numberText == null) { // TODO - write junit to test this flow diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java index fd72d42270..9124c1fb20 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java @@ -17,19 +17,22 @@ package org.apache.poi.hwpf.model; -import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - -import org.apache.poi.hwpf.model.io.*; - -import java.util.HashMap; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Map; import java.util.NoSuchElementException; +import java.util.Set; -import java.io.ByteArrayOutputStream; -import java.io.IOException; +import org.apache.poi.hwpf.model.io.HWPFOutputStream; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; /** * @author Ryan Ackley @@ -40,8 +43,8 @@ public final class ListTables private static final int LIST_FORMAT_OVERRIDE_SIZE = 16; private static POILogger log = POILogFactory.getLogger(ListTables.class); - HashMap _listMap = new HashMap(); - ArrayList _overrideList = new ArrayList(); + ListMap _listMap = new ListMap(); + ArrayList _overrideList = new ArrayList(); public ListTables() { @@ -109,22 +112,17 @@ public final class ListTables public void writeListDataTo(HWPFOutputStream tableStream) throws IOException { - - Integer[] intList = (Integer[])_listMap.keySet().toArray(new Integer[0]); + int listSize = _listMap.size(); // use this stream as a buffer for the levels since their size varies. ByteArrayOutputStream levelBuf = new ByteArrayOutputStream(); - // use a byte array for the lists because we know their size. - byte[] listBuf = new byte[intList.length * LIST_DATA_SIZE]; - byte[] shortHolder = new byte[2]; - LittleEndian.putShort(shortHolder, (short)intList.length); + LittleEndian.putShort(shortHolder, (short)listSize); tableStream.write(shortHolder); - for (int x = 0; x < intList.length; x++) - { - ListData lst = (ListData)_listMap.get(intList[x]); + for(Integer x : _listMap.sortedKeys()) { + ListData lst = _listMap.get(x); tableStream.write(lst.toByteArray()); ListLevel[] lvls = lst.getLevels(); for (int y = 0; y < lvls.length; y++) @@ -150,7 +148,7 @@ public final class ListTables for (int x = 0; x < size; x++) { - ListFormatOverride lfo = (ListFormatOverride)_overrideList.get(x); + ListFormatOverride lfo = _overrideList.get(x); tableStream.write(lfo.toByteArray()); ListFormatOverrideLevel[] lfolvls = lfo.getLevelOverrides(); for (int y = 0; y < lfolvls.length; y++) @@ -164,7 +162,7 @@ public final class ListTables public ListFormatOverride getOverride(int lfoIndex) { - return (ListFormatOverride)_overrideList.get(lfoIndex - 1); + return _overrideList.get(lfoIndex - 1); } public int getOverrideIndexFromListID(int lstid) @@ -173,7 +171,7 @@ public final class ListTables int size = _overrideList.size(); for (int x = 0; x < size; x++) { - ListFormatOverride next = (ListFormatOverride)_overrideList.get(x); + ListFormatOverride next = _overrideList.get(x); if (next.getLsid() == lstid) { // 1-based index I think @@ -190,7 +188,7 @@ public final class ListTables public ListLevel getLevel(int listID, int level) { - ListData lst = (ListData)_listMap.get(Integer.valueOf(listID)); + ListData lst = _listMap.get(Integer.valueOf(listID)); if(level < lst.numLevels()) { ListLevel lvl = lst.getLevels()[level]; return lvl; @@ -201,7 +199,7 @@ public final class ListTables public ListData getListData(int listID) { - return (ListData) _listMap.get(Integer.valueOf(listID)); + return _listMap.get(Integer.valueOf(listID)); } public boolean equals(Object obj) @@ -215,12 +213,12 @@ public final class ListTables if (_listMap.size() == tables._listMap.size()) { - Iterator it = _listMap.keySet().iterator(); + Iterator it = _listMap.keySet().iterator(); while (it.hasNext()) { - Object key = it.next(); - ListData lst1 = (ListData)_listMap.get(key); - ListData lst2 = (ListData)tables._listMap.get(key); + Integer key = it.next(); + ListData lst1 = _listMap.get(key); + ListData lst2 = tables._listMap.get(key); if (!lst1.equals(lst2)) { return false; @@ -241,4 +239,70 @@ public final class ListTables } return false; } + + private static class ListMap implements Map { + private ArrayList keyList = new ArrayList(); + private HashMap parent = new HashMap(); + private ListMap() {} + + public void clear() { + keyList.clear(); + parent.clear(); + } + + public boolean containsKey(Object key) { + return parent.containsKey(key); + } + + public boolean containsValue(Object value) { + return parent.containsValue(value); + } + + public ListData get(Object key) { + return parent.get(key); + } + + public boolean isEmpty() { + return parent.isEmpty(); + } + + public ListData put(Integer key, ListData value) { + keyList.add(key); + return parent.put(key, value); + } + + public void putAll(Map map) { + for(Entry entry : map.entrySet()) { + put(entry.getKey(), entry.getValue()); + } + } + + public ListData remove(Object key) { + keyList.remove(key); + return parent.remove(key); + } + + public int size() { + return parent.size(); + } + + public Set> entrySet() { + throw new IllegalStateException("Use sortedKeys() + get() instead"); + } + + public List sortedKeys() { + return Collections.unmodifiableList(keyList); + } + public Set keySet() { + throw new IllegalStateException("Use sortedKeys() instead"); + } + + public Collection values() { + ArrayList values = new ArrayList(); + for(Integer key : keyList) { + values.add(parent.get(key)); + } + return values; + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java index 87fe4d7f1d..f51576df9f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java @@ -434,16 +434,30 @@ public class Paragraph extends Range implements Cloneable { _papx.updateSprm(SPRM_DCS, dcs.toShort()); } + /** + * Returns the ilfo, an index to the document's hpllfo, which + * describes the automatic number formatting of the paragraph. + * A value of zero means it isn't numbered. + */ public int getIlfo() { return _props.getIlfo(); } + /** + * Returns the multi-level indent for the paragraph. Will be + * zero for non-list paragraphs, and the first level of any + * list. Subsequent levels in hold values 1-8. + */ public int getIlvl() { return _props.getIlvl(); } + /** + * Returns the heading level (1-8), or 9 if the paragraph + * isn't in a heading style. + */ public int getLvl() { return _props.getLvl(); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java new file mode 100644 index 0000000000..eafb18fb95 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java @@ -0,0 +1,210 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hwpf.usermodel; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.HWPFTestDataSamples; + +/** + * Tests for our handling of lists + */ +public final class TestLists extends TestCase { + public void testBasics() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc"); + Range r = doc.getRange(); + + assertEquals(40, r.numParagraphs()); + assertEquals("Heading Level 1\r", r.getParagraph(0).text()); + assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text()); + assertEquals("The end!\r", r.getParagraph(38).text()); + assertEquals("\r", r.getParagraph(39).text()); + + assertEquals(0, r.getParagraph(0).getLvl()); + assertEquals(9, r.getParagraph(1).getLvl()); + assertEquals(9, r.getParagraph(38).getLvl()); + assertEquals(9, r.getParagraph(39).getLvl()); + } + + public void testUnorderedLists() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc"); + Range r = doc.getRange(); + assertEquals(40, r.numParagraphs()); + + // Normal bullet points + assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text()); + assertEquals("Unordered list 1\r", r.getParagraph(2).text()); + assertEquals("UL 2\r", r.getParagraph(3).text()); + assertEquals("UL 3\r", r.getParagraph(4).text()); + assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text()); + + assertEquals(9, r.getParagraph(1).getLvl()); + assertEquals(9, r.getParagraph(2).getLvl()); + assertEquals(9, r.getParagraph(3).getLvl()); + assertEquals(9, r.getParagraph(4).getLvl()); + assertEquals(9, r.getParagraph(5).getLvl()); + + assertEquals(0, r.getParagraph(1).getIlvl()); + assertEquals(0, r.getParagraph(2).getIlvl()); + assertEquals(0, r.getParagraph(3).getIlvl()); + assertEquals(0, r.getParagraph(4).getIlvl()); + assertEquals(0, r.getParagraph(5).getIlvl()); + + // Tick bullets + assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text()); + assertEquals("Tick 1\r", r.getParagraph(10).text()); + assertEquals("Tick 2\r", r.getParagraph(11).text()); + assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text()); + + assertEquals(9, r.getParagraph(9).getLvl()); + assertEquals(9, r.getParagraph(10).getLvl()); + assertEquals(9, r.getParagraph(11).getLvl()); + assertEquals(9, r.getParagraph(12).getLvl()); + + assertEquals(0, r.getParagraph(9).getIlvl()); + assertEquals(0, r.getParagraph(10).getIlvl()); + assertEquals(0, r.getParagraph(11).getIlvl()); + assertEquals(0, r.getParagraph(12).getIlvl()); + + // TODO Test for tick not bullet + } + + public void testOrderedLists() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc"); + Range r = doc.getRange(); + assertEquals(40, r.numParagraphs()); + + assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text()); + assertEquals("Ordered list 1\r", r.getParagraph(6).text()); + assertEquals("OL 2\r", r.getParagraph(7).text()); + assertEquals("OL 3\r", r.getParagraph(8).text()); + assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text()); + + assertEquals(9, r.getParagraph(5).getLvl()); + assertEquals(9, r.getParagraph(6).getLvl()); + assertEquals(9, r.getParagraph(7).getLvl()); + assertEquals(9, r.getParagraph(8).getLvl()); + assertEquals(9, r.getParagraph(9).getLvl()); + + assertEquals(0, r.getParagraph(5).getIlvl()); + assertEquals(0, r.getParagraph(6).getIlvl()); + assertEquals(0, r.getParagraph(7).getIlvl()); + assertEquals(0, r.getParagraph(8).getIlvl()); + assertEquals(0, r.getParagraph(9).getIlvl()); + } + + public void testMultiLevelLists() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc"); + Range r = doc.getRange(); + assertEquals(40, r.numParagraphs()); + + assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text()); + assertEquals("ML 1:1\r", r.getParagraph(13).text()); + assertEquals("ML 1:2\r", r.getParagraph(14).text()); + assertEquals("ML 2:1\r", r.getParagraph(15).text()); + assertEquals("ML 2:2\r", r.getParagraph(16).text()); + assertEquals("ML 2:3\r", r.getParagraph(17).text()); + assertEquals("ML 3:1\r", r.getParagraph(18).text()); + assertEquals("ML 4:1\r", r.getParagraph(19).text()); + assertEquals("ML 5:1\r", r.getParagraph(20).text()); + assertEquals("ML 5:2\r", r.getParagraph(21).text()); + assertEquals("ML 2:4\r", r.getParagraph(22).text()); + assertEquals("ML 1:3\r", r.getParagraph(23).text()); + assertEquals("Multi-level ordered list:\r", r.getParagraph(24).text()); + assertEquals("OL 1\r", r.getParagraph(25).text()); + assertEquals("OL 2\r", r.getParagraph(26).text()); + assertEquals("OL 2.1\r", r.getParagraph(27).text()); + assertEquals("OL 2.2\r", r.getParagraph(28).text()); + assertEquals("OL 2.2.1\r", r.getParagraph(29).text()); + assertEquals("OL 2.2.2\r", r.getParagraph(30).text()); + assertEquals("OL 2.2.2.1\r", r.getParagraph(31).text()); + assertEquals("OL 2.2.3\r", r.getParagraph(32).text()); + assertEquals("OL 3\r", r.getParagraph(33).text()); + assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text()); + + for(int i=12; i<=34; i++) { + assertEquals(9, r.getParagraph(12).getLvl()); + } + assertEquals(0, r.getParagraph(12).getIlvl()); + assertEquals(0, r.getParagraph(13).getIlvl()); + assertEquals(0, r.getParagraph(14).getIlvl()); + assertEquals(1, r.getParagraph(15).getIlvl()); + assertEquals(1, r.getParagraph(16).getIlvl()); + assertEquals(1, r.getParagraph(17).getIlvl()); + assertEquals(2, r.getParagraph(18).getIlvl()); + assertEquals(3, r.getParagraph(19).getIlvl()); + assertEquals(4, r.getParagraph(20).getIlvl()); + assertEquals(4, r.getParagraph(21).getIlvl()); + assertEquals(1, r.getParagraph(22).getIlvl()); + assertEquals(0, r.getParagraph(23).getIlvl()); + assertEquals(0, r.getParagraph(24).getIlvl()); + assertEquals(0, r.getParagraph(25).getIlvl()); + assertEquals(0, r.getParagraph(26).getIlvl()); + assertEquals(1, r.getParagraph(27).getIlvl()); + assertEquals(1, r.getParagraph(28).getIlvl()); + assertEquals(2, r.getParagraph(29).getIlvl()); + assertEquals(2, r.getParagraph(30).getIlvl()); + assertEquals(3, r.getParagraph(31).getIlvl()); + assertEquals(2, r.getParagraph(32).getIlvl()); + assertEquals(0, r.getParagraph(33).getIlvl()); + assertEquals(0, r.getParagraph(34).getIlvl()); + } + + public void testIndentedText() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc"); + Range r = doc.getRange(); + + assertEquals(40, r.numParagraphs()); + assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text()); + assertEquals("Indented once\r", r.getParagraph(35).text()); + assertEquals("Indented twice\r", r.getParagraph(36).text()); + assertEquals("Indented three times\r", r.getParagraph(37).text()); + assertEquals("The end!\r", r.getParagraph(38).text()); + + assertEquals(9, r.getParagraph(34).getLvl()); + assertEquals(9, r.getParagraph(35).getLvl()); + assertEquals(9, r.getParagraph(36).getLvl()); + assertEquals(9, r.getParagraph(37).getLvl()); + assertEquals(9, r.getParagraph(38).getLvl()); + assertEquals(9, r.getParagraph(39).getLvl()); + + assertEquals(0, r.getParagraph(34).getIlvl()); + assertEquals(0, r.getParagraph(35).getIlvl()); + assertEquals(0, r.getParagraph(36).getIlvl()); + assertEquals(0, r.getParagraph(37).getIlvl()); + assertEquals(0, r.getParagraph(38).getIlvl()); + assertEquals(0, r.getParagraph(39).getIlvl()); + + // TODO Test the indent + } + + public void testWriteRead() throws IOException { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc"); + doc = HWPFTestDataSamples.writeOutAndReadBack(doc); + + Range r = doc.getRange(); + + // Check a couple at random + assertEquals(4, r.getParagraph(21).getIlvl()); + assertEquals(1, r.getParagraph(22).getIlvl()); + assertEquals(0, r.getParagraph(23).getIlvl()); + } +} diff --git a/test-data/document/Lists.doc b/test-data/document/Lists.doc new file mode 100644 index 0000000000..80201eef14 Binary files /dev/null and b/test-data/document/Lists.doc differ