From 9cb74f87feb40eb2503934745cb959ed95da86fc Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 15 Jul 2008 21:15:16 +0000 Subject: [PATCH] Start on HSSFOptimiser, which removes un-needed cell styles and fonts, fixing up references as it does so git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@677041 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../apache/poi/hssf/record/UnicodeString.java | 17 ++ .../poi/hssf/usermodel/HSSFOptimiser.java | 178 ++++++++++++++++++ .../hssf/usermodel/HSSFRichTextString.java | 19 +- .../poi/hssf/usermodel/HSSFWorkbook.java | 10 + .../poi/hssf/usermodel/AllUserModelTests.java | 1 + .../poi/hssf/usermodel/TestHSSFOptimiser.java | 147 +++++++++++++++ 8 files changed, 372 insertions(+), 2 deletions(-) create mode 100644 src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java create mode 100644 src/testcases/org/apache/poi/hssf/usermodel/TestHSSFOptimiser.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 12f62cb8b9..a5cb00484c 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel 45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found 45380 - Missing return keyword in ArrayPtg.toFormulaString() 44958 - Record level support for Data Tables. 
(No formula parser support though) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index a62ee3ddeb..27f0624314 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel 45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found 45380 - Missing return keyword in ArrayPtg.toFormulaString() 44958 - Record level support for Data Tables. (No formula parser support though) diff --git a/src/java/org/apache/poi/hssf/record/UnicodeString.java b/src/java/org/apache/poi/hssf/record/UnicodeString.java index b53fcd485c..9919d52c3d 100644 --- a/src/java/org/apache/poi/hssf/record/UnicodeString.java +++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java @@ -439,6 +439,23 @@ public class UnicodeString this.field_5_ext_rst = ext_rst; } + + /** + * Re-points every format run of this string that uses oldFontIndex + * at newFontIndex; runs using any other font index are left untouched.
+ * Normally only called when fonts have been removed / re-ordered. + * NOTE(review): dereferences field_4_format_runs unguarded - confirm it is never null here. + */ + public void swapFontUse(short oldFontIndex, short newFontIndex) { + Iterator i = field_4_format_runs.iterator(); + while(i.hasNext()) { + FormatRun run = (FormatRun)i.next(); + if(run.fontIndex == oldFontIndex) { + run.fontIndex = newFontIndex; + } + } + } + /** * unlike the real records we return the same as "getString()" rather than debug info * @see #getDebugInfo() diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java new file mode 100644 index 0000000000..c01037ae49 --- /dev/null +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java @@ -0,0 +1,178 @@ +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.usermodel; + +import java.util.HashSet; +import java.util.Iterator; + +import org.apache.poi.hssf.record.ExtendedFormatRecord; +import org.apache.poi.hssf.record.FontRecord; +import org.apache.poi.hssf.record.UnicodeString; + +/** + * Excel can get cranky if you give it files containing too + * many (especially duplicate) objects, and this class can + * help to avoid those.
+ * In general, it's much better to make sure you don't + * duplicate the objects in your code, as this is likely + * to be much faster than creating lots and lots of + * excel objects+records, only to optimise them down to + * many fewer at a later stage. + * However, sometimes this is too hard / tricky to do, which + * is where the use of this class comes in. + */ +public class HSSFOptimiser { + /** + * Goes through the Workbook, optimising the fonts by + * removing duplicate ones. + * For now, only works on fonts used in {@link HSSFCellStyle} + * and {@link HSSFRichTextString}. Any other font uses + * (eg charts, pictures) may well end up broken! + * This can be a slow operation, especially if you have + * lots of cells, cell styles or rich text strings + * @param workbook The workbook in which to optimise the fonts + */ + public static void optimiseFonts(HSSFWorkbook workbook) { + // Where each font has ended up, and if we need to + // delete the record for it. Start off with no change + short[] newPos = + new short[workbook.getWorkbook().getNumberOfFontRecords()+1]; + boolean[] zapRecords = new boolean[newPos.length]; + for(int i=0; i