<!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="add">New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel</action>
<action dev="POI-DEVELOPERS" type="fix">45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found</action>
<action dev="POI-DEVELOPERS" type="add">45380 - Missing return keyword in ArrayPtg.toFormulaString()</action>
<action dev="POI-DEVELOPERS" type="add">44958 - Record level support for Data Tables. (No formula parser support though)</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="add">New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel</action>
<action dev="POI-DEVELOPERS" type="fix">45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found</action>
<action dev="POI-DEVELOPERS" type="add">45380 - Missing return keyword in ArrayPtg.toFormulaString()</action>
<action dev="POI-DEVELOPERS" type="add">44958 - Record level support for Data Tables. (No formula parser support though)</action>
this.field_5_ext_rst = ext_rst;
}
+
+    /**
+     * Swaps all use in the string of one font index
+     *  for use of a different font index.
+     * Normally only called when fonts have been
+     *  removed / re-ordered.
+     * Every formatting run whose font index equals
+     *  <code>oldFontIndex</code> is rewritten in place; runs
+     *  using any other font are left alone.
+     *
+     * @param oldFontIndex the font index to look for in the formatting runs
+     * @param newFontIndex the font index to store in its place
+     */
+    public void swapFontUse(short oldFontIndex, short newFontIndex) {
+        // NOTE(review): the declaration of field_4_format_runs is not visible
+        //  from here; this guard assumes the list may be left null for a
+        //  string that carries no formatting runs - confirm. Without it,
+        //  such a string would fail with a NullPointerException.
+        if(field_4_format_runs == null) {
+            return;
+        }
+
+        Iterator i = field_4_format_runs.iterator();
+        while(i.hasNext()) {
+            FormatRun run = (FormatRun)i.next();
+            if(run.fontIndex == oldFontIndex) {
+                run.fontIndex = newFontIndex;
+            }
+        }
+    }
+
/**
* unlike the real records we return the same as "getString()" rather than debug info
* @see #getDebugInfo()
--- /dev/null
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import java.util.HashSet;
+import java.util.Iterator;
+
+import org.apache.poi.hssf.record.ExtendedFormatRecord;
+import org.apache.poi.hssf.record.FontRecord;
+import org.apache.poi.hssf.record.UnicodeString;
+
+/**
+ * Excel can get cranky if you give it files containing too
+ *  many (especially duplicate) objects, and this class can
+ *  help to avoid those.
+ * In general, it's much better to make sure you don't
+ *  duplicate the objects in your code, as this is likely
+ *  to be much faster than creating lots and lots of
+ *  excel objects+records, only to optimise them down to
+ *  many fewer at a later stage.
+ * However, sometimes this is too hard / tricky to do, which
+ *  is where the use of this class comes in.
+ */
+public class HSSFOptimiser {
+    /**
+     * Goes through the Workbook, optimising the fonts by
+     *  removing duplicate ones.
+     * For now, only works on fonts used in {@link HSSFCellStyle}
+     *  and {@link HSSFRichTextString}. Any other font uses
+     *  (eg charts, pictures) may well end up broken!
+     * This can be a slow operation, especially if you have
+     *  lots of cells, cell styles or rich text strings.
+     * The fonts at indexes below 5 are never removed or moved;
+     *  only fonts from index 5 upwards are candidates.
+     * @param workbook The workbook in which to optimise the fonts
+     */
+    public static void optimiseFonts(HSSFWorkbook workbook) {
+        // Where each font has ended up, and if we need to
+        //  delete the record for it. Start off with no change.
+        // Index 4 is skipped everywhere below, hence one more
+        //  slot than there are font records.
+        short[] newPos =
+            new short[workbook.getWorkbook().getNumberOfFontRecords()+1];
+        boolean[] zapRecords = new boolean[newPos.length];
+        for(int i=0; i<newPos.length; i++) {
+            newPos[i] = (short)i;
+            zapRecords[i] = false;
+        }
+
+        // Get each font record, so we can do deletes
+        //  without getting confused
+        FontRecord[] frecs = new FontRecord[newPos.length];
+        for(int i=0; i<newPos.length; i++) {
+            // There is no 4!
+            if(i == 4) continue;
+
+            frecs[i] = workbook.getWorkbook().getFontRecordAt(i);
+        }
+
+        // Loop over each font, seeing if it is the same
+        //  as an earlier one. If it is, point users of the
+        //  later duplicate copy to the earlier one, and
+        //  mark the later one as needing deleting
+        // Note - don't change built in fonts (those before 5)
+        for(int i=5; i<newPos.length; i++) {
+            // Check this one for being a duplicate
+            //  of an earlier one. The scan stops at the
+            //  first (lowest index) match.
+            int earlierDuplicate = -1;
+            for(int j=0; j<i && earlierDuplicate == -1; j++) {
+                if(j == 4) continue;
+
+                // Compare against the snapshot taken above; nothing
+                //  has been removed yet, so it holds the same records
+                //  the workbook would hand back
+                if(frecs[j].sameProperties(frecs[i])) {
+                    earlierDuplicate = j;
+                }
+            }
+
+            // If we got a duplicate, mark it as such
+            if(earlierDuplicate != -1) {
+                newPos[i] = (short)earlierDuplicate;
+                zapRecords[i] = true;
+            }
+        }
+
+        // Update the new positions based on
+        //  deletes that have occurred between
+        //  the start and them
+        // Only need to worry about user fonts
+        for(int i=5; i<newPos.length; i++) {
+            // Find the number deleted to that
+            //  point, and adjust
+            short preDeletePos = newPos[i];
+            short newPosition = preDeletePos;
+            for(int j=0; j<preDeletePos; j++) {
+                if(zapRecords[j]) newPosition--;
+            }
+
+            // Update the new position
+            newPos[i] = newPosition;
+        }
+
+        // Zap the un-needed user font records
+        for(int i=5; i<newPos.length; i++) {
+            if(zapRecords[i]) {
+                workbook.getWorkbook().removeFontRecord(
+                        frecs[i]
+                );
+            }
+        }
+
+        // Tell HSSFWorkbook that it needs to
+        //  re-start its HSSFFontCache
+        workbook.resetFontCache();
+
+        // Update the cell styles to point at the
+        //  new locations of the fonts
+        for(int i=0; i<workbook.getWorkbook().getNumExFormats(); i++) {
+            ExtendedFormatRecord xfr = workbook.getWorkbook().getExFormatAt(i);
+            xfr.setFontIndex(
+                    newPos[ xfr.getFontIndex() ]
+            );
+        }
+
+        // Update the rich text strings to point at
+        //  the new locations of the fonts
+        // Remember that one underlying unicode string
+        //  may be shared by multiple RichTextStrings!
+        // Track the strings already rewritten by object identity.
+        //  NOTE(review): UnicodeString's equals/hashCode are not
+        //  visible from here; if they compare contents, a string
+        //  that has just been rewritten could compare equal to a
+        //  different, not yet rewritten one and cause it to be
+        //  skipped. Identity tracking is correct either way.
+        java.util.IdentityHashMap doneUnicodeStrings = new java.util.IdentityHashMap();
+        for(int sheetNum=0; sheetNum<workbook.getNumberOfSheets(); sheetNum++) {
+            HSSFSheet s = workbook.getSheetAt(sheetNum);
+            Iterator rIt = s.rowIterator();
+            while(rIt.hasNext()) {
+                HSSFRow row = (HSSFRow)rIt.next();
+                Iterator cIt = row.cellIterator();
+                while(cIt.hasNext()) {
+                    HSSFCell cell = (HSSFCell)cIt.next();
+                    if(cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
+                        HSSFRichTextString rtr = cell.getRichStringCellValue();
+                        UnicodeString u = rtr.getRawUnicodeString();
+
+                        // Have we done this string already?
+                        if(! doneUnicodeStrings.containsKey(u)) {
+                            // Update for each new position.
+                            // Going upwards is safe: a font only ever
+                            //  moves to a lower index, so a run that has
+                            //  been rewritten cannot match a later old
+                            //  index and be rewritten a second time.
+                            // The counter is an int so that it cannot
+                            //  wrap round on a very large font table.
+                            for(int i=5; i<newPos.length; i++) {
+                                if(i != newPos[i]) {
+                                    u.swapFontUse((short)i, newPos[i]);
+                                }
+                            }
+
+                            // Mark as done
+                            doneUnicodeStrings.put(u, Boolean.TRUE);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Goes through the Workbook, optimising the cell styles
+     *  by removing duplicate ones.
+     * For best results, optimise the fonts via a call to
+     *  {@link #optimiseFonts(HSSFWorkbook)} first.
+     * Not implemented yet: the method body is currently empty,
+     *  so calling it leaves the workbook unchanged.
+     * @param workbook The workbook in which to optimise the cell styles
+     */
+    public static void optimiseCellStyles(HSSFWorkbook workbook) {
+
+    }
+}
/** Called whenever the unicode string is modified. When it is modified
* we need to create a new SST index, so that other LabelSSTRecords will not
- * be affected by changes tat we make to this string.
+ * be affected by changes that we make to this string.
*/
private UnicodeString cloneStringIfRequired() {
if (book == null)
return string.getString();
}
- /** Used internally by the HSSFCell to get the internal string value*/
+    /**
+     * Hands the internal string value to HSSFCell.
+     * The value is obtained through cloneStringIfRequired(),
+     *  so that the string returned is not shared.
+     */
    UnicodeString getUnicodeString() {
        UnicodeString unshared = cloneStringIfRequired();
        return unshared;
    }
+
+    /**
+     * Gives direct access to the underlying Unicode String,
+     *  which is probably shared.
+     * Intended for tweaking the styles, eg updating font
+     *  positions.
+     * Be careful: changes made to the returned string may
+     *  well affect other RichTextStrings too!
+     */
+    UnicodeString getRawUnicodeString() {
+        return this.string;
+    }
/** Used internally by the HSSFCell to set the internal string value*/
void setUnicodeString(UnicodeString str) {
return retval;
}
+
+    /**
+     * Throws away the fonts cache by replacing it with a
+     *  new, empty table, so that later calls to getFontAt()
+     *  create new objects.
+     * Only meant to be called after fonts have been deleted,
+     *  which is not something you should normally do.
+     */
+    protected void resetFontCache() {
+        this.fonts = new Hashtable();
+    }
/**
* create a new Cell style and add it to the workbook's style table
result.addTestSuite(TestHSSFDateUtil.class);
result.addTestSuite(TestHSSFHeaderFooter.class);
result.addTestSuite(TestHSSFHyperlink.class);
+ result.addTestSuite(TestHSSFOptimiser.class);
result.addTestSuite(TestHSSFPalette.class);
result.addTestSuite(TestHSSFPatriarch.class);
result.addTestSuite(TestHSSFPicture.class);
--- /dev/null
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import junit.framework.TestCase;
+
+public class TestHSSFOptimiser extends TestCase {
+    /**
+     * Creates a font in the workbook, then sets its height and name.
+     */
+    private static HSSFFont makeFont(HSSFWorkbook book, int height, String name) {
+        HSSFFont font = book.createFont();
+        font.setFontHeight((short)height);
+        font.setFontName(name);
+        return font;
+    }
+
+    /**
+     * Creates a cell style in the workbook and points it at the font.
+     */
+    private static HSSFCellStyle makeStyle(HSSFWorkbook book, HSSFFont font) {
+        HSSFCellStyle style = book.createCellStyle();
+        style.setFont(font);
+        return style;
+    }
+
+    /**
+     * Checks the number of fonts and of cell styles in the workbook.
+     */
+    private static void assertCounts(int fonts, int styles, HSSFWorkbook book) {
+        assertEquals(fonts, book.getNumberOfFonts());
+        assertEquals(styles, book.getNumCellStyles());
+    }
+
+    /**
+     * Checks which font index the rich text in the given column
+     *  of the row reports at the given character position.
+     */
+    private static void assertFontAt(int expected, HSSFRow row, int column, int charIndex) {
+        assertEquals(
+                expected,
+                row.getCell(column).getRichStringCellValue().getFontAtIndex(charIndex)
+        );
+    }
+
+    /**
+     * With a single user font and nothing duplicated, optimising
+     *  must leave the font and style counts, and the style's
+     *  font, as they were.
+     */
+    public void testDoesNoHarmIfNothingToDo() throws Exception {
+        HSSFWorkbook book = new HSSFWorkbook();
+
+        HSSFFont font = book.createFont();
+        font.setFontName("Testing");
+        HSSFCellStyle style = makeStyle(book, font);
+
+        assertCounts(5, 22, book);
+
+        // Optimise fonts
+        HSSFOptimiser.optimiseFonts(book);
+
+        assertCounts(5, 22, book);
+        assertEquals(font, style.getFont(book));
+
+        // Optimise styles - call left disabled, as in the original test
+//        HSSFOptimiser.optimiseCellStyles(book);
+
+        assertCounts(5, 22, book);
+        assertEquals(font, style.getFont(book));
+    }
+
+    /**
+     * Six user fonts, two of them duplicates of earlier ones:
+     *  after optimising, the duplicates are gone and both the
+     *  cell styles and the rich text runs use the new indexes.
+     */
+    public void testOptimiseFonts() throws Exception {
+        HSSFWorkbook book = new HSSFWorkbook();
+
+        // Add 6 fonts, some duplicates. The asserts further down
+        //  show the first of them sitting at index 5
+        HSSFFont testing        = makeFont(book, 11, "Testing");
+        HSSFFont alsoTesting    = makeFont(book, 22, "Also Testing");
+        HSSFFont unique         = makeFont(book, 33, "Unique");
+        HSSFFont testingDup     = makeFont(book, 11, "Testing");
+        HSSFFont alsoTestingDup = makeFont(book, 22, "Also Testing");
+        HSSFFont alsoUnique     = makeFont(book, 66, "Also Unique");
+
+        // Use four of the six in cell styles
+        HSSFCellStyle styleA = makeStyle(book, testing);
+        assertEquals(5, styleA.getFontIndex());
+
+        HSSFCellStyle styleB = makeStyle(book, testingDup);
+        assertEquals(8, styleB.getFontIndex());
+
+        HSSFCellStyle styleC = makeStyle(book, alsoTestingDup);
+        assertEquals(9, styleC.getFontIndex());
+
+        HSSFCellStyle styleD = makeStyle(book, alsoUnique);
+        assertEquals(10, styleD.getFontIndex());
+
+        // And five of them in two rich text strings
+        HSSFSheet sheet = book.createSheet();
+        HSSFRow row = sheet.createRow(0);
+
+        HSSFRichTextString firstText = new HSSFRichTextString("Test");
+        firstText.applyFont(0, 2, testing);
+        firstText.applyFont(3, 4, alsoTesting);
+        row.createCell((short)0).setCellValue(firstText);
+
+        HSSFRichTextString secondText = new HSSFRichTextString("AlsoTest");
+        secondText.applyFont(0, 2, unique);
+        secondText.applyFont(3, 5, alsoTestingDup);
+        secondText.applyFont(6, 8, alsoUnique);
+        row.createCell((short)1).setCellValue(secondText);
+
+        // Check what we have now
+        assertCounts(10, 25, book);
+
+        // Optimise
+        HSSFOptimiser.optimiseFonts(book);
+
+        // Two fonts gone, styles untouched
+        assertCounts(8, 25, book);
+
+        // Check font use in cell styles
+        assertEquals(5, styleA.getFontIndex());
+        assertEquals(5, styleB.getFontIndex()); // duplicate of the first font
+        assertEquals(6, styleC.getFontIndex()); // duplicate of the second font
+        assertEquals(8, styleD.getFontIndex()); // two have gone
+
+        // And in rich text
+
+        // First string used the first and second fonts, unchanged
+        assertFontAt(5, row, 0, 0);
+        assertFontAt(5, row, 0, 1);
+        assertFontAt(6, row, 0, 3);
+        assertFontAt(6, row, 0, 4);
+
+        // Second string used the third font (unchanged), the
+        //  duplicate of the second font, and the last font (moved down)
+        assertFontAt(7, row, 1, 0);
+        assertFontAt(7, row, 1, 1);
+        assertFontAt(6, row, 1, 3);
+        assertFontAt(6, row, 1, 4);
+        assertFontAt(8, row, 1, 6);
+        assertFontAt(8, row, 1, 7);
+    }
+}