]> source.dussan.org Git - poi.git/commitdiff
Start on HSSFOptimiser, which removes un-needed cell styles and fonts, fixing up...
authorNick Burch <nick@apache.org>
Tue, 15 Jul 2008 21:15:16 +0000 (21:15 +0000)
committerNick Burch <nick@apache.org>
Tue, 15 Jul 2008 21:15:16 +0000 (21:15 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@677041 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/hssf/record/UnicodeString.java
src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java [new file with mode: 0644]
src/java/org/apache/poi/hssf/usermodel/HSSFRichTextString.java
src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
src/testcases/org/apache/poi/hssf/usermodel/AllUserModelTests.java
src/testcases/org/apache/poi/hssf/usermodel/TestHSSFOptimiser.java [new file with mode: 0644]

index 12f62cb8b9a504bcb4708f00883443d0e4a90c15..a5cb00484c7d865150e11f0514cd1091745ae1d6 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel</action>
            <action dev="POI-DEVELOPERS" type="fix">45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found</action>
            <action dev="POI-DEVELOPERS" type="add">45380 - Missing return keyword in ArrayPtg.toFormulaString()</action>
            <action dev="POI-DEVELOPERS" type="add">44958 - Record level support for Data Tables. (No formula parser support though)</action>
index a62ee3ddebc3496da69febdaf76a395e0818c0c9..27f0624314d027b20af01232d643c16edb50ab3d 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel</action>
            <action dev="POI-DEVELOPERS" type="fix">45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found</action>
            <action dev="POI-DEVELOPERS" type="add">45380 - Missing return keyword in ArrayPtg.toFormulaString()</action>
            <action dev="POI-DEVELOPERS" type="add">44958 - Record level support for Data Tables. (No formula parser support though)</action>
index b53fcd485c5dd9d1ede3126378e5b8876b0abc62..9919d52c3d5ee83c79c03a07101f97b314617eb5 100644 (file)
@@ -439,6 +439,23 @@ public class UnicodeString
       this.field_5_ext_rst = ext_rst;
     }
 
+
+    /**
+     * Replaces every use of one font index in this string's
+     *  formatting runs with a different font index.
+     * Runs whose font index does not match are left untouched.
+     * Normally only called when fonts have been
+     *  removed / re-ordered
+     * NOTE(review): field_4_format_runs is iterated without a
+     *  null check - confirm it can never be null here (eg for
+     *  a string that has no formatting runs).
+     * @param oldFontIndex the font index to look for in each run
+     * @param newFontIndex the font index stored into each matching run
+     */
+    public void swapFontUse(short oldFontIndex, short newFontIndex) {
+       Iterator i = field_4_format_runs.iterator();
+       while(i.hasNext()) {
+               FormatRun run = (FormatRun)i.next();
+               if(run.fontIndex == oldFontIndex) {
+                       run.fontIndex = newFontIndex;
+               }
+       }
+    }
+    
     /**
      * unlike the real records we return the same as "getString()" rather than debug info
      * @see #getDebugInfo()
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java
new file mode 100644 (file)
index 0000000..c01037a
--- /dev/null
@@ -0,0 +1,178 @@
+/* ====================================================================
+   Copyright 2002-2004   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import java.util.HashSet;
+import java.util.Iterator;
+
+import org.apache.poi.hssf.record.ExtendedFormatRecord;
+import org.apache.poi.hssf.record.FontRecord;
+import org.apache.poi.hssf.record.UnicodeString;
+
+/**
+ * Excel can get cranky if you give it files containing too
+ *  many (especially duplicate) objects, and this class can
+ *  help to avoid those.
+ * In general, it's much better to make sure you don't 
+ *  duplicate the objects in your code, as this is likely
+ *  to be much faster than creating lots and lots of
+ *  excel objects+records, only to optimise them down to
+ *  many fewer at a later stage.
+ * However, sometimes this is too hard / tricky to do, which
+ *  is where the use of this class comes in.
+ */
+public class HSSFOptimiser {
+       /**
+        * Goes through the Workbook, optimising the fonts by
+        *  removing duplicate ones.
+        * A font is treated as a duplicate when an earlier font
+        *  record reports the same properties (via
+        *  FontRecord.sameProperties). Users of the duplicate
+        *  are re-pointed at the earlier font, the duplicate
+        *  record is removed, and the indexes of the remaining
+        *  fonts are shifted down to close the gaps.
+        * The built in fonts (those before index 5) are never
+        *  removed or re-numbered.
+        * For now, only works on fonts used in {@link HSSFCellStyle}
+        *  and {@link HSSFRichTextString}. Any other font uses
+        *  (eg charts, pictures) may well end up broken!
+        * This can be a slow operation, especially if you have
+        *  lots of cells, cell styles or rich text strings
+        * NOTE(review): the font index of every extended format
+        *  record is used directly as an index into the position
+        *  table - presumably these are always valid font
+        *  indexes; confirm for damaged files.
+        * @param workbook The workbook in which to optimise the fonts
+        */
+       public static void optimiseFonts(HSSFWorkbook workbook) {
+               // Where each font has ended up, and if we need to
+               //  delete the record for it. Start off with no change
+               short[] newPos = 
+                       new short[workbook.getWorkbook().getNumberOfFontRecords()+1];
+               boolean[] zapRecords = new boolean[newPos.length];
+               for(int i=0; i<newPos.length; i++) {
+                       newPos[i] = (short)i;
+                       zapRecords[i] = false;
+               }
+               
+               // Get each font record, so we can do deletes
+               //  without getting confused
+               FontRecord[] frecs = new FontRecord[newPos.length]; 
+               for(int i=0; i<newPos.length; i++) {
+                       // There is no font at index 4, so leave that slot empty
+                       if(i == 4) continue;
+                       
+                       frecs[i] = workbook.getWorkbook().getFontRecordAt(i);
+               }
+               
+               // Loop over each font, seeing if it is the same
+               //  as an earlier one. If it is, point users of the
+               //  later duplicate copy to the earlier one, and 
+               //  mark the later one as needing deleting
+               // Note - don't change built in fonts (those before 5)
+               for(int i=5; i<newPos.length; i++) {
+                       // Check this one for being a duplicate
+                       //  of an earlier one
+                       int earlierDuplicate = -1;
+                       for(int j=0; j<i && earlierDuplicate == -1; j++) {
+                               if(j == 4) continue;
+                               
+                               FontRecord frCheck = workbook.getWorkbook().getFontRecordAt(j);
+                               if(frCheck.sameProperties(frecs[i])) {
+                                       earlierDuplicate = j;
+                               }
+                       }
+                       
+                       // If we got a duplicate, mark it as such
+                       if(earlierDuplicate != -1) {
+                               newPos[i] = (short)earlierDuplicate;
+                               zapRecords[i] = true;
+                       }
+               }
+               
+               // Update the new positions based on
+               //  deletes that have occurred between
+               //  the start and them
+               // Only need to worry about user fonts
+               for(int i=5; i<newPos.length; i++) {
+                       // Count the records marked for deletion that sit
+                       //  before this font's target, and shift down by that many
+                       short preDeletePos = newPos[i];
+                       short newPosition = preDeletePos;
+                       for(int j=0; j<preDeletePos; j++) {
+                               if(zapRecords[j]) newPosition--;
+                       }
+                       
+                       // Update the new position
+                       newPos[i] = newPosition;
+               }
+               
+               // Zap the un-needed user font records
+               for(int i=5; i<newPos.length; i++) {
+                       if(zapRecords[i]) {
+                               workbook.getWorkbook().removeFontRecord(
+                                               frecs[i]
+                               );
+                       }
+               }
+               
+               // Tell HSSFWorkbook that it needs to
+               //  re-start its HSSFFontCache
+               workbook.resetFontCache();
+               
+               // Update the cell styles to point at the 
+               //  new locations of the fonts
+               for(int i=0; i<workbook.getWorkbook().getNumExFormats(); i++) {
+                       ExtendedFormatRecord xfr = workbook.getWorkbook().getExFormatAt(i);
+                       xfr.setFontIndex(
+                                       newPos[ xfr.getFontIndex() ]
+                       );
+               }
+               
+               // Update the rich text strings to point at
+               //  the new locations of the fonts
+               // Remember that one underlying unicode string
+               //  may be shared by multiple RichTextStrings!
+               HashSet doneUnicodeStrings = new HashSet();
+               for(int sheetNum=0; sheetNum<workbook.getNumberOfSheets(); sheetNum++) {
+                       HSSFSheet s = workbook.getSheetAt(sheetNum);
+                       Iterator rIt = s.rowIterator();
+                       while(rIt.hasNext()) {
+                               HSSFRow row = (HSSFRow)rIt.next();
+                               Iterator cIt = row.cellIterator();
+                               while(cIt.hasNext()) {
+                                       HSSFCell cell = (HSSFCell)cIt.next();
+                                       if(cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
+                                               HSSFRichTextString rtr = cell.getRichStringCellValue();
+                                               UnicodeString u = rtr.getRawUnicodeString();
+                                               
+                                               // Have we done this string already?
+                                               if(! doneUnicodeStrings.contains(u)) {
+                                                       // Swap each font whose position has changed
+                                                       for(short i=5; i<newPos.length; i++) {
+                                                               if(i != newPos[i]) {
+                                                                       u.swapFontUse(i, newPos[i]);
+                                                               }
+                                                       }
+                                                       
+                                                       // Mark as done
+                                                       doneUnicodeStrings.add(u);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+       
+       /**
+        * Goes through the Workbook, optimising the cell styles
+        *  by removing duplicate ones.
+        * Not implemented yet - the method body is currently
+        *  empty, so calling it leaves the workbook unchanged.
+        * For best results, optimise the fonts via a call to
+        *  {@link #optimiseFonts(HSSFWorkbook)} first.
+        * @param workbook The workbook in which to optimise the cell styles
+        */
+       public static void optimiseCellStyles(HSSFWorkbook workbook) {
+               
+       }
+}
index 93db9214a60d0b547af4449543f293ae9718a7a0..1b7342be614cb2830e277654d02c3fe43d286ce7 100644 (file)
@@ -67,7 +67,7 @@ public class HSSFRichTextString
     
     /** Called whenever the unicode string is modified. When it is modified
      *  we need to create a new SST index, so that other LabelSSTRecords will not
-     *  be affected by changes tat we make to this string.
+     *  be affected by changes that we make to this string.
      */
     private UnicodeString cloneStringIfRequired() {
       if (book == null)
@@ -167,10 +167,25 @@ public class HSSFRichTextString
         return string.getString();
     }
 
-    /** Used internally by the HSSFCell to get the internal string value*/
+    /** 
+     * Used internally by the HSSFCell to get the internal 
+     * string value.
+     * Will ensure the string is not shared
+     */
     UnicodeString getUnicodeString() {
       return cloneStringIfRequired();
     }
+    
+    /**
+     * Returns the raw, probably shared Unicode String. 
+     * The underlying field is handed back directly, with
+     *  no copy being made.
+     * Used when tweaking the styles, eg updating font 
+     *  positions.
+     * Changes to this string may well affect
+     *  other RichTextStrings too! 
+     * @return the underlying UnicodeString held by this object
+     */
+    UnicodeString getRawUnicodeString() {
+       return string;
+    }
 
     /** Used internally by the HSSFCell to set the internal string value*/
     void setUnicodeString(UnicodeString str) {
index 0191b4248a0d201dff3dc20bd318a536a81ef609..84d7817fc444005612fbb95c605401fc1f905498 100644 (file)
@@ -1073,6 +1073,16 @@ public class HSSFWorkbook extends POIDocument
 
         return retval;
     }
+    
+    /**
+     * Reset the fonts cache, causing all new calls
+     *  to getFontAt() to create new objects.
+     * The cache is replaced with a new, empty Hashtable, so
+     *  anything previously held in it is dropped.
+     * Should only be called after deleting fonts,
+     *  and that's not something you should normally do
+     */
+    protected void resetFontCache() {
+       fonts = new Hashtable();
+    }
 
     /**
      * create a new Cell style and add it to the workbook's style table
index 363e58c142788aad28784887703e87e7f483af2a..15a19c09121c9ae679415c61170aa68ae8c66acd 100755 (executable)
@@ -47,6 +47,7 @@ public class AllUserModelTests {
                result.addTestSuite(TestHSSFDateUtil.class);
                result.addTestSuite(TestHSSFHeaderFooter.class);
                result.addTestSuite(TestHSSFHyperlink.class);
+               result.addTestSuite(TestHSSFOptimiser.class);
                result.addTestSuite(TestHSSFPalette.class);
                result.addTestSuite(TestHSSFPatriarch.class);
                result.addTestSuite(TestHSSFPicture.class);
diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFOptimiser.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFOptimiser.java
new file mode 100644 (file)
index 0000000..7ba55e7
--- /dev/null
@@ -0,0 +1,147 @@
+/* ====================================================================
+   Copyright 2002-2004   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import junit.framework.TestCase;
+
+public class TestHSSFOptimiser extends TestCase {
+       public void testDoesNoHarmIfNothingToDo() throws Exception {
+               HSSFWorkbook wb = new HSSFWorkbook();
+               
+               HSSFFont f = wb.createFont();
+               f.setFontName("Testing");
+               HSSFCellStyle s = wb.createCellStyle();
+               s.setFont(f);
+               
+               assertEquals(5, wb.getNumberOfFonts());
+               assertEquals(22, wb.getNumCellStyles());
+               
+               // Optimise fonts
+               HSSFOptimiser.optimiseFonts(wb);
+               
+               assertEquals(5, wb.getNumberOfFonts());
+               assertEquals(22, wb.getNumCellStyles());
+               
+               assertEquals(f, s.getFont(wb));
+               
+               // Optimise styles
+//             HSSFOptimiser.optimiseCellStyles(wb);
+               
+               assertEquals(5, wb.getNumberOfFonts());
+               assertEquals(22, wb.getNumCellStyles());
+               
+               assertEquals(f, s.getFont(wb));
+       }
+       
+       public void testOptimiseFonts() throws Exception {
+               HSSFWorkbook wb = new HSSFWorkbook();
+               
+               // Add 6 fonts, some duplicates
+               HSSFFont f1 = wb.createFont();
+               f1.setFontHeight((short)11);
+               f1.setFontName("Testing");
+               
+               HSSFFont f2 = wb.createFont();
+               f2.setFontHeight((short)22);
+               f2.setFontName("Also Testing");
+               
+               HSSFFont f3 = wb.createFont();
+               f3.setFontHeight((short)33);
+               f3.setFontName("Unique");
+               
+               HSSFFont f4 = wb.createFont();
+               f4.setFontHeight((short)11);
+               f4.setFontName("Testing");
+               
+               HSSFFont f5 = wb.createFont();
+               f5.setFontHeight((short)22);
+               f5.setFontName("Also Testing");
+               
+               HSSFFont f6 = wb.createFont();
+               f6.setFontHeight((short)66);
+               f6.setFontName("Also Unique");
+               
+               
+               
+               // Use four of the six fonts (f1, f4, f5, f6) in cell styles
+               HSSFCellStyle cs1 = wb.createCellStyle();
+               cs1.setFont(f1);
+               assertEquals(5, cs1.getFontIndex());
+               
+               HSSFCellStyle cs2 = wb.createCellStyle();
+               cs2.setFont(f4);
+               assertEquals(8, cs2.getFontIndex());
+               
+               HSSFCellStyle cs3 = wb.createCellStyle();
+               cs3.setFont(f5);
+               assertEquals(9, cs3.getFontIndex());
+               
+               HSSFCellStyle cs4 = wb.createCellStyle();
+               cs4.setFont(f6);
+               assertEquals(10, cs4.getFontIndex());
+               
+               
+               // And five of them (f1, f2, f3, f5, f6) in rich text
+               HSSFSheet s = wb.createSheet();
+               HSSFRow r = s.createRow(0);
+               
+               HSSFRichTextString rtr1 = new HSSFRichTextString("Test");
+               rtr1.applyFont(0, 2, f1);
+               rtr1.applyFont(3, 4, f2);
+               r.createCell((short)0).setCellValue(rtr1);
+               
+               HSSFRichTextString rtr2 = new HSSFRichTextString("AlsoTest");
+               rtr2.applyFont(0, 2, f3);
+               rtr2.applyFont(3, 5, f5);
+               rtr2.applyFont(6, 8, f6);
+               r.createCell((short)1).setCellValue(rtr2);
+               
+               
+               // Check what we have now
+               assertEquals(10, wb.getNumberOfFonts());
+               assertEquals(25, wb.getNumCellStyles());
+               
+               // Optimise
+               HSSFOptimiser.optimiseFonts(wb);
+               
+               // Check font count
+               assertEquals(8, wb.getNumberOfFonts());
+               assertEquals(25, wb.getNumCellStyles());
+               
+               // Check font use in cell styles
+               assertEquals(5, cs1.getFontIndex());
+               assertEquals(5, cs2.getFontIndex()); // duplicate of 1
+               assertEquals(6, cs3.getFontIndex()); // duplicate of 2
+               assertEquals(8, cs4.getFontIndex()); // two have gone
+               
+               
+               // And in rich text
+               
+               // RTR 1 had f1 and f2, unchanged 
+               assertEquals(5, r.getCell(0).getRichStringCellValue().getFontAtIndex(0));
+               assertEquals(5, r.getCell(0).getRichStringCellValue().getFontAtIndex(1));
+               assertEquals(6, r.getCell(0).getRichStringCellValue().getFontAtIndex(3));
+               assertEquals(6, r.getCell(0).getRichStringCellValue().getFontAtIndex(4));
+               
+               // RTR 2 had f3 (unchanged), f5 (=f2) and f6 (moved down)
+               assertEquals(7, r.getCell(1).getRichStringCellValue().getFontAtIndex(0));
+               assertEquals(7, r.getCell(1).getRichStringCellValue().getFontAtIndex(1));
+               assertEquals(6, r.getCell(1).getRichStringCellValue().getFontAtIndex(3));
+               assertEquals(6, r.getCell(1).getRichStringCellValue().getFontAtIndex(4));
+               assertEquals(8, r.getCell(1).getRichStringCellValue().getFontAtIndex(6));
+               assertEquals(8, r.getCell(1).getRichStringCellValue().getFontAtIndex(7));
+       }
+}