]> source.dussan.org Git - poi.git/commitdiff
fix Bug 51524 -- PapBinTable constructor is slow (regression)
authorSergey Vladimirov <sergey@apache.org>
Mon, 18 Jul 2011 18:44:03 +0000 (18:44 +0000)
committerSergey Vladimirov <sergey@apache.org>
Mon, 18 Jul 2011 18:44:03 +0000 (18:44 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148002 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java
src/scratchpad/testcases/log4j.properties [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestDataSamples.java
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java

index 707aca954358789df1375b7ce08c4118460c1366..9abeb80d1b9a7a80500371403a012d93dbbfc85f 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta4" date="2011-??-??">
+           <action dev="poi-developers" type="fix">51524 - PapBinTable constructor is slow (regression)</action>
            <action dev="poi-developers" type="fix">51514 - allow HSSFObjectData to work with both POIFS and NPOIFS</action>
            <action dev="poi-developers" type="fix">51514 - avoid NPE when copying nodes from one HSSF workbook to a new one, when opened from NPOIFS</action>
            <action dev="poi-developers" type="fix">51504 - avoid NPE when DefaultRowHeight or DefaultColumnWidth records are missing</action>
index 4aaa8e2268b61409904fc70b111676201023b8a4..53b92fcfaa7a81ce5863529fb0bb4418d98a364a 100644 (file)
@@ -21,8 +21,11 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
+import java.util.IdentityHashMap;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.poi.hwpf.model.io.HWPFFileSystem;
 import org.apache.poi.hwpf.model.io.HWPFOutputStream;
@@ -69,37 +72,52 @@ public class PAPBinTable
     }
 
     public PAPBinTable( byte[] documentStream, byte[] tableStream,
-            byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
-            TextPieceTable tpt, boolean reconstructPapxTable )
+            byte[] dataStream, int offset, int size,
+            ComplexFileTable complexFileTable, TextPieceTable tpt,
+            boolean reconstructPapxTable )
     {
-    PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
-    this.tpt = tpt;
+        long start = System.currentTimeMillis();
 
-    int length = binTable.length();
-    for (int x = 0; x < length; x++)
-    {
-      GenericPropertyNode node = binTable.getProperty(x);
+        {
+            PlexOfCps binTable = new PlexOfCps( tableStream, offset, size, 4 );
+            this.tpt = tpt;
 
-      int pageNum = LittleEndian.getInt(node.getBytes());
-      int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
+            int length = binTable.length();
+            for ( int x = 0; x < length; x++ )
+            {
+                GenericPropertyNode node = binTable.getProperty( x );
 
-      PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
-        dataStream, pageOffset, tpt, reconstructPapxTable);
+                int pageNum = LittleEndian.getInt( node.getBytes() );
+                int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
+                        * pageNum;
 
-      int fkpSize = pfkp.size();
+                PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
+                        documentStream, dataStream, pageOffset, tpt,
+                        reconstructPapxTable );
 
-      for (int y = 0; y < fkpSize; y++)
-      {
-       PAPX papx = pfkp.getPAPX(y);
+                int fkpSize = pfkp.size();
 
-       if (papx != null)
-           _paragraphs.add(papx);
-      }
-    }
+                for ( int y = 0; y < fkpSize; y++ )
+                {
+                    PAPX papx = pfkp.getPAPX( y );
+
+                    if ( papx != null )
+                        _paragraphs.add( papx );
+                }
+            }
+        }
+
+        logger.log( POILogger.DEBUG, "PAPX tables loaded in ",
+                Long.valueOf( System.currentTimeMillis() - start ), " ms (",
+                Integer.valueOf( _paragraphs.size() ), " elements)" );
+        start = System.currentTimeMillis();
 
         if ( !reconstructPapxTable )
         {
             Collections.sort( _paragraphs );
+
+            logger.log( POILogger.DEBUG, "PAPX sorted in ",
+                    Long.valueOf( System.currentTimeMillis() - start ), " ms" );
             return;
         }
 
@@ -107,7 +125,7 @@ public class PAPBinTable
         {
             SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
 
-            // adding CHPX from fast-saved SPRMs
+            // adding PAPX from fast-saved SPRMs
             for ( TextPiece textPiece : tpt.getTextPieces() )
             {
                 PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
@@ -137,7 +155,7 @@ public class PAPBinTable
 
                 if ( hasPap )
                 {
-                    SprmBuffer newSprmBuffer = new SprmBuffer(2);
+                    SprmBuffer newSprmBuffer = new SprmBuffer( 2 );
                     newSprmBuffer.append( sprmBuffer.toByteArray() );
 
                     PAPX papx = new PAPX( textPiece.getStart(),
@@ -145,6 +163,13 @@ public class PAPBinTable
                     _paragraphs.add( papx );
                 }
             }
+
+            logger.log( POILogger.DEBUG,
+                    "Merged (?) with PAPX from complex file table in ",
+                    Long.valueOf( System.currentTimeMillis() - start ),
+                    " ms (", Integer.valueOf( _paragraphs.size() ),
+                    " elements in total)" );
+            start = System.currentTimeMillis();
         }
 
         // rebuild document paragraphs structure
@@ -170,9 +195,35 @@ public class PAPBinTable
             docText.replace( textPiece.getStart(), textPiece.getStart()
                     + toAppendLength, toAppend );
         }
+        logger.log( POILogger.DEBUG, "Document text rebuilded in ",
+                Long.valueOf( System.currentTimeMillis() - start ), " ms (",
+                Integer.valueOf( docText.length() ), " chars)" );
+        start = System.currentTimeMillis();
+
+        List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs );
+        Collections.sort( oldPapxSortedByEndPos,
+                PropertyNode.EndComparator.instance );
+
+        logger.log( POILogger.DEBUG, "PAPX sorted by end position in ",
+                Long.valueOf( System.currentTimeMillis() - start ), " ms" );
+        start = System.currentTimeMillis();
+
+        final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
+        {
+        int counter = 0;
+        for ( PAPX papx : _paragraphs )
+        {
+            papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
+        }
+        }
+
+        logger.log( POILogger.DEBUG, "PAPX's order map created in ",
+                Long.valueOf( System.currentTimeMillis() - start ), " ms" );
+        start = System.currentTimeMillis();
 
         List<PAPX> newPapxs = new LinkedList<PAPX>();
         int lastParStart = 0;
+        int lastPapxIndex = 0;
         for ( int charIndex = 0; charIndex < docText.length(); charIndex++ )
         {
             final char c = docText.charAt( charIndex );
@@ -183,20 +234,19 @@ public class PAPBinTable
             final int endExclusive = charIndex + 1;
 
             List<PAPX> papxs = new LinkedList<PAPX>();
-            for ( PAPX papx : _paragraphs )
+            for ( int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos
+                    .size(); papxIndex++ )
             {
-                // TODO: Tests, check, etc
-                for ( int f = papx.getEnd() - 1; f <= charIndex; f++ )
+                PAPX papx = oldPapxSortedByEndPos.get( papxIndex );
+
+                assert papx.getEnd() > startInclusive;
+                if ( papx.getEnd() - 1 > charIndex )
                 {
-                    if ( f == charIndex )
-                    {
-                        papxs.add( papx );
-                        break;
-                    }
-                    final char fChar = docText.charAt( charIndex );
-                    if ( fChar == 13 || fChar == 7 || fChar == 12 )
-                        break;
+                    lastPapxIndex = papxIndex;
+                    break;
                 }
+
+                papxs.add( papx );
             }
 
             if ( papxs.size() == 0 )
@@ -226,6 +276,17 @@ public class PAPBinTable
                 }
             }
 
+            // restore file order of PAPX
+            Collections.sort( papxs, new Comparator<PAPX>()
+            {
+                public int compare( PAPX o1, PAPX o2 )
+                {
+                    Integer i1 = papxToFileOrder.get( o1 );
+                    Integer i2 = papxToFileOrder.get( o2 );
+                    return i1.compareTo( i2 );
+                }
+            } );
+
             SprmBuffer sprmBuffer = null;
             for ( PAPX papx : papxs )
             {
index f0f6f4e92156d7db238452efc0b45a09258253cb..78f4b35bf488be83a5a38610b0b6cf82da800403 100644 (file)
@@ -35,6 +35,19 @@ import org.apache.poi.util.POILogger;
 public abstract class PropertyNode<T extends PropertyNode<T>>  implements Comparable<T>, Cloneable
 {
 
+    static final class EndComparator implements Comparator<PropertyNode<?>>
+    {
+        static EndComparator instance = new EndComparator();
+
+        public int compare( PropertyNode<?> o1, PropertyNode<?> o2 )
+        {
+            int thisVal = o1.getEnd();
+            int anotherVal = o2.getEnd();
+            return ( thisVal < anotherVal ? -1 : ( thisVal == anotherVal ? 0
+                    : 1 ) );
+        }
+    }
+
     static final class StartComparator implements Comparator<PropertyNode<?>>
     {
         static StartComparator instance = new StartComparator();
diff --git a/src/scratchpad/testcases/log4j.properties b/src/scratchpad/testcases/log4j.properties
new file mode 100644 (file)
index 0000000..083b5da
--- /dev/null
@@ -0,0 +1,6 @@
+log4j.rootLogger=ALL,CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.target=System.out
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%d{dd.MM HH:mm:ss} %-30.30c %5p %m%n
index 2c5253d9cfa1ed3ba992c4379d6285193ef97728..4f0f81457ea40fda7805955691f82af6234a1ed2 100644 (file)
 ==================================================================== */\r
 package org.apache.poi.hwpf;\r
 \r
+import java.io.ByteArrayInputStream;\r
+import java.io.ByteArrayOutputStream;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.util.zip.ZipInputStream;\r
+\r
 import org.apache.poi.POIDataSamples;\r
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;\r
-\r
-import java.io.*;\r
+import org.apache.poi.util.IOUtils;\r
+import org.apache.poi.util.POILogFactory;\r
+import org.apache.poi.util.POILogger;\r
 \r
 public class HWPFTestDataSamples {\r
 \r
+    private static final POILogger logger = POILogFactory\r
+            .getLogger( HWPFTestDataSamples.class );\r
+\r
     public static HWPFDocument openSampleFile(String sampleFileName) {\r
         try {\r
             InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);\r
@@ -31,6 +41,55 @@ public class HWPFTestDataSamples {
             throw new RuntimeException(e);\r
         }\r
     }\r
+\r
+    public static HWPFDocument openSampleFileFromArchive( String sampleFileName )\r
+    {\r
+        final long start = System.currentTimeMillis();\r
+        try\r
+        {\r
+            ZipInputStream is = new ZipInputStream( POIDataSamples\r
+                    .getDocumentInstance()\r
+                    .openResourceAsStream( sampleFileName ) );\r
+            try\r
+            {\r
+                is.getNextEntry();\r
+                ByteArrayOutputStream baos = new ByteArrayOutputStream();\r
+                try\r
+                {\r
+                    IOUtils.copy( is, baos );\r
+                }\r
+                finally\r
+                {\r
+                    baos.close();\r
+                }\r
+\r
+                final long endUnzip = System.currentTimeMillis();\r
+                byte[] byteArray = baos.toByteArray();\r
+\r
+                logger.log( POILogger.DEBUG, "Unzipped in ",\r
+                        Long.valueOf( endUnzip - start ), " ms -- ",\r
+                        Long.valueOf( byteArray.length ), " byte(s)" );\r
+\r
+                ByteArrayInputStream bais = new ByteArrayInputStream( byteArray );\r
+                HWPFDocument doc = new HWPFDocument( bais );\r
+                final long endParse = System.currentTimeMillis();\r
+\r
+                logger.log( POILogger.DEBUG, "Parsed in ",\r
+                        Long.valueOf( endParse - start ), " ms" );\r
+\r
+                return doc;\r
+            }\r
+            finally\r
+            {\r
+                is.close();\r
+            }\r
+        }\r
+        catch ( IOException e )\r
+        {\r
+            throw new RuntimeException( e );\r
+        }\r
+    }\r
+\r
     public static HWPFOldDocument openOldSampleFile(String sampleFileName) {\r
        try {\r
            InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);\r
index 5b13f781fd6381bd8fd8ad395d332b5e14787b7e..60b4df7a33eaea50b612858c82d30b4c4a3f9f6a 100644 (file)
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.util.List;
 
 import junit.framework.AssertionFailedError;
+
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.poi.EncryptedDocumentException;
 import org.apache.poi.POIDataSamples;
@@ -43,6 +44,7 @@ import org.apache.poi.util.IOUtils;
  */
 public final class TestProblems extends HWPFTestCase {
 
+    
    /**
     * ListEntry passed no ListTable
     */
@@ -825,4 +827,12 @@ public final class TestProblems extends HWPFTestCase {
         }
     }
 
+    /**
+     * Bug 51524 - PapBinTable constructor is slow
+     */
+    public void test51524()
+    {
+        HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
+    }
+
 }