diff options
author | Dominik Stadler <centic@apache.org> | 2017-11-05 20:33:28 +0000 |
---|---|---|
committer | Dominik Stadler <centic@apache.org> | 2017-11-05 20:33:28 +0000 |
commit | ce77cd62701744ba37dc37d46baea7ceb3153f29 (patch) | |
tree | 2f39d0fbc2aa2dd182637db6bec7a068038da885 /src/java | |
parent | d4ad2ffd53609f395141ea39c1dac6e0f34902cc (diff) | |
download | poi-ce77cd62701744ba37dc37d46baea7ceb3153f29.tar.gz poi-ce77cd62701744ba37dc37d46baea7ceb3153f29.zip |
Bug #57517: Fix various things in HSSFOptimiser to make many more cases work fine: Column styles, row styles, user defined styles, ...
Also call optimise in integration-tests and handle some cases of invalid content in files.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1814373 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java')
4 files changed, 182 insertions, 49 deletions
diff --git a/src/java/org/apache/poi/hssf/model/InternalSheet.java b/src/java/org/apache/poi/hssf/model/InternalSheet.java index 650b4de270..83e1d37907 100644 --- a/src/java/org/apache/poi/hssf/model/InternalSheet.java +++ b/src/java/org/apache/poi/hssf/model/InternalSheet.java @@ -1680,4 +1680,12 @@ public final class InternalSheet { public int getColumnOutlineLevel(int columnIndex) { return _columnInfos.getOutlineLevel(columnIndex); } + + public int getMinColumnIndex() { + return _columnInfos.getMinColumnIndex(); + } + + public int getMaxColumnIndex() { + return _columnInfos.getMaxColumnIndex(); + } } diff --git a/src/java/org/apache/poi/hssf/model/InternalWorkbook.java b/src/java/org/apache/poi/hssf/model/InternalWorkbook.java index 5b9e7c57ae..d16eec62ee 100644 --- a/src/java/org/apache/poi/hssf/model/InternalWorkbook.java +++ b/src/java/org/apache/poi/hssf/model/InternalWorkbook.java @@ -937,6 +937,27 @@ public final class InternalWorkbook { } /** + * Update the StyleRecord to point to the new + * given index. + * + * @param oldXf the extended format index that was previously associated with this StyleRecord + * @param newXf the extended format index that is now associated with this StyleRecord + */ + public void updateStyleRecord(int oldXf, int newXf) { + // Style records always follow after + // the ExtendedFormat records + for(int i=records.getXfpos(); i<records.size(); i++) { + Record r = records.get(i); + if (r instanceof StyleRecord) { + StyleRecord sr = (StyleRecord)r; + if (sr.getXFIndex() == oldXf) { + sr.setXFIndex(newXf); + } + } + } + } + + /** * Creates a new StyleRecord, for the given Extended * Format index, and adds it onto the end of the * records collection diff --git a/src/java/org/apache/poi/hssf/record/aggregates/ColumnInfoRecordsAggregate.java b/src/java/org/apache/poi/hssf/record/aggregates/ColumnInfoRecordsAggregate.java index 83c3a65185..5debe7361c 100644 --- a/src/java/org/apache/poi/hssf/record/aggregates/ColumnInfoRecordsAggregate.java +++ b/src/java/org/apache/poi/hssf/record/aggregates/ColumnInfoRecordsAggregate.java @@ -489,6 +489,7 @@ public final class ColumnInfoRecordsAggregate extends RecordAggregate implements setColumn(i, null, null, Integer.valueOf(level), null, null); } } + /** * Finds the <tt>ColumnInfoRecord</tt> which contains the specified columnIndex * @param columnIndex index of the column (not the index of the ColumnInfoRecord) @@ -504,6 +505,7 @@ public final class ColumnInfoRecordsAggregate extends RecordAggregate implements } return null; } + public int getMaxOutlineLevel() { int result = 0; int count=records.size(); @@ -513,6 +515,7 @@ public final class ColumnInfoRecordsAggregate extends RecordAggregate implements } return result; } + public int getOutlineLevel(int columnIndex) { ColumnInfoRecord ci = findColumnInfo(columnIndex); if (ci != null) { @@ -521,4 +524,34 @@ public final class ColumnInfoRecordsAggregate extends RecordAggregate implements return 0; } } + + public int getMinColumnIndex() { + if(records.isEmpty()) { + return 0; + } + + int minIndex = Integer.MAX_VALUE; + int nInfos = records.size(); + for(int i=0; i< nInfos; i++) { + ColumnInfoRecord ci = getColInfo(i); + minIndex = Math.min(minIndex, ci.getFirstColumn()); + } + + return minIndex; + } + + public int getMaxColumnIndex() { + if(records.isEmpty()) { + return 0; + } + + int maxIndex = 0; + int nInfos = records.size(); + for(int i=0; i< nInfos; i++) { + ColumnInfoRecord ci = getColInfo(i); + maxIndex = Math.max(maxIndex, ci.getLastColumn()); + } + + return maxIndex; + } } diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java index c38fd375b2..f4287ce4b5 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java @@ -20,6 +20,7 @@ import java.util.HashSet; import org.apache.poi.hssf.record.ExtendedFormatRecord; import org.apache.poi.hssf.record.FontRecord; +import org.apache.poi.hssf.record.StyleRecord; import org.apache.poi.hssf.record.common.UnicodeString; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellType; @@ -185,50 +186,81 @@ public class HSSFOptimiser { // Get each style record, so we can do deletes // without getting confused - ExtendedFormatRecord[] xfrs = new ExtendedFormatRecord[newPos.length]; + ExtendedFormatRecord[] xfrs = new ExtendedFormatRecord[newPos.length]; for(int i=0; i<newPos.length; i++) { xfrs[i] = workbook.getWorkbook().getExFormatAt(i); } - // Loop over each style, seeing if it is the same - // as an earlier one. If it is, point users of the - // later duplicate copy to the earlier one, and - // mark the later one as needing deleting - // Only work on user added ones, which come after 20 - for(int i=21; i<newPos.length; i++) { - // Check this one for being a duplicate - // of an earlier one - int earlierDuplicate = -1; - for(int j=0; j<i && earlierDuplicate == -1; j++) { - ExtendedFormatRecord xfCheck = workbook.getWorkbook().getExFormatAt(j); - if(xfCheck.equals(xfrs[i])) { - earlierDuplicate = j; - } - } + // Loop over each style, seeing if it is the same + // as an earlier one. If it is, point users of the + // later duplicate copy to the earlier one, and + // mark the later one as needing deleting + // Only work on user added ones, which come after 20 + for (int i = 21; i < newPos.length; i++) { + // Check this one for being a duplicate + // of an earlier one + int earlierDuplicate = -1; + for (int j = 0; j < i && earlierDuplicate == -1; j++) { + ExtendedFormatRecord xfCheck = workbook.getWorkbook().getExFormatAt(j); + if (xfCheck.equals(xfrs[i]) && + // newer duplicate user defined styles + !isUserDefined(workbook, j)) { + earlierDuplicate = j; + } + } // If we got a duplicate, mark it as such if(earlierDuplicate != -1) { newPos[i] = (short)earlierDuplicate; zapRecords[i] = true; } - // If we got a duplicate, mark the one we're keeping as used - if(earlierDuplicate != -1) { - isUsed[earlierDuplicate] = true; - } } - // Loop over all the cells in the file, and identify any user defined - // styles aren't actually being used (don't touch built-in ones) - for(int sheetNum=0; sheetNum<workbook.getNumberOfSheets(); sheetNum++) { - HSSFSheet s = workbook.getSheetAt(sheetNum); - for (Row row : s) { - for (Cell cellI : row) { - HSSFCell cell = (HSSFCell)cellI; - short oldXf = cell.getCellValueRecord().getXFIndex(); - isUsed[oldXf] = true; - } - } - } + // Loop over all the cells in the file, and identify any user defined + // styles aren't actually being used (don't touch built-in ones) + for (int sheetNum = 0; sheetNum < workbook.getNumberOfSheets(); sheetNum++) { + HSSFSheet s = workbook.getSheetAt(sheetNum); + for (Row row : s) { + for (Cell cellI : row) { + HSSFCell cell = (HSSFCell) cellI; + short oldXf = cell.getCellValueRecord().getXFIndex(); + // some documents contain invalid values here + if(oldXf < newPos.length) { + isUsed[oldXf] = true; + } + } + + // also mark row style as being used + short oldXf = ((HSSFRow) row).getRowRecord().getXFIndex(); + // some documents contain invalid values here + if(oldXf < newPos.length) { + isUsed[oldXf] = true; + } + } + + // also mark column styles as being used + for (int col = s.getSheet().getMinColumnIndex(); col <= s.getSheet().getMaxColumnIndex(); col++) { + short oldXf = s.getSheet().getXFIndexForColAt((short) col); + // some documents contain invalid values here + if(oldXf < newPos.length) { + isUsed[oldXf] = true; + } + } + } + + // Propagate isUsed for duplicates and always set user styles to being used to never optimize them away + for (int i = 21; i < isUsed.length; i++) { + // user defined styles are always "used" + if (isUserDefined(workbook, i)) { + isUsed[i] = true; + } + + // If we got a duplicate which is used, mark the one we're keeping as used + if(newPos[i] != i && isUsed[i]) { + isUsed[newPos[i]] = true; + } + } + // Mark any that aren't used as needing zapping for (int i=21; i<isUsed.length; i++) { if (! isUsed[i]) { @@ -251,9 +283,21 @@ public class HSSFOptimiser { if(zapRecords[j]) newPosition--; } - // Update the new position - newPos[i] = newPosition; - } + // Update the new position + newPos[i] = newPosition; + // also update StyleRecord and Parent-link + if (i != newPosition && newPosition != 0) { + workbook.getWorkbook().updateStyleRecord(i, newPosition); + + ExtendedFormatRecord exFormat = workbook.getWorkbook().getExFormatAt(i); + short oldParent = exFormat.getParentIndex(); + // some documents contain invalid values here + if(oldParent < newPos.length) { + short newParent = newPos[oldParent]; + exFormat.setParentIndex(newParent); + } + } + } // Zap the un-needed user style records // removing by index, because removing by object may delete @@ -269,20 +313,47 @@ public class HSSFOptimiser { } } - // Finally, update the cells to point at their new extended format records - for(int sheetNum=0; sheetNum<workbook.getNumberOfSheets(); sheetNum++) { - HSSFSheet s = workbook.getSheetAt(sheetNum); - for (Row row : s) { - for (Cell cellI : row) { - HSSFCell cell = (HSSFCell)cellI; - short oldXf = cell.getCellValueRecord().getXFIndex(); + // Finally, update the cells to point at their new extended format records + for (int sheetNum = 0; sheetNum < workbook.getNumberOfSheets(); sheetNum++) { + HSSFSheet s = workbook.getSheetAt(sheetNum); + for (Row row : s) { + for (Cell cell : row) { + short oldXf = ((HSSFCell) cell).getCellValueRecord().getXFIndex(); + // some documents contain invalid values here + if(oldXf >= newPos.length) { + continue; + } + HSSFCellStyle newStyle = workbook.getCellStyleAt(newPos[oldXf]); + cell.setCellStyle(newStyle); + } + + // adjust row column style + short oldXf = ((HSSFRow) row).getRowRecord().getXFIndex(); + // some documents contain invalid values here + if(oldXf >= newPos.length) { + continue; + } + HSSFCellStyle newStyle = workbook.getCellStyleAt(newPos[oldXf]); + row.setRowStyle(newStyle); + } - HSSFCellStyle newStyle = workbook.getCellStyleAt( - newPos[oldXf] - ); - cell.setCellStyle(newStyle); - } - } - } + // adjust cell column style + for (int col = s.getSheet().getMinColumnIndex(); col <= s.getSheet().getMaxColumnIndex(); col++) { + short oldXf = s.getSheet().getXFIndexForColAt((short) col); + // some documents contain invalid values here + if(oldXf >= newPos.length) { + continue; + } + HSSFCellStyle newStyle = workbook.getCellStyleAt(newPos[oldXf]); + s.setDefaultColumnStyle(col, newStyle); + } + } } + + private static boolean isUserDefined(HSSFWorkbook workbook, int index) { + StyleRecord styleRecord = workbook.getWorkbook().getStyleRecord(index); + return styleRecord != null && + !styleRecord.isBuiltin() && + styleRecord.getName() != null; + } } |