From e29279896e0ed8ac3f05de980dffa86d6fc26cc4 Mon Sep 17 00:00:00 2001
From: Javen O'Neal
Date: Mon, 30 Nov 2015 00:22:50 +0000
Subject: patch from Stefan Thurnherr: bug 57450: autosize columns in SXSSF
using rows that have been flushed to disk
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1717146 13f79535-47bb-0310-9956-ffa450edef68
---
src/java/org/apache/poi/ss/util/SheetUtil.java | 4 +-
.../poi/xssf/streaming/AutoSizeColumnTracker.java | 371 +++++++++++++++++++++
.../org/apache/poi/xssf/streaming/SXSSFSheet.java | 181 +++++++++-
.../xssf/streaming/TestAutoSizeColumnTracker.java | 203 +++++++++++
.../apache/poi/xssf/streaming/TestSXSSFSheet.java | 8 +
.../streaming/TestSXSSFSheetAutoSizeColumn.java | 363 ++++++++++++++++++++
.../apache/poi/xssf/usermodel/TestSXSSFBugs.java | 7 +
.../poi/ss/usermodel/BaseTestBugzillaIssues.java | 7 +
.../org/apache/poi/ss/usermodel/BaseTestSheet.java | 5 +
.../ss/usermodel/BaseTestSheetAutosizeColumn.java | 11 +
10 files changed, 1148 insertions(+), 12 deletions(-)
create mode 100644 src/ooxml/java/org/apache/poi/xssf/streaming/AutoSizeColumnTracker.java
create mode 100644 src/ooxml/testcases/org/apache/poi/xssf/streaming/TestAutoSizeColumnTracker.java
create mode 100644 src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheetAutoSizeColumn.java
diff --git a/src/java/org/apache/poi/ss/util/SheetUtil.java b/src/java/org/apache/poi/ss/util/SheetUtil.java
index 8d63e9b360..2103fdf2f7 100644
--- a/src/java/org/apache/poi/ss/util/SheetUtil.java
+++ b/src/java/org/apache/poi/ss/util/SheetUtil.java
@@ -36,6 +36,7 @@ import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.util.Internal;
/**
@@ -244,7 +245,8 @@ public class SheetUtil {
* @param wb the workbook to get the default character width from
* @return default character width in pixels
*/
- private static int getDefaultCharWidth(final Workbook wb) {
+ @Internal
+ public static int getDefaultCharWidth(final Workbook wb) {
Font defaultFont = wb.getFontAt((short) 0);
AttributedString str = new AttributedString(String.valueOf(defaultChar));
diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/AutoSizeColumnTracker.java b/src/ooxml/java/org/apache/poi/xssf/streaming/AutoSizeColumnTracker.java
new file mode 100644
index 0000000000..f350469110
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/AutoSizeColumnTracker.java
@@ -0,0 +1,371 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.streaming;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.util.SheetUtil;
+import org.apache.poi.util.Internal;
+
+/**
+ * Tracks best fit column width for rows of an {@link SXSSFSheet},
+ * to be able to correctly calculate auto-sized column widths even
+ * if some rows are already flushed to disk.
+ * This is an auxiliary data structure that uses a TreeMap containing
+ * one entry per tracked column, where the key is the column index and
+ * the value is a pair of doubles. This data structure's memory footprint
+ * is linear with the number of *tracked* columns and invariant with
+ * the number of rows or columns in the sheet.
+ * @since 3.14beta1
+*/
+@Internal
+/*package*/ class AutoSizeColumnTracker {
+ private final int defaultCharWidth;
+ private final DataFormatter dataFormatter = new DataFormatter();
+
+ // map of tracked columns, with values containing the best-fit width for the column
+ // Using a HashMap instead of a TreeMap because insertion (trackColumn), removal (untrackColumn), and membership (everything)
+ // will be called more frequently than getTrackedColumns(). The O(1) cost of insertion, removal, and membership operations
+ // outweigh the infrequent O(n*log n) cost of sorting getTrackedColumns().
+ // Memory consumption for a HashMap and TreeMap is about the same
+ private final Map maxColumnWidths = new HashMap();
+ // untrackedColumns stores columns have been explicitly untracked so they aren't implicitly re-tracked by trackAllColumns
+ // Using a HashSet instead of a TreeSet because we don't care about order.
+ private final Set untrackedColumns = new HashSet();
+ private boolean trackAllColumns = false;
+
+ /**
+ * Tuple to store the column widths considering and not considering merged cells
+ * If more permutations are needed, it may be prudent to require the user to specify
+ * how they intend to auto-size a column when they track the column, so calculations
+ * are limited to the desired intentions. Unless this proves to be a performance problem,
+ * it's probably better to let the user defer how they want to auto-size to SXSSFSheet.autoSizeColumn,
+ * rather than twice (via SXSSFSheet.trackColumn(int column, boolean useMergedCells) and again at
+ * SXSFSheet.autoSizeColumn(int column, boolean useMergedCells))
+ * @since 3.14beta1
+ */
+ private static class ColumnWidthPair {
+ private double withSkipMergedCells;
+ private double withUseMergedCells;
+
+ public ColumnWidthPair() {
+ this(-1.0, -1.0);
+ }
+
+ public ColumnWidthPair(final double columnWidthSkipMergedCells, final double columnWidthUseMergedCells) {
+ withSkipMergedCells = columnWidthSkipMergedCells;
+ withUseMergedCells = columnWidthUseMergedCells;
+ }
+
+ /**
+ * Gets the current best-fit column width for the provided settings
+ *
+ * @param useMergedCells true if merged cells are considered into the best-fit column width calculation
+ * @return best fit column width, measured in default character widths.
+ */
+ public double getMaxColumnWidth(final boolean useMergedCells) {
+ return useMergedCells ? withUseMergedCells : withSkipMergedCells;
+ }
+
+ /**
+ * Sets the best-fit column width to the maximum of the current width and the provided width
+ *
+ * @param unmergedWidth the best-fit column width calculated with useMergedCells=False
+ * @param mergedWidth the best-fit column width calculated with useMergedCells=True
+ */
+ public void setMaxColumnWidths(double unmergedWidth, double mergedWidth) {
+ withUseMergedCells = Math.max(withUseMergedCells, mergedWidth);
+ withSkipMergedCells = Math.max(withUseMergedCells, unmergedWidth);
+ }
+ }
+
+ /**
+ * AutoSizeColumnTracker constructor. Holds no reference to sheet
+ *
+ * @param sheet the sheet associated with this auto-size column tracker
+ * @since 3.14beta1
+ */
+ public AutoSizeColumnTracker(final Sheet sheet) {
+ // If sheet needs to be saved, use a java.lang.ref.WeakReference to avoid garbage collector gridlock.
+ defaultCharWidth = SheetUtil.getDefaultCharWidth(sheet.getWorkbook());
+ }
+
+ /**
+ * Get the currently tracked columns, naturally ordered.
+ * Note if all columns are tracked, this will only return the columns that have been explicitly or implicitly tracked,
+ * which is probably only columns containing 1 or more non-blank values
+ *
+ * @return a set of the indices of all tracked columns
+ * @since 3.14beta1
+ */
+ public SortedSet getTrackedColumns() {
+ SortedSet sorted = new TreeSet(maxColumnWidths.keySet());
+ return Collections.unmodifiableSortedSet(sorted);
+ }
+
+ /**
+ * Returns true if column is currently tracked for auto-sizing.
+ *
+ * @param column the index of the column to check
+ * @return true if column is tracked
+ * @since 3.14beta1
+ */
+ public boolean isColumnTracked(int column) {
+ return trackAllColumns || maxColumnWidths.containsKey(column);
+ }
+
+ /**
+ * Returns true if all columns are implicitly tracked.
+ *
+ * @return true if all columns are implicitly tracked
+ * @since 3.14beta1
+ */
+ public boolean isAllColumnsTracked() {
+ return trackAllColumns;
+ }
+
+ /**
+ * Tracks all non-blank columns
+ * Allows columns that have been explicitly untracked to be tracked
+ * @since 3.14beta1
+ */
+ public void trackAllColumns() {
+ trackAllColumns = true;
+ untrackedColumns.clear();
+ }
+
+ /**
+ * Untrack all columns that were previously tracked for auto-sizing.
+ * All best-fit column widths are forgotten.
+ * @since 3.14beta1
+ */
+ public void untrackAllColumns() {
+ trackAllColumns = false;
+ maxColumnWidths.clear();
+ untrackedColumns.clear();
+ }
+
+ /**
+ * Marks multiple columns for inclusion in auto-size column tracking.
+ * Note this has undefined behavior if columns are tracked after one or more rows are written to the sheet.
+ * Any column in columns that are already tracked are ignored by this call.
+ *
+ * @param columns the indices of the columns to track
+ * @since 3.14beta1
+ */
+ public void trackColumns(Collection columns)
+ {
+ for (final int column : columns) {
+ trackColumn(column);
+ }
+ }
+
+ /**
+ * Marks a column for inclusion in auto-size column tracking.
+ * Note this has undefined behavior if a column is tracked after one or more rows are written to the sheet.
+ * If column is already tracked, this call does nothing.
+ *
+ * @param column the index of the column to track for auto-sizing
+ * @return if column is already tracked, the call does nothing and returns false
+ * @since 3.14beta1
+ */
+ public boolean trackColumn(int column) {
+ untrackedColumns.remove(column);
+ if (!maxColumnWidths.containsKey(column)) {
+ maxColumnWidths.put(column, new ColumnWidthPair());
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Implicitly track a column if it has not been explicitly untracked
+ * If it has been explicitly untracked, this call does nothing and returns false.
+ * Otherwise return true
+ *
+ * @param column the column to implicitly track
+ * @return false if column has been explicitly untracked, otherwise return true
+ */
+ private boolean implicitlyTrackColumn(int column) {
+ if (!untrackedColumns.contains(column)) {
+ trackColumn(column);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Removes columns that were previously marked for inclusion in auto-size column tracking.
+ * When a column is untracked, the best-fit width is forgotten.
+ * Any column in columns that is not tracked will be ignored by this call.
+ *
+ * @param columns the indices of the columns to track for auto-sizing
+ * @return true if one or more columns were untracked as a result of this call
+ * @since 3.14beta1
+ */
+ public boolean untrackColumns(Collection columns)
+ {
+ untrackedColumns.addAll(columns);
+ return maxColumnWidths.keySet().removeAll(columns);
+ }
+
+ /**
+ * Removes a column that was previously marked for inclusion in auto-size column tracking.
+ * When a column is untracked, the best-fit width is forgotten.
+ * If column is not tracked, it will be ignored by this call.
+ *
+ * @param column the index of the column to track for auto-sizing
+ * @return true if column was tracked prior this call, false if no action was taken
+ * @since 3.14beta1
+ */
+ public boolean untrackColumn(int column) {
+ untrackedColumns.add(column);
+ return maxColumnWidths.keySet().remove(column);
+ }
+
+ /**
+ * Get the best-fit width of a tracked column
+ *
+ * @param column the index of the column to get the current best-fit width of
+ * @param useMergedCells true if merged cells should be considered when computing the best-fit width
+ * @return best-fit column width, measured in units of 1/256th of a character width
+ * @throws IllegalStateException if column is not tracked and trackAllColumns is false
+ * @since 3.14beta1
+ */
+ public int getBestFitColumnWidth(int column, boolean useMergedCells) {
+ if (!maxColumnWidths.containsKey(column)) {
+ // if column is not tracked, implicitly track the column if trackAllColumns is True and column has not been explicitly untracked
+ if (trackAllColumns) {
+ if (!implicitlyTrackColumn(column)) {
+ final Throwable reason = new IllegalStateException(
+ "Column was explicitly untracked after trackAllColumns() was called.");
+ throw new IllegalStateException(
+ "Cannot get best fit column width on explicitly untracked column " + column + ". " +
+ "Either explicitly track the column or track all columns.", reason);
+ }
+ }
+ else {
+ final Throwable reason = new IllegalStateException(
+ "Column was never explicitly tracked and isAllColumnsTracked() is false " +
+ "(trackAllColumns() was never called or untrackAllColumns() was called after trackAllColumns() was called).");
+ throw new IllegalStateException(
+ "Cannot get best fit column width on untracked column " + column + ". " +
+ "Either explicitly track the column or track all columns.", reason);
+ }
+ }
+ final double width = maxColumnWidths.get(column).getMaxColumnWidth(useMergedCells);
+ return (int) (256*width);
+ }
+
+
+
+ /**
+ * Calculate the best fit width for each tracked column in row
+ *
+ * @param row the row to get the cells
+ * @since 3.14beta1
+ */
+ public void updateColumnWidths(Row row) {
+ // track new columns
+ implicitlyTrackColumnsInRow(row);
+
+ // update the widths
+ // for-loop over the shorter of the number of cells in the row and the number of tracked columns
+ // these two for-loops should do the same thing
+ if (maxColumnWidths.size() < row.getPhysicalNumberOfCells()) {
+ // loop over the tracked columns, because there are fewer tracked columns than cells in this row
+ for (final Entry e : maxColumnWidths.entrySet()) {
+ final int column = e.getKey();
+ final Cell cell = row.getCell(column); //is MissingCellPolicy=Row.RETURN_NULL_AND_BLANK needed?
+
+ // FIXME: if cell belongs to a merged region, some of the merged region may have fallen outside of the random access window
+ // In this case, getting the column width may result in an error. Need to gracefully handle this.
+
+ // FIXME: Most cells are not merged, so calling getCellWidth twice re-computes the same value twice.
+ // Need to rewrite this to avoid unnecessary computation if this proves to be a performance bottleneck.
+
+ if (cell != null) {
+ final ColumnWidthPair pair = e.getValue();
+ updateColumnWidth(cell, pair);
+ }
+ }
+ }
+ else {
+ // loop over the cells in this row, because there are fewer cells in this row than tracked columns
+ for (final Cell cell : row) {
+ final int column = cell.getColumnIndex();
+
+ // FIXME: if cell belongs to a merged region, some of the merged region may have fallen outside of the random access window
+ // In this case, getting the column width may result in an error. Need to gracefully handle this.
+
+ // FIXME: Most cells are not merged, so calling getCellWidth twice re-computes the same value twice.
+ // Need to rewrite this to avoid unnecessary computation if this proves to be a performance bottleneck.
+
+ if (maxColumnWidths.containsKey(column)) {
+ final ColumnWidthPair pair = maxColumnWidths.get(column);
+ updateColumnWidth(cell, pair);
+ }
+ }
+ }
+ }
+
+ /**
+ * Helper for {@link #updateColumnWidths(Row)}.
+ * Implicitly track the columns corresponding to the cells in row.
+ * If all columns in the row are already tracked, this call does nothing.
+ * Explicitly untracked columns will not be tracked.
+ *
+ * @param row the row containing cells to implicitly track the columns
+ * @since 3.14beta1
+ */
+ private void implicitlyTrackColumnsInRow(Row row) {
+ // track new columns
+ if (trackAllColumns) {
+ // if column is not tracked, implicitly track the column if trackAllColumns is True and column has not been explicitly untracked
+ for (final Cell cell : row) {
+ final int column = cell.getColumnIndex();
+ implicitlyTrackColumn(column);
+ }
+ }
+ }
+
+ /**
+ * Helper for {@link #updateColumnWidths(Row)}.
+ *
+ * @param cell the cell to compute the best fit width on
+ * @param pair the column width pair to update
+ * @since 3.14beta1
+ */
+ private void updateColumnWidth(final Cell cell, final ColumnWidthPair pair) {
+ final double unmergedWidth = SheetUtil.getCellWidth(cell, defaultCharWidth, dataFormatter, false);
+ final double mergedWidth = SheetUtil.getCellWidth(cell, defaultCharWidth, dataFormatter, true);
+ pair.setMaxColumnWidths(unmergedWidth, mergedWidth);
+ }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFSheet.java b/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFSheet.java
index 9134eb97db..6e0354de52 100644
--- a/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFSheet.java
+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFSheet.java
@@ -19,9 +19,11 @@ package org.apache.poi.xssf.streaming;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.TreeMap;
import org.apache.poi.hssf.util.PaneInformation;
@@ -62,6 +64,7 @@ public class SXSSFSheet implements Sheet, Cloneable
private final TreeMap _rows=new TreeMap();
private final SheetDataWriter _writer;
private int _randomAccessWindowSize = SXSSFWorkbook.DEFAULT_WINDOW_SIZE;
+ private final AutoSizeColumnTracker _autoSizeColumnTracker;
private int outlineLevelRow = 0;
private int lastFlushedRowNumber = -1;
private boolean allFlushed = false;
@@ -71,6 +74,7 @@ public class SXSSFSheet implements Sheet, Cloneable
_sh = xSheet;
_writer = workbook.createSheetDataWriter();
setRandomAccessWindowSize(_workbook.getRandomAccessWindowSize());
+ _autoSizeColumnTracker = new AutoSizeColumnTracker(this);
}
/**
@@ -1377,6 +1381,118 @@ public class SXSSFSheet implements Sheet, Cloneable
{
_sh.setDefaultColumnStyle(column, style);
}
+
+
+ /**
+ * Track a column in the sheet for auto-sizing.
+ * Note this has undefined behavior if a column is tracked after one or more rows are written to the sheet.
+ * If column is already tracked, this call does nothing.
+ *
+ * @param column the column to track for autosizing
+ * @since 3.14beta1
+ * @see #trackColumnsForAutoSizing(Collection)
+ * @see #trackAllColumnsForAutoSizing()
+ */
+ public void trackColumnForAutoSizing(int column)
+ {
+ _autoSizeColumnTracker.trackColumn(column);
+ }
+
+ /**
+ * Track several columns in the sheet for auto-sizing.
+ * Note this has undefined behavior if columns are tracked after one or more rows are written to the sheet.
+ * Any column in columns that are already tracked are ignored by this call.
+ *
+ * @param columns the columns to track for autosizing
+ * @since 3.14beta1
+ */
+ public void trackColumnsForAutoSizing(Collection columns)
+ {
+ _autoSizeColumnTracker.trackColumns(columns);
+ }
+
+ /**
+ * Tracks all columns in the sheet for auto-sizing. If this is called, individual columns do not need to be tracked.
+ * Because determining the best-fit width for a cell is expensive, this may affect the performance.
+ * @since 3.14beta1
+ */
+ public void trackAllColumnsForAutoSizing()
+ {
+ _autoSizeColumnTracker.trackAllColumns();
+ }
+
+ /**
+ * Removes a column that was previously marked for inclusion in auto-size column tracking.
+ * When a column is untracked, the best-fit width is forgotten.
+ * If column is not tracked, it will be ignored by this call.
+ *
+ * @param column the index of the column to track for auto-sizing
+ * @return true if column was tracked prior to being untracked, false if no action was taken
+ */
+ /**
+ *
+ *
+ * @param column the index of the column to track for auto-sizing
+ * @return true if column was tracked prior to this call, false if no action was taken
+ * @since 3.14beta1
+ * @see #untrackColumnsForAutoSizing(Collection)
+ * @see #untrackAllColumnsForAutoSizing(int)
+ */
+ public boolean untrackColumnForAutoSizing(int column)
+ {
+ return _autoSizeColumnTracker.untrackColumn(column);
+ }
+
+ /**
+ * Untracks several columns in the sheet for auto-sizing.
+ * When a column is untracked, the best-fit width is forgotten.
+ * Any column in columns that is not tracked will be ignored by this call.
+ *
+ * @param columns the indices of the columns to track for auto-sizing
+ * @return true if one or more columns were untracked as a result of this call
+ *
+ * @param columns the columns to track for autosizing
+ * @since 3.14beta1
+ */
+ public boolean untrackColumnsForAutoSizing(Collection columns)
+ {
+ return _autoSizeColumnTracker.untrackColumns(columns);
+ }
+
+ /**
+ * Untracks all columns in the sheet for auto-sizing. Best-fit column widths are forgotten.
+ * If this is called, individual columns do not need to be untracked.
+ * @since 3.14beta1
+ */
+ public void untrackAllColumnsForAutoSizing()
+ {
+ _autoSizeColumnTracker.untrackAllColumns();
+ }
+
+ /**
+ * Returns true if column is currently tracked for auto-sizing.
+ *
+ * @param column the index of the column to check
+ * @return true if column is tracked
+ * @since 3.14beta1
+ */
+ public boolean isColumnTrackedForAutoSizing(int column)
+ {
+ return _autoSizeColumnTracker.isColumnTracked(column);
+ }
+
+ /**
+ * Get the currently tracked columns for auto-sizing.
+ * Note if all columns are tracked, this will only return the columns that have been explicitly or implicitly tracked,
+ * which is probably only columns containing 1 or more non-blank values
+ *
+ * @return a set of the indices of all tracked columns
+ * @since 3.14beta1
+ */
+ public Set getTrackedColumnsForAutoSizing()
+ {
+ return _autoSizeColumnTracker.getTrackedColumns();
+ }
/**
* Adjusts the column width to fit the contents.
@@ -1388,8 +1504,17 @@ public class SXSSFSheet implements Sheet, Cloneable
*
* You can specify whether the content of merged cells should be considered or ignored.
* Default is to ignore merged cells.
+ *
+ *
+ * Special note about SXSSF implementation: You must register the columns you wish to track with
+ * the SXSSFSheet using {@link #trackColumnForAutoSizing(int)} or {@link #trackAllColumnsForAutoSizing()}.
+ * This is needed because the rows needed to compute the column width may have fallen outside the
+ * random access window and been flushed to disk.
+ * Tracking columns is required even if all rows are in the random access window.
+ *
+ *
New in POI 3.14 beta 1: auto-sizes columns using cells from current and flushed rows.
*
- * @param column the column index
+ * @param column the column index to auto-size
*/
@Override
public void autoSizeColumn(int column)
@@ -1406,22 +1531,54 @@ public class SXSSFSheet implements Sheet, Cloneable
*
* You can specify whether the content of merged cells should be considered or ignored.
* Default is to ignore merged cells.
- *
- * @param column the column index
+ *
+ *
+ * Special note about SXSSF implementation: You must register the columns you wish to track with
+ * the SXSSFSheet using {@link #trackColumnForAutoSizing(int)} or {@link #trackAllColumnsForAutoSizing()}.
+ * This is needed because the rows needed to compute the column width may have fallen outside the
+ * random access window and been flushed to disk.
+ * Tracking columns is required even if all rows are in the random access window.
+ *
+ *
New in POI 3.14 beta 1: auto-sizes columns using cells from current and flushed rows.
+ *
+ * @param column the column index to auto-size
* @param useMergedCells whether to use the contents of merged cells when calculating the width of the column
*/
@Override
public void autoSizeColumn(int column, boolean useMergedCells)
{
- double width = SheetUtil.getColumnWidth(this, column, useMergedCells);
+ // Multiple calls to autoSizeColumn need to look up the best-fit width
+ // of rows already flushed to disk plus re-calculate the best-fit width
+ // of rows in the current window. It isn't safe to update the column
+ // widths before flushing to disk because columns in the random access
+ // window rows may change in best-fit width. The best-fit width of a cell
+ // is only fixed when it becomes inaccessible for modification.
+ // Changes to the shared strings table, styles table, or formulas might
+ // be able to invalidate the auto-size width without the opportunity
+ // to recalculate the best-fit width for the flushed rows. This is an
+ // inherent limitation of SXSSF. If having correct auto-sizing is
+ // critical, the flushed rows would need to be re-read by the read-only
+ // XSSF eventmodel (SAX) or the memory-heavy XSSF usermodel (DOM).
+ final int flushedWidth;
+ try {
+ // get the best fit width of rows already flushed to disk
+ flushedWidth = _autoSizeColumnTracker.getBestFitColumnWidth(column, useMergedCells);
+ }
+ catch (final IllegalStateException e) {
+ throw new IllegalStateException("Could not auto-size column. Make sure the column was tracked prior to auto-sizing the column.", e);
+ }
+
+ // get the best-fit width of rows currently in the random access window
+ final int activeWidth = (int) (256 * SheetUtil.getColumnWidth(this, column, useMergedCells));
- if (width != -1) {
- width *= 256;
- int maxColumnWidth = 255*256; // The maximum column width for an individual cell is 255 characters
- if (width > maxColumnWidth) {
- width = maxColumnWidth;
- }
- setColumnWidth(column, (int)(width));
+ // the best-fit width for both flushed rows and random access window rows
+ // flushedWidth or activeWidth may be negative if column contains only blank cells
+ final int bestFitWidth = Math.max(flushedWidth, activeWidth);
+
+ if (bestFitWidth > 0) {
+ final int maxColumnWidth = 255*256; // The maximum column width for an individual cell is 255 characters
+ final int width = Math.min(bestFitWidth, maxColumnWidth);
+ setColumnWidth(column, width);
}
}
@@ -1681,6 +1838,8 @@ public class SXSSFSheet implements Sheet, Cloneable
if (firstRowNum!=null) {
int rowIndex = firstRowNum.intValue();
SXSSFRow row = _rows.get(firstRowNum);
+ // Update the best fit column widths for auto-sizing just before the rows are flushed
+ _autoSizeColumnTracker.updateColumnWidths(row);
_writer.writeRow(rowIndex, row);
_rows.remove(firstRowNum);
lastFlushedRowNumber = rowIndex;
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestAutoSizeColumnTracker.java b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestAutoSizeColumnTracker.java
new file mode 100644
index 0000000000..ac520f27be
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestAutoSizeColumnTracker.java
@@ -0,0 +1,203 @@
+package org.apache.poi.xssf.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeFalse;
+import static org.junit.Assume.assumeTrue;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Font;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.ss.util.SheetUtil;
+import org.junit.After;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+
+/**
+ * Tests the auto-sizing behaviour of {@link SXSSFSheet} when not all
+ * rows fit into the memory window size etc.
+ *
+ * @see Bug #57450 which reported the original mis-behaviour
+ */
+public class TestAutoSizeColumnTracker {
+
+ private SXSSFSheet sheet;
+ private SXSSFWorkbook workbook;
+ private AutoSizeColumnTracker tracker;
+ private static final SortedSet columns;
+ static {
+ SortedSet_columns = new TreeSet();
+ _columns.add(0);
+ _columns.add(1);
+ _columns.add(3);
+ columns = Collections.unmodifiableSortedSet(_columns);
+ }
+ private final static String SHORT_MESSAGE = "short";
+ private final static String LONG_MESSAGE = "This is a test of a long message! This is a test of a long message!";
+
+ @Before
+ public void setUpSheetAndWorkbook() {
+ workbook = new SXSSFWorkbook();
+ sheet = workbook.createSheet();
+ tracker = new AutoSizeColumnTracker(sheet);
+ }
+
+ @After
+ public void tearDownSheetAndWorkbook() throws IOException {
+ if (sheet != null) {
+ sheet.dispose();
+ }
+ if (workbook != null) {
+ workbook.close();
+ }
+ }
+
+ @Test
+ public void trackAndUntrackColumn() {
+ assumeTrue(tracker.getTrackedColumns().isEmpty());
+ tracker.trackColumn(0);
+ Set expected = new HashSet();
+ expected.add(0);
+ assertEquals(expected, tracker.getTrackedColumns());
+ tracker.untrackColumn(0);
+ assertTrue(tracker.getTrackedColumns().isEmpty());
+ }
+
+ @Test
+ public void trackAndUntrackColumns() {
+ assumeTrue(tracker.getTrackedColumns().isEmpty());
+ tracker.trackColumns(columns);
+ assertEquals(columns, tracker.getTrackedColumns());
+ tracker.untrackColumn(3);
+ tracker.untrackColumn(0);
+ tracker.untrackColumn(1);
+ assertTrue(tracker.getTrackedColumns().isEmpty());
+ tracker.trackColumn(0);
+ tracker.trackColumns(columns);
+ tracker.untrackColumn(4);
+ assertEquals(columns, tracker.getTrackedColumns());
+ tracker.untrackColumns(columns);
+ assertTrue(tracker.getTrackedColumns().isEmpty());
+ }
+
+ @Test
+ public void trackAndUntrackAllColumns() {
+ assumeTrue(tracker.getTrackedColumns().isEmpty());
+ tracker.trackAllColumns();
+ assertTrue(tracker.getTrackedColumns().isEmpty());
+
+ Row row = sheet.createRow(0);
+ for (int column : columns) {
+ row.createCell(column);
+ }
+ // implicitly track the columns
+ tracker.updateColumnWidths(row);
+ assertEquals(columns, tracker.getTrackedColumns());
+
+ tracker.untrackAllColumns();
+ assertTrue(tracker.getTrackedColumns().isEmpty());
+ }
+
+ @Test
+ public void isColumnTracked() {
+ assumeFalse(tracker.isColumnTracked(0));
+ tracker.trackColumn(0);
+ assertTrue(tracker.isColumnTracked(0));
+ tracker.untrackColumn(0);
+ assertFalse(tracker.isColumnTracked(0));
+ }
+
+ @Test
+ public void getTrackedColumns() {
+ assumeTrue(tracker.getTrackedColumns().isEmpty());
+
+ for (int column : columns) {
+ tracker.trackColumn(column);
+ }
+
+ assertEquals(3, tracker.getTrackedColumns().size());
+ assertEquals(columns, tracker.getTrackedColumns());
+ }
+
+ @Test
+ public void isAllColumnsTracked() {
+ assertFalse(tracker.isAllColumnsTracked());
+ tracker.trackAllColumns();
+ assertTrue(tracker.isAllColumnsTracked());
+ tracker.untrackAllColumns();
+ assertFalse(tracker.isAllColumnsTracked());
+ }
+
+ @Test
+ public void updateColumnWidths_and_getBestFitColumnWidth() {
+ tracker.trackAllColumns();
+ Row row1 = sheet.createRow(0);
+ Row row2 = sheet.createRow(1);
+ // A1, B1, D1
+ for (int column : columns) {
+ row1.createCell(column).setCellValue(LONG_MESSAGE);
+ row2.createCell(column+1).setCellValue(SHORT_MESSAGE);
+ }
+ tracker.updateColumnWidths(row1);
+ tracker.updateColumnWidths(row2);
+ sheet.addMergedRegion(CellRangeAddress.valueOf("D1:E1"));
+
+ assumeRequiredFontsAreInstalled(workbook, row1.getCell(columns.iterator().next()));
+
+ // Excel 2013 and LibreOffice 4.2.8.2 both treat columns with merged regions as blank
+ /** A B C D E
+ * 1 LONG LONG LONGMERGE
+ * 2 SHORT SHORT SHORT
+ */
+
+ // measured in Excel 2013. Sizes may vary.
+ final int longMsgWidth = (int) (57.43*256);
+ final int shortMsgWidth = (int) (4.86*256);
+
+ checkColumnWidth(longMsgWidth, 0, true);
+ checkColumnWidth(longMsgWidth, 0, false);
+ checkColumnWidth(longMsgWidth, 1, true);
+ checkColumnWidth(longMsgWidth, 1, false);
+ checkColumnWidth(shortMsgWidth, 2, true);
+ checkColumnWidth(shortMsgWidth, 2, false);
+ checkColumnWidth(-1, 3, true);
+ checkColumnWidth(longMsgWidth, 3, false);
+ checkColumnWidth(shortMsgWidth, 4, true); //but is it really? shouldn't autosizing column E use "" from E1 and SHORT from E2?
+ checkColumnWidth(shortMsgWidth, 4, false);
+ }
+
+ private void checkColumnWidth(int expectedWidth, int column, boolean useMergedCells) {
+ final int bestFitWidth = tracker.getBestFitColumnWidth(column, useMergedCells);
+ if (bestFitWidth < 0 && expectedWidth < 0) return;
+ final double abs_error = Math.abs(bestFitWidth-expectedWidth);
+ final double rel_error = abs_error / expectedWidth;
+ if (rel_error > 0.25) {
+ fail("check column width: " +
+ rel_error + ", " + abs_error + ", " +
+ expectedWidth + ", " + bestFitWidth);
+ }
+
+ }
+
+ private static void assumeRequiredFontsAreInstalled(final Workbook workbook, final Cell cell) {
+ // autoSize will fail if required fonts are not installed, skip this test then
+ Font font = workbook.getFontAt(cell.getCellStyle().getFontIndex());
+ System.out.println(font.getFontHeightInPoints());
+ System.out.println(font.getFontName());
+ Assume.assumeTrue("Cannot verify autoSizeColumn() because the necessary Fonts are not installed on this machine: " + font,
+ SheetUtil.canComputeColumnWidth(font));
+ }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheet.java b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheet.java
index 52f4a8692e..fbe14f1067 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheet.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheet.java
@@ -23,6 +23,8 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
+import java.util.SortedSet;
+import java.util.TreeSet;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.usermodel.BaseTestSheet;
@@ -45,6 +47,12 @@ public class TestSXSSFSheet extends BaseTestSheet {
public void tearDown(){
SXSSFITestDataProvider.instance.cleanup();
}
+
+ @Override
+ protected void trackColumnsForAutoSizingIfSXSSF(Sheet sheet) {
+ SXSSFSheet sxSheet = (SXSSFSheet) sheet;
+ sxSheet.trackAllColumnsForAutoSizing();
+ }
/**
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheetAutoSizeColumn.java b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheetAutoSizeColumn.java
new file mode 100644
index 0000000000..2201d25ba8
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFSheetAutoSizeColumn.java
@@ -0,0 +1,363 @@
+package org.apache.poi.xssf.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeFalse;
+import static org.junit.Assume.assumeTrue;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Font;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.ss.util.SheetUtil;
+import org.junit.After;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+
+/**
+ * Tests the auto-sizing behaviour of {@link SXSSFSheet} when not all
+ * rows fit into the memory window size etc.
+ *
+ * @see Bug #57450 which reported the original mis-behaviour
+ */
+@RunWith(Parameterized.class)
+public class TestSXSSFSheetAutoSizeColumn {
+
+ private static final String SHORT_CELL_VALUE = "Ben";
+ private static final String LONG_CELL_VALUE = "B Be Ben Beni Benif Benify Benif Beni Ben Be B";
+
+ // Approximative threshold to decide whether test is PASS or FAIL:
+ // shortCellValue ends up with approx column width 1_000 (on my machine),
+ // longCellValue ends up with approx. column width 10_000 (on my machine)
+ // so shortCellValue can be expected to be < 5000 for all fonts
+ // and longCellValue can be expected to be > 5000 for all fonts
+ private static final int COLUMN_WIDTH_THRESHOLD_BETWEEN_SHORT_AND_LONG = 5000;
+ private static final int MAX_COLUMN_WIDTH = 255*256;
+
+ private static final SortedSet columns;
+ static {
+ SortedSet_columns = new TreeSet();
+ _columns.add(0);
+ _columns.add(1);
+ _columns.add(3);
+ columns = Collections.unmodifiableSortedSet(_columns);
+ }
+
+
+ private SXSSFSheet sheet;
+ private SXSSFWorkbook workbook;
+
+ @Parameter(0)
+ public boolean useMergedCells;
+
+ @Parameters(name="{index}: useMergedCells={0}")
+ public static Collection