You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

AutoSizeColumnTracker.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.streaming;
  16. import java.util.Collection;
  17. import java.util.Collections;
  18. import java.util.HashMap;
  19. import java.util.HashSet;
  20. import java.util.Map;
  21. import java.util.Map.Entry;
  22. import java.util.Set;
  23. import java.util.SortedSet;
  24. import java.util.TreeSet;
  25. import org.apache.poi.ss.usermodel.Cell;
  26. import org.apache.poi.ss.usermodel.DataFormatter;
  27. import org.apache.poi.ss.usermodel.Row;
  28. import org.apache.poi.ss.usermodel.Sheet;
  29. import org.apache.poi.ss.util.SheetUtil;
  30. import org.apache.poi.util.Internal;
  31. /**
  32. * Tracks best fit column width for rows of an {@link SXSSFSheet},
  33. * to be able to correctly calculate auto-sized column widths even
  34. * if some rows are already flushed to disk.
  35. * This is an auxiliary data structure that uses a TreeMap containing
  36. * one entry per tracked column, where the key is the column index and
  37. * the value is a pair of doubles. This data structure's memory footprint
  38. * is linear with the number of *tracked* columns and invariant with
  39. * the number of rows or columns in the sheet.
  40. * @since 3.14beta1
  41. */
  42. @Internal
  43. /*package*/ class AutoSizeColumnTracker {
  44. private final float defaultCharWidth;
  45. private final DataFormatter dataFormatter = new DataFormatter();
  46. // map of tracked columns, with values containing the best-fit width for the column
  47. // Using a HashMap instead of a TreeMap because insertion (trackColumn), removal (untrackColumn), and membership (everything)
  48. // will be called more frequently than getTrackedColumns(). The O(1) cost of insertion, removal, and membership operations
  49. // outweigh the infrequent O(n*log n) cost of sorting getTrackedColumns().
  50. // Memory consumption for a HashMap and TreeMap is about the same
  51. private final Map<Integer, ColumnWidthPair> maxColumnWidths = new HashMap<>();
  52. // untrackedColumns stores columns have been explicitly untracked so they aren't implicitly re-tracked by trackAllColumns
  53. // Using a HashSet instead of a TreeSet because we don't care about order.
  54. private final Set<Integer> untrackedColumns = new HashSet<>();
  55. private boolean trackAllColumns;
  56. /**
  57. * Tuple to store the column widths considering and not considering merged cells
  58. * If more permutations are needed, it may be prudent to require the user to specify
  59. * how they intend to auto-size a column when they track the column, so calculations
  60. * are limited to the desired intentions. Unless this proves to be a performance problem,
  61. * it's probably better to let the user defer how they want to auto-size to SXSSFSheet.autoSizeColumn,
  62. * rather than twice (via SXSSFSheet.trackColumn(int column, boolean useMergedCells) and again at
  63. * SXSFSheet.autoSizeColumn(int column, boolean useMergedCells))
  64. * @since 3.14beta1
  65. */
  66. private static class ColumnWidthPair {
  67. private double withSkipMergedCells;
  68. private double withUseMergedCells;
  69. public ColumnWidthPair() {
  70. this(-1.0, -1.0);
  71. }
  72. public ColumnWidthPair(final double columnWidthSkipMergedCells, final double columnWidthUseMergedCells) {
  73. withSkipMergedCells = columnWidthSkipMergedCells;
  74. withUseMergedCells = columnWidthUseMergedCells;
  75. }
  76. /**
  77. * Gets the current best-fit column width for the provided settings
  78. *
  79. * @param useMergedCells true if merged cells are considered into the best-fit column width calculation
  80. * @return best fit column width, measured in default character widths.
  81. */
  82. public double getMaxColumnWidth(final boolean useMergedCells) {
  83. return useMergedCells ? withUseMergedCells : withSkipMergedCells;
  84. }
  85. /**
  86. * Sets the best-fit column width to the maximum of the current width and the provided width
  87. *
  88. * @param unmergedWidth the best-fit column width calculated with useMergedCells=False
  89. * @param mergedWidth the best-fit column width calculated with useMergedCells=True
  90. */
  91. public void setMaxColumnWidths(double unmergedWidth, double mergedWidth) {
  92. withUseMergedCells = Math.max(withUseMergedCells, mergedWidth);
  93. withSkipMergedCells = Math.max(withSkipMergedCells, unmergedWidth);
  94. }
  95. }
  96. /**
  97. * AutoSizeColumnTracker constructor. Holds no reference to <code>sheet</code>
  98. *
  99. * @param sheet the sheet associated with this auto-size column tracker
  100. * @since 3.14beta1
  101. */
  102. public AutoSizeColumnTracker(final Sheet sheet) {
  103. // If sheet needs to be saved, use a java.lang.ref.WeakReference to avoid garbage collector gridlock.
  104. defaultCharWidth = SheetUtil.getDefaultCharWidthAsFloat(sheet.getWorkbook());
  105. }
  106. /**
  107. * Get the currently tracked columns, naturally ordered.
  108. * Note if all columns are tracked, this will only return the columns that have been explicitly or implicitly tracked,
  109. * which is probably only columns containing 1 or more non-blank values
  110. *
  111. * @return a set of the indices of all tracked columns
  112. * @since 3.14beta1
  113. */
  114. public SortedSet<Integer> getTrackedColumns() {
  115. SortedSet<Integer> sorted = new TreeSet<>(maxColumnWidths.keySet());
  116. return Collections.unmodifiableSortedSet(sorted);
  117. }
  118. /**
  119. * Returns true if column is currently tracked for auto-sizing.
  120. *
  121. * @param column the index of the column to check
  122. * @return true if column is tracked
  123. * @since 3.14beta1
  124. */
  125. public boolean isColumnTracked(int column) {
  126. return (trackAllColumns && !untrackedColumns.contains(column))
  127. || maxColumnWidths.containsKey(column);
  128. }
  129. /**
  130. * Returns true if all columns are implicitly tracked.
  131. *
  132. * @return true if all columns are implicitly tracked
  133. * @since 3.14beta1
  134. */
  135. public boolean isAllColumnsTracked() {
  136. return trackAllColumns;
  137. }
  138. /**
  139. * Tracks all non-blank columns
  140. * Allows columns that have been explicitly untracked to be tracked
  141. * @since 3.14beta1
  142. */
  143. public void trackAllColumns() {
  144. trackAllColumns = true;
  145. untrackedColumns.clear();
  146. }
  147. /**
  148. * Untrack all columns that were previously tracked for auto-sizing.
  149. * All best-fit column widths are forgotten.
  150. * @since 3.14beta1
  151. */
  152. public void untrackAllColumns() {
  153. trackAllColumns = false;
  154. maxColumnWidths.clear();
  155. untrackedColumns.clear();
  156. }
  157. /**
  158. * Marks multiple columns for inclusion in auto-size column tracking.
  159. * Note this has undefined behavior if columns are tracked after one or more rows are written to the sheet.
  160. * Any column in <code>columns</code> that are already tracked are ignored by this call.
  161. *
  162. * @param columns the indices of the columns to track
  163. * @since 3.14beta1
  164. */
  165. public void trackColumns(Collection<Integer> columns)
  166. {
  167. for (final int column : columns) {
  168. trackColumn(column);
  169. }
  170. }
  171. /**
  172. * Marks a column for inclusion in auto-size column tracking.
  173. * Note this has undefined behavior if a column is tracked after one or more rows are written to the sheet.
  174. * If <code>column</code> is already tracked, this call does nothing.
  175. *
  176. * @param column the index of the column to track for auto-sizing
  177. * @return if column is already tracked, the call does nothing and returns false
  178. * @since 3.14beta1
  179. */
  180. public boolean trackColumn(int column) {
  181. untrackedColumns.remove(column);
  182. if (!maxColumnWidths.containsKey(column)) {
  183. maxColumnWidths.put(column, new ColumnWidthPair());
  184. return true;
  185. }
  186. return false;
  187. }
  188. /**
  189. * Implicitly track a column if it has not been explicitly untracked
  190. * If it has been explicitly untracked, this call does nothing and returns false.
  191. * Otherwise return true
  192. *
  193. * @param column the column to implicitly track
  194. * @return false if column has been explicitly untracked, otherwise return true
  195. */
  196. private boolean implicitlyTrackColumn(int column) {
  197. if (!untrackedColumns.contains(column)) {
  198. trackColumn(column);
  199. return true;
  200. }
  201. return false;
  202. }
  203. /**
  204. * Removes columns that were previously marked for inclusion in auto-size column tracking.
  205. * When a column is untracked, the best-fit width is forgotten.
  206. * Any column in <code>columns</code> that is not tracked will be ignored by this call.
  207. *
  208. * @param columns the indices of the columns to track for auto-sizing
  209. * @return true if one or more columns were untracked as a result of this call
  210. * @since 3.14beta1
  211. */
  212. public boolean untrackColumns(Collection<Integer> columns)
  213. {
  214. untrackedColumns.addAll(columns);
  215. boolean result = false;
  216. for (Integer col : columns) {
  217. result = maxColumnWidths.remove(col) != null || result;
  218. }
  219. return result;
  220. }
  221. /**
  222. * Removes a column that was previously marked for inclusion in auto-size column tracking.
  223. * When a column is untracked, the best-fit width is forgotten.
  224. * If <code>column</code> is not tracked, it will be ignored by this call.
  225. *
  226. * @param column the index of the column to track for auto-sizing
  227. * @return true if column was tracked prior this call, false if no action was taken
  228. * @since 3.14beta1
  229. */
  230. public boolean untrackColumn(int column) {
  231. untrackedColumns.add(column);
  232. return maxColumnWidths.remove(column) != null;
  233. }
  234. /**
  235. * Get the best-fit width of a tracked column
  236. *
  237. * @param column the index of the column to get the current best-fit width of
  238. * @param useMergedCells true if merged cells should be considered when computing the best-fit width
  239. * @return best-fit column width, measured in number of characters
  240. * @throws IllegalStateException if column is not tracked and trackAllColumns is false
  241. * @since 3.14beta1
  242. */
  243. public int getBestFitColumnWidth(int column, boolean useMergedCells) {
  244. if (!maxColumnWidths.containsKey(column)) {
  245. // if column is not tracked, implicitly track the column if trackAllColumns is True and column has not been explicitly untracked
  246. if (trackAllColumns) {
  247. if (!implicitlyTrackColumn(column)) {
  248. final Throwable reason = new IllegalStateException(
  249. "Column was explicitly untracked after trackAllColumns() was called.");
  250. throw new IllegalStateException(
  251. "Cannot get best fit column width on explicitly untracked column " + column + ". " +
  252. "Either explicitly track the column or track all columns.", reason);
  253. }
  254. }
  255. else {
  256. final Throwable reason = new IllegalStateException(
  257. "Column was never explicitly tracked and isAllColumnsTracked() is false " +
  258. "(trackAllColumns() was never called or untrackAllColumns() was called after trackAllColumns() was called).");
  259. throw new IllegalStateException(
  260. "Cannot get best fit column width on untracked column " + column + ". " +
  261. "Either explicitly track the column or track all columns.", reason);
  262. }
  263. }
  264. final double width = maxColumnWidths.get(column).getMaxColumnWidth(useMergedCells);
  265. return Math.toIntExact(Math.round(256*width));
  266. }
  267. /**
  268. * Calculate the best fit width for each tracked column in row
  269. *
  270. * @param row the row to get the cells
  271. * @since 3.14beta1
  272. */
  273. public void updateColumnWidths(Row row) {
  274. // track new columns
  275. implicitlyTrackColumnsInRow(row);
  276. // update the widths
  277. // for-loop over the shorter of the number of cells in the row and the number of tracked columns
  278. // these two for-loops should do the same thing
  279. if (maxColumnWidths.size() < row.getPhysicalNumberOfCells()) {
  280. // loop over the tracked columns, because there are fewer tracked columns than cells in this row
  281. for (final Entry<Integer, ColumnWidthPair> e : maxColumnWidths.entrySet()) {
  282. final int column = e.getKey();
  283. final Cell cell = row.getCell(column); //is MissingCellPolicy=Row.RETURN_NULL_AND_BLANK needed?
  284. // FIXME: if cell belongs to a merged region, some of the merged region may have fallen outside of the random access window
  285. // In this case, getting the column width may result in an error. Need to gracefully handle this.
  286. // FIXME: Most cells are not merged, so calling getCellWidth twice re-computes the same value twice.
  287. // Need to rewrite this to avoid unnecessary computation if this proves to be a performance bottleneck.
  288. if (cell != null) {
  289. final ColumnWidthPair pair = e.getValue();
  290. updateColumnWidth(cell, pair);
  291. }
  292. }
  293. }
  294. else {
  295. // loop over the cells in this row, because there are fewer cells in this row than tracked columns
  296. for (final Cell cell : row) {
  297. final int column = cell.getColumnIndex();
  298. // FIXME: if cell belongs to a merged region, some of the merged region may have fallen outside of the random access window
  299. // In this case, getting the column width may result in an error. Need to gracefully handle this.
  300. // FIXME: Most cells are not merged, so calling getCellWidth twice re-computes the same value twice.
  301. // Need to rewrite this to avoid unnecessary computation if this proves to be a performance bottleneck.
  302. if (maxColumnWidths.containsKey(column)) {
  303. final ColumnWidthPair pair = maxColumnWidths.get(column);
  304. updateColumnWidth(cell, pair);
  305. }
  306. }
  307. }
  308. }
  309. /**
  310. * Helper for {@link #updateColumnWidths(Row)}.
  311. * Implicitly track the columns corresponding to the cells in row.
  312. * If all columns in the row are already tracked, this call does nothing.
  313. * Explicitly untracked columns will not be tracked.
  314. *
  315. * @param row the row containing cells to implicitly track the columns
  316. * @since 3.14beta1
  317. */
  318. private void implicitlyTrackColumnsInRow(Row row) {
  319. // track new columns
  320. if (trackAllColumns) {
  321. // if column is not tracked, implicitly track the column if trackAllColumns is True and column has not been explicitly untracked
  322. for (final Cell cell : row) {
  323. final int column = cell.getColumnIndex();
  324. implicitlyTrackColumn(column);
  325. }
  326. }
  327. }
  328. /**
  329. * Helper for {@link #updateColumnWidths(Row)}.
  330. *
  331. * @param cell the cell to compute the best fit width on
  332. * @param pair the column width pair to update
  333. * @since 3.14beta1
  334. */
  335. private void updateColumnWidth(final Cell cell, final ColumnWidthPair pair) {
  336. final double unmergedWidth = SheetUtil.getCellWidth(cell, defaultCharWidth, dataFormatter, false);
  337. final double mergedWidth = SheetUtil.getCellWidth(cell, defaultCharWidth, dataFormatter, true);
  338. pair.setMaxColumnWidths(unmergedWidth, mergedWidth);
  339. }
  340. }