From f9c9def944e1b4a1425a6f467e8057e516ef1c82 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Thu, 20 Jun 2013 13:37:26 +0000 Subject: Patch and unit test from Tim Allen from bug #54686 - Improve how DataFormatter handles fractions git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1494986 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/ss/usermodel/DataFormatter.java | 158 +++--------- .../apache/poi/ss/usermodel/FractionFormat.java | 271 +++++++++++++++++++++ .../apache/poi/ss/usermodel/TestDataFormatter.java | 16 +- .../poi/ss/usermodel/TestFractionFormat.java | 83 +++++++ 4 files changed, 403 insertions(+), 125 deletions(-) create mode 100644 src/java/org/apache/poi/ss/usermodel/FractionFormat.java create mode 100644 src/testcases/org/apache/poi/ss/usermodel/TestFractionFormat.java (limited to 'src') diff --git a/src/java/org/apache/poi/ss/usermodel/DataFormatter.java b/src/java/org/apache/poi/ss/usermodel/DataFormatter.java index 930f8b4207..46740cc381 100644 --- a/src/java/org/apache/poi/ss/usermodel/DataFormatter.java +++ b/src/java/org/apache/poi/ss/usermodel/DataFormatter.java @@ -28,7 +28,6 @@ import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.FieldPosition; import java.text.Format; -import java.text.NumberFormat; import java.text.ParsePosition; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -41,7 +40,6 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.poi.ss.formula.eval.NotImplementedException; /** * DataFormatter contains methods for formatting the value stored in an @@ -100,12 +98,10 @@ import org.apache.poi.ss.formula.eval.NotImplementedException; *
  • simulate Excel's handling of a format string of all # when the value is 0. * Excel will output "", DataFormatter will output "0". * - * @author James May (james dot may at fmr dot com) - * @author Robert Kish - * */ public class DataFormatter { - + private static final String defaultFractionWholePartFormat = "#"; + private static final String defaultFractionFractionPartFormat = "#/##"; /** Pattern to find a number format: "0" or "#" */ private static final Pattern numPattern = Pattern.compile("[0#]+"); @@ -131,6 +127,17 @@ public class DataFormatter { "(\\[MAGENTA\\])|(\\[RED\\])|(\\[WHITE\\])|(\\[YELLOW\\])|" + "(\\[COLOR\\s*\\d\\])|(\\[COLOR\\s*[0-5]\\d\\])", Pattern.CASE_INSENSITIVE); + /** + * A regex to identify a fraction pattern. + * This requires that replaceAll("\\?", "#") has already been called + */ + private static final Pattern fractionPattern = Pattern.compile("(?:([#\\d]+)\\s+)?(#+)\\s*\\/\\s*([#\\d]+)"); + + /** + * A regex to strip junk out of fraction formats + */ + private static final Pattern fractionStripper = Pattern.compile("(\"[^\"]*\")|([^ \\?#\\d\\/]+)"); + /** * Cells formatted with a date or time format and which contain invalid date or time values * show 255 pound signs ("#"). @@ -372,23 +379,26 @@ public class DataFormatter { DateUtil.isValidExcelDate(cellValue)) { return createDateFormat(formatStr, cellValue); } - // Excel supports fractions in format strings, which Java doesn't - if (formatStr.indexOf("#/#") >= 0 || formatStr.indexOf("?/?") >= 0) { - // Strip custom text in quotes and escaped characters for now as it can cause performance problems in fractions. - String strippedFormatStr = formatStr.replaceAll("\\\\ ", " ").replaceAll("\\\\.", "").replaceAll("\"[^\"]*\"", " "); - - boolean ok = true; - for (String part: strippedFormatStr.split(";")) { - int indexOfFraction = indexOfFraction(part); - if (indexOfFraction == -1 || indexOfFraction != lastIndexOfFraction(part)) { - ok = false; - break; - } - } - if (ok) { - return new FractionFormat(strippedFormatStr); + if (formatStr.indexOf("#/") >= 0 || formatStr.indexOf("?/") >= 0) { + String[] chunks = formatStr.split(";"); + for (int i = 0; i < chunks.length; i++){ + String chunk = chunks[i].replaceAll("\\?", "#"); + Matcher matcher = fractionStripper.matcher(chunk); + chunk = matcher.replaceAll(" "); + chunk = chunk.replaceAll(" +", " "); + Matcher fractionMatcher = fractionPattern.matcher(chunk); + //take the first match + if (fractionMatcher.find()){ + String wholePart = (fractionMatcher.group(1) == null) ? "" : defaultFractionWholePartFormat; + return new FractionFormat(wholePart, fractionMatcher.group(3)); + } } + + // Strip custom text in quotes and escaped characters for now as it can cause performance problems in fractions. + //String strippedFormatStr = formatStr.replaceAll("\\\\ ", " ").replaceAll("\\\\.", "").replaceAll("\"[^\"]*\"", " ").replaceAll("\\?", "#"); + //System.out.println("formatStr: "+strippedFormatStr); + return new FractionFormat(defaultFractionWholePartFormat, defaultFractionFractionPartFormat); } if (numPattern.matcher(formatStr).find()) { @@ -402,17 +412,7 @@ public class DataFormatter { return null; } - private int indexOfFraction(String format) { - int i = format.indexOf("#/#"); - int j = format.indexOf("?/?"); - return i == -1 ? j : j == -1 ? i : Math.min(i, j); - } - - private int lastIndexOfFraction(String format) { - int i = format.lastIndexOf("#/#"); - int j = format.lastIndexOf("?/?"); - return i == -1 ? j : j == -1 ? i : Math.max(i, j); - } + private Format createDateFormat(String pFormatStr, double cellValue) { String formatStr = pFormatStr; @@ -786,7 +786,7 @@ public class DataFormatter { * @return a string value of the cell */ public String formatCellValue(Cell cell, FormulaEvaluator evaluator) { - + if (cell == null) { return ""; } @@ -1018,97 +1018,9 @@ public class DataFormatter { } } - /** - * Format class that handles Excel style fractions, such as "# #/#" and "#/###" - */ - @SuppressWarnings("serial") - private static final class FractionFormat extends Format { - private final String str; - public FractionFormat(String s) { - str = s; - } - - public String format(Number num) { - - double doubleValue = num.doubleValue(); - - // Format may be p or p;n or p;n;z (okay we never get a z). - // Fall back to p when n or z is not specified. - String[] formatBits = str.split(";"); - int f = doubleValue > 0.0 ? 0 : doubleValue < 0.0 ? 1 : 2; - String str = (f < formatBits.length) ? formatBits[f] : formatBits[0]; - - double wholePart = Math.floor(Math.abs(doubleValue)); - double decPart = Math.abs(doubleValue) - wholePart; - if (wholePart + decPart == 0) { - return "0"; - } - if (doubleValue < 0.0) { - wholePart *= -1.0; - } - - // Split the format string into decimal and fraction parts - String[] parts = str.replaceAll(" *", " ").split(" "); - String[] fractParts; - if (parts.length == 2) { - fractParts = parts[1].split("/"); - } else { - fractParts = str.split("/"); - } - - // Excel supports both #/# and ?/?, but Java only the former - for (int i=0; i 0; i--) { - for(int i2 = (int)(Math.pow(10, fractPart1Length)- 1d); i2 > 0; i2--){ - if (minVal >= Math.abs((double)i2/(double)i - decPart)) { - currDenom = i; - currNeum = i2; - minVal = Math.abs((double)i2/(double)i - decPart); - } - } - } - NumberFormat neumFormatter = new DecimalFormat(fractParts[0]); - NumberFormat denomFormatter = new DecimalFormat(fractParts[1]); - if (parts.length == 2) { - NumberFormat wholeFormatter = new DecimalFormat(parts[0]); - String result = wholeFormatter.format(wholePart) + " " + neumFormatter.format(currNeum) + "/" + denomFormatter.format(currDenom); - return result; - } else { - String result = neumFormatter.format(currNeum + (currDenom * wholePart)) + "/" + denomFormatter.format(currDenom); - return result; - } - } else { - throw new IllegalArgumentException("Fraction must have 2 parts, found " + fractParts.length + " for fraction format " + this.str); - } - } - - private int countHashes(String format) { - int count = 0; - for (int i=format.length()-1; i >= 0; i--) { - if (format.charAt(i) == '#') { - count++; - } - } - return count; - } - - public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) { - return toAppendTo.append(format((Number)obj)); - } - - public Object parseObject(String source, ParsePosition pos) { - throw new NotImplementedException("Reverse parsing not supported"); - } - } + + /** * Format class that does nothing and always returns a constant string. * diff --git a/src/java/org/apache/poi/ss/usermodel/FractionFormat.java b/src/java/org/apache/poi/ss/usermodel/FractionFormat.java new file mode 100644 index 0000000000..1522e7afb8 --- /dev/null +++ b/src/java/org/apache/poi/ss/usermodel/FractionFormat.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.poi.ss.usermodel; +import java.text.FieldPosition; +import java.text.Format; +import java.text.ParsePosition; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.poi.ss.formula.eval.NotImplementedException; + +/** + *

    Format class that handles Excel style fractions, such as "# #/#" and "#/###"

    + * + *

    As of this writing, this is still not 100% accurate, but it does a reasonable job + * of trying to mimic Excel's fraction calculations. It does not currently + * maintain Excel's spacing.

    + * + *

    This class relies on a method lifted nearly verbatim from org.apache.math.fraction. + * If further uses for Commons Math are found, we will consider adding it as a dependency. + * For now, we have in-lined the one method to keep things simple.

    + */ +/* One question remains...is the value of epsilon in calcFractionMaxDenom reasonable? */ +@SuppressWarnings("serial") +public class FractionFormat extends Format { + private final static Pattern DENOM_FORMAT_PATTERN = Pattern.compile("(?:(#+)|(\\d+))"); + + //this was chosen to match the earlier limitation of max denom power + //it can be expanded to get closer to Excel's calculations + //with custom formats # #/######### + //but as of this writing, the numerators and denominators + //with formats of that nature on very small values were quite + //far from Excel's calculations + private final static int MAX_DENOM_POW = 4; + + //there are two options: + //a) an exact denominator is specified in the formatString + //b) the maximum denominator can be calculated from the formatString + private final int exactDenom; + private final int maxDenom; + + private final String wholePartFormatString; + /** + * Single parameter ctor + * @param denomFormatString The format string for the denominator + */ + public FractionFormat(String wholePartFormatString, String denomFormatString) { + this.wholePartFormatString = wholePartFormatString; + //init exactDenom and maxDenom + Matcher m = DENOM_FORMAT_PATTERN.matcher(denomFormatString); + int tmpExact = -1; + int tmpMax = -1; + if (m.find()){ + if (m.group(2) != null){ + try{ + tmpExact = Integer.parseInt(m.group(2)); + //if the denom is 0, fall back to the default: tmpExact=100 + + if (tmpExact == 0){ + tmpExact = -1; + } + } catch (NumberFormatException e){ + //should never happen + } + } else if (m.group(1) != null) { + int len = m.group(1).length(); + len = len > MAX_DENOM_POW ? MAX_DENOM_POW : len; + tmpMax = (int)Math.pow(10, len); + } else { + tmpExact = 100; + } + } + if (tmpExact <= 0 && tmpMax <= 0){ + //use 100 as the default denom if something went horribly wrong + tmpExact = 100; + } + exactDenom = tmpExact; + maxDenom = tmpMax; + } + + public String format(Number num) { + + double doubleValue = num.doubleValue(); + + boolean isNeg = (doubleValue < 0.0f) ? true : false; + double absDoubleValue = Math.abs(doubleValue); + + double wholePart = Math.floor(absDoubleValue); + double decPart = absDoubleValue - wholePart; + if (wholePart + decPart == 0) { + return "0"; + } + + //if the absolute value is smaller than 1 over the exact or maxDenom + //you can stop here and return "0" + if (absDoubleValue < (1/Math.max(exactDenom, maxDenom))){ + return "0"; + } + + //this is necessary to prevent overflow in the maxDenom calculation + //stink1 + if (wholePart+(int)decPart == wholePart+decPart){ + + StringBuilder sb = new StringBuilder(); + if (isNeg){ + sb.append("-"); + } + sb.append(Integer.toString((int)wholePart)); + return sb.toString(); + } + + SimpleFraction fract = null; + try{ + //this should be the case because of the constructor + if (exactDenom > 0){ + fract = calcFractionExactDenom(decPart, exactDenom); + } else { + fract = calcFractionMaxDenom(decPart, maxDenom); + } + } catch (SimpleFractionException e){ + e.printStackTrace(); + return Double.toString(doubleValue); + } + + StringBuilder sb = new StringBuilder(); + + //now format the results + if (isNeg){ + sb.append("-"); + } + + //if whole part has to go into the numerator + if ("".equals(wholePartFormatString)){ + int trueNum = (fract.getDenominator()*(int)wholePart)+fract.getNumerator(); + sb.append(trueNum).append("/").append(fract.getDenominator()); + return sb.toString(); + } + + + //short circuit if fraction is 0 or 1 + if (fract.getNumerator() == 0){ + sb.append(Integer.toString((int)wholePart)); + return sb.toString(); + } else if (fract.getNumerator() == fract.getDenominator()){ + sb.append(Integer.toString((int)wholePart+1)); + return sb.toString(); + } + //as mentioned above, this ignores the exact space formatting in Excel + if (wholePart > 0){ + sb.append(Integer.toString((int)wholePart)).append(" "); + } + sb.append(fract.getNumerator()).append("/").append(fract.getDenominator()); + return sb.toString(); + } + + public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) { + return toAppendTo.append(format((Number)obj)); + } + + public Object parseObject(String source, ParsePosition pos) { + throw new NotImplementedException("Reverse parsing not supported"); + } + + private SimpleFraction calcFractionMaxDenom(double value, int maxDenominator) + throws SimpleFractionException{ + /* + * Lifted wholesale from org.apache.math.fraction.Fraction 2.2 + */ + double epsilon = 0.000000000001f; + int maxIterations = 100; + long overflow = Integer.MAX_VALUE; + double r0 = value; + long a0 = (long)Math.floor(r0); + if (Math.abs(a0) > overflow) { + throw new SimpleFractionException( + String.format("value > Integer.MAX_VALUE: %d.", a0)); + } + + // check for (almost) integer arguments, which should not go + // to iterations. + if (Math.abs(a0 - value) < epsilon) { + return new SimpleFraction((int) a0, 1); + } + + long p0 = 1; + long q0 = 0; + long p1 = a0; + long q1 = 1; + + long p2 = 0; + long q2 = 1; + + int n = 0; + boolean stop = false; + do { + ++n; + double r1 = 1.0 / (r0 - a0); + long a1 = (long)Math.floor(r1); + p2 = (a1 * p1) + p0; + q2 = (a1 * q1) + q0; + if ((Math.abs(p2) > overflow) || (Math.abs(q2) > overflow)) { + throw new SimpleFractionException( + String.format("Greater than overflow in loop %f, %d, %d", value, p2, q2)); + } + + double convergent = (double)p2 / (double)q2; + if (n < maxIterations && Math.abs(convergent - value) > epsilon && q2 < maxDenominator) { + p0 = p1; + p1 = p2; + q0 = q1; + q1 = q2; + a0 = a1; + r0 = r1; + } else { + stop = true; + } + } while (!stop); + + if (n >= maxIterations) { + throw new SimpleFractionException("n greater than max iterations " + value + " : " + maxIterations); + } + + if (q2 < maxDenominator) { + return new SimpleFraction((int) p2, (int) q2); + } else { + return new SimpleFraction((int) p1, (int) q1); + } + } + + private SimpleFraction calcFractionExactDenom(double val, int exactDenom){ + int num = (int)Math.round(val*(double)exactDenom); + return new SimpleFraction(num,exactDenom); + } + + private class SimpleFraction { + private final int num; + private final int denom; + + public SimpleFraction(int num, int denom) { + this.num = num; + this.denom = denom; + } + + public int getNumerator() { + return num; + } + public int getDenominator() { + return denom; + } + } + private class SimpleFractionException extends Throwable{ + private SimpleFractionException(String message){ + super(message); + } + } +} diff --git a/src/testcases/org/apache/poi/ss/usermodel/TestDataFormatter.java b/src/testcases/org/apache/poi/ss/usermodel/TestDataFormatter.java index c6233fcf39..6dcd6ebeba 100644 --- a/src/testcases/org/apache/poi/ss/usermodel/TestDataFormatter.java +++ b/src/testcases/org/apache/poi/ss/usermodel/TestDataFormatter.java @@ -229,13 +229,25 @@ public class TestDataFormatter extends TestCase { assertEquals("321 321/1000", dfUS.formatRawCellContents(321.321, -1, "# #/##########")); // Not a valid fraction formats (too many #/# or ?/?) - hence the strange expected results - assertEquals("321 / ?/?", dfUS.formatRawCellContents(321.321, -1, "# #/# ?/?")); + +/* assertEquals("321 / ?/?", dfUS.formatRawCellContents(321.321, -1, "# #/# ?/?")); assertEquals("321 / /", dfUS.formatRawCellContents(321.321, -1, "# #/# #/#")); assertEquals("321 ?/? ?/?", dfUS.formatRawCellContents(321.321, -1, "# ?/? ?/?")); assertEquals("321 ?/? / /", dfUS.formatRawCellContents(321.321, -1, "# ?/? #/# #/#")); +*/ + + //Bug54686 patch sets default behavior of # #/## if there is a failure to parse + assertEquals("321 1/3", dfUS.formatRawCellContents(321.321, -1, "# #/# ?/?")); + assertEquals("321 1/3", dfUS.formatRawCellContents(321.321, -1, "# #/# #/#")); + assertEquals("321 1/3", dfUS.formatRawCellContents(321.321, -1, "# ?/? ?/?")); + assertEquals("321 1/3", dfUS.formatRawCellContents(321.321, -1, "# ?/? #/# #/#")); // Where both p and n don't include a fraction, so cannot always be formatted - assertEquals("123", dfUS.formatRawCellContents(-123.321, -1, "0 ?/?;0")); + // assertEquals("123", dfUS.formatRawCellContents(-123.321, -1, "0 ?/?;0")); + + //Bug54868 patch has a hit on the first string before the ";" + assertEquals("-123 1/3", dfUS.formatRawCellContents(-123.321, -1, "0 ?/?;0")); + } /** diff --git a/src/testcases/org/apache/poi/ss/usermodel/TestFractionFormat.java b/src/testcases/org/apache/poi/ss/usermodel/TestFractionFormat.java new file mode 100644 index 0000000000..1db2507ea6 --- /dev/null +++ b/src/testcases/org/apache/poi/ss/usermodel/TestFractionFormat.java @@ -0,0 +1,83 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ss.usermodel; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; + +import junit.framework.TestCase; + +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.usermodel.FormulaEvaluator; +import org.apache.poi.ss.usermodel.FractionFormat; +import org.apache.poi.ss.usermodel.Row; + +/** + * Tests for the Fraction Formatting part of DataFormatter. + * Largely taken from bug #54686 + */ +public final class TestFractionFormat extends TestCase { + public void testSingle() throws Exception { + FractionFormat f = new FractionFormat("", "##"); + double val = 321.321; + String ret = f.format(val); + assertEquals("26027/81", ret); + } + + public void testTruthFile() throws Exception { + File truthFile = HSSFTestDataSamples.getSampleFile("54686_fraction_formats.txt"); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(truthFile))); + Workbook wb = HSSFTestDataSamples.openSampleWorkbook("54686_fraction_formats.xls"); + Sheet sheet = wb.getSheetAt(0); + DataFormatter formatter = new DataFormatter(); + FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator(); + + // Skip over the header row + String truthLine = reader.readLine(); + String[] headers = truthLine.split("\t"); + truthLine = reader.readLine(); + + for (int i = 1; i < sheet.getLastRowNum() && truthLine != null; i++){ + Row r = sheet.getRow(i); + String[] truths = truthLine.split("\t"); + // Intentionally ignore the last column (tika-1132), for now + for (short j = 3; j < 12; j++){ + Cell cell = r.getCell(j, Row.CREATE_NULL_AS_BLANK); + String formatted = clean(formatter.formatCellValue(cell, evaluator)); + if (truths.length <= j){ + continue; + } + + String truth = clean(truths[j]); + String testKey = truths[0]+":"+truths[1]+":"+headers[j]; + assertEquals(testKey, truth, formatted); + } + truthLine = reader.readLine(); + } + reader.close(); + } + + private String clean(String s){ + s = s.trim().replaceAll(" +", " ").replaceAll("- +", "-"); + return s; + } +} -- cgit v1.2.3