From: Yegor Kozlov Date: Fri, 28 Dec 2012 12:50:15 +0000 (+0000) Subject: Bugzilla 54356: Support of statistical function INTERCEPT X-Git-Tag: 3.10-beta1~73 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=008dea947c46c8defba2ea63ce3f96743c499ec7;p=poi.git Bugzilla 54356: Support of statistical function INTERCEPT git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1426485 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 8fef9acb23..2016c13d2d 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 54356 - Support for INTERCEPT function 54282 - Improve the performance of ColumnHelper addCleanColIntoCols, speeds up some .xlsx file loading 53650 - Prevent unreadable content and disalow to overwrite rows from input template in SXSSF 54228,53672 - Fixed XSSF to read cells with missing R attribute diff --git a/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java b/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java index c76da52c17..021d6980f0 100644 --- a/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java +++ b/src/java/org/apache/poi/ss/formula/eval/FunctionEval.java @@ -28,6 +28,7 @@ import java.util.TreeSet; /** * @author Amol S. Deshmukh < amolweb at ya hoo dot com > + * @author Johan Karlsteen - added Intercept */ public final class FunctionEval { /** @@ -208,6 +209,8 @@ public final class FunctionEval { retval[304] = new Sumx2my2(); retval[305] = new Sumx2py2(); + retval[311] = new Intercept(); + retval[318] = AggregateFunction.DEVSQ; retval[321] = AggregateFunction.SUMSQ; diff --git a/src/java/org/apache/poi/ss/formula/functions/Intercept.java b/src/java/org/apache/poi/ss/formula/functions/Intercept.java new file mode 100644 index 0000000000..06bb6f97db --- /dev/null +++ b/src/java/org/apache/poi/ss/formula/functions/Intercept.java @@ -0,0 +1,223 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.poi.ss.formula.functions; + +import org.apache.poi.ss.formula.TwoDEval; +import org.apache.poi.ss.formula.eval.ErrorEval; +import org.apache.poi.ss.formula.eval.EvaluationException; +import org.apache.poi.ss.formula.eval.NumberEval; +import org.apache.poi.ss.formula.eval.RefEval; +import org.apache.poi.ss.formula.eval.ValueEval; +import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector; + +/** + * Implementation of Excel function INTERCEPT()

+ * + * Calculates the INTERCEPT of the linear regression line that is used to predict y values from x values
+ * (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html) + * Syntax:
+ * INTERCEPT(arrayX, arrayY)

+ * + * + * @author Johan Karlsteen + */ +public final class Intercept extends Fixed2ArgFunction { + + private static abstract class ValueArray implements ValueVector { + private final int _size; + protected ValueArray(int size) { + _size = size; + } + + public ValueEval getItem(int index) { + if (index < 0 || index > _size) { + throw new IllegalArgumentException("Specified index " + index + + " is outside range (0.." + (_size - 1) + ")"); + } + return getItemInternal(index); + } + protected abstract ValueEval getItemInternal(int index); + + public final int getSize() { + return _size; + } + } + + private static final class SingleCellValueArray extends ValueArray { + private final ValueEval _value; + public SingleCellValueArray(ValueEval value) { + super(1); + _value = value; + } + @Override + protected ValueEval getItemInternal(int index) { + return _value; + } + } + + private static final class RefValueArray extends ValueArray { + private final RefEval _ref; + public RefValueArray(RefEval ref) { + super(1); + _ref = ref; + } + @Override + protected ValueEval getItemInternal(int index) { + return _ref.getInnerValueEval(); + } + } + + private static final class AreaValueArray extends ValueArray { + private final TwoDEval _ae; + private final int _width; + + public AreaValueArray(TwoDEval ae) { + super(ae.getWidth() * ae.getHeight()); + _ae = ae; + _width = ae.getWidth(); + } + @Override + protected ValueEval getItemInternal(int index) { + int rowIx = index / _width; + int colIx = index % _width; + return _ae.getValue(rowIx, colIx); + } + } + + + public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, + ValueEval arg0, ValueEval arg1) { + double result; + try { + ValueVector vvX = createValueVector(arg0); + ValueVector vvY = createValueVector(arg1); + int size = vvX.getSize(); + if (size == 0 || vvY.getSize() != size) { + return ErrorEval.NA; + } + result = evaluateInternal(vvX, vvY, size); + } catch (EvaluationException e) { + return e.getErrorEval(); + } + if (Double.isNaN(result) || Double.isInfinite(result)) { + return ErrorEval.NUM_ERROR; + } + return new NumberEval(result); + } + + private double evaluateInternal(ValueVector x, ValueVector y, int size) + throws EvaluationException { + + // error handling is as if the x is fully evaluated before y + ErrorEval firstXerr = null; + ErrorEval firstYerr = null; + boolean accumlatedSome = false; + double result = 0.0; + // first pass: read in data, compute xbar and ybar + double sumx = 0.0, sumy = 0.0; + + for (int i = 0; i < size; i++) { + ValueEval vx = x.getItem(i); + ValueEval vy = y.getItem(i); + if (vx instanceof ErrorEval) { + if (firstXerr == null) { + firstXerr = (ErrorEval) vx; + continue; + } + } + if (vy instanceof ErrorEval) { + if (firstYerr == null) { + firstYerr = (ErrorEval) vy; + continue; + } + } + // only count pairs if both elements are numbers + if (vx instanceof NumberEval && vy instanceof NumberEval) { + accumlatedSome = true; + NumberEval nx = (NumberEval) vx; + NumberEval ny = (NumberEval) vy; + sumx += nx.getNumberValue(); + sumy += ny.getNumberValue(); + } else { + // all other combinations of value types are silently ignored + } + } + double xbar = sumx / size; + double ybar = sumy / size; + + // second pass: compute summary statistics + double xxbar = 0.0, xybar = 0.0; + for (int i = 0; i < size; i++) { + ValueEval vx = x.getItem(i); + ValueEval vy = y.getItem(i); + + if (vx instanceof ErrorEval) { + if (firstXerr == null) { + firstXerr = (ErrorEval) vx; + continue; + } + } + if (vy instanceof ErrorEval) { + if (firstYerr == null) { + firstYerr = (ErrorEval) vy; + continue; + } + } + + // only count pairs if both elements are numbers + if (vx instanceof NumberEval && vy instanceof NumberEval) { + NumberEval nx = (NumberEval) vx; + NumberEval ny = (NumberEval) vy; + xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar); + xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar); + } else { + // all other combinations of value types are silently ignored + } + } + double beta1 = xybar / xxbar; + double beta0 = ybar - beta1 * xbar; + + if (firstXerr != null) { + throw new EvaluationException(firstXerr); + } + if (firstYerr != null) { + throw new EvaluationException(firstYerr); + } + if (!accumlatedSome) { + throw new EvaluationException(ErrorEval.DIV_ZERO); + } + + result = beta0; + return result; + } + + private static ValueVector createValueVector(ValueEval arg) throws EvaluationException { + if (arg instanceof ErrorEval) { + throw new EvaluationException((ErrorEval) arg); + } + if (arg instanceof TwoDEval) { + return new AreaValueArray((TwoDEval) arg); + } + if (arg instanceof RefEval) { + return new RefValueArray((RefEval) arg); + } + return new SingleCellValueArray(arg); + } +} \ No newline at end of file diff --git a/src/testcases/org/apache/poi/ss/formula/functions/TestIntercept.java b/src/testcases/org/apache/poi/ss/formula/functions/TestIntercept.java new file mode 100644 index 0000000000..942ab6b019 --- /dev/null +++ b/src/testcases/org/apache/poi/ss/formula/functions/TestIntercept.java @@ -0,0 +1,159 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.poi.ss.formula.functions; + +import junit.framework.TestCase; + +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.formula.eval.ErrorEval; +import org.apache.poi.ss.formula.eval.NumberEval; +import org.apache.poi.ss.formula.eval.ValueEval; +/** + * Test for Excel function INTERCEPT() + * + * @author Johan Karlsteen + */ +public final class TestIntercept extends TestCase { + private static final Function INTERCEPT = new Intercept(); + + private static ValueEval invoke(Function function, ValueEval xArray, ValueEval yArray) { + ValueEval[] args = new ValueEval[] { xArray, yArray, }; + return function.evaluate(args, -1, (short)-1); + } + + private void confirm(Function function, ValueEval xArray, ValueEval yArray, double expected) { + ValueEval result = invoke(function, xArray, yArray); + assertEquals(NumberEval.class, result.getClass()); + assertEquals(expected, ((NumberEval)result).getNumberValue(), 0); + } + private void confirmError(Function function, ValueEval xArray, ValueEval yArray, ErrorEval expectedError) { + ValueEval result = invoke(function, xArray, yArray); + assertEquals(ErrorEval.class, result.getClass()); + assertEquals(expectedError.getErrorCode(), ((ErrorEval)result).getErrorCode()); + } + + private void confirmError(ValueEval xArray, ValueEval yArray, ErrorEval expectedError) { + confirmError(INTERCEPT, xArray, yArray, expectedError); + } + + public void testBasic() { + Double exp = Math.pow(10, 7.5); + ValueEval[] xValues = { + new NumberEval(3+exp), + new NumberEval(4+exp), + new NumberEval(2+exp), + new NumberEval(5+exp), + new NumberEval(4+exp), + new NumberEval(7+exp), + }; + ValueEval areaEvalX = createAreaEval(xValues); + + ValueEval[] yValues = { + new NumberEval(1), + new NumberEval(2), + new NumberEval(3), + new NumberEval(4), + new NumberEval(5), + new NumberEval(6), + }; + ValueEval areaEvalY = createAreaEval(yValues); + confirm(INTERCEPT, areaEvalX, areaEvalY, -24516534.39905822); + // Excel 2010 gives -24516534.3990583 + } + + /** + * number of items in array is not limited to 30 + */ + public void testLargeArrays() { + ValueEval[] xValues = createMockNumberArray(100, 3); // [1,2,0,1,2,0,...,0,1] + xValues[0] = new NumberEval(2.0); // Changes first element to 2 + ValueEval[] yValues = createMockNumberArray(100, 101); // [1,2,3,4,...,99,100] + + confirm(INTERCEPT, createAreaEval(xValues), createAreaEval(yValues), 51.74384236453202); + // Excel 2010 gives 51.74384236453200 + } + + private ValueEval[] createMockNumberArray(int size, double value) { + ValueEval[] result = new ValueEval[size]; + for (int i = 0; i < result.length; i++) { + result[i] = new NumberEval((i+1)%value); + } + return result; + } + + private static ValueEval createAreaEval(ValueEval[] values) { + String refStr = "A1:A" + values.length; + return EvalFactory.createAreaEval(refStr, values); + } + + public void testErrors() { + ValueEval[] xValues = { + ErrorEval.REF_INVALID, + new NumberEval(2), + }; + ValueEval areaEvalX = createAreaEval(xValues); + ValueEval[] yValues = { + new NumberEval(2), + ErrorEval.NULL_INTERSECTION, + }; + ValueEval areaEvalY = createAreaEval(yValues); + ValueEval[] zValues = { // wrong size + new NumberEval(2), + }; + ValueEval areaEvalZ = createAreaEval(zValues); + + // if either arg is an error, that error propagates + confirmError(ErrorEval.REF_INVALID, ErrorEval.NAME_INVALID, ErrorEval.REF_INVALID); + confirmError(areaEvalX, ErrorEval.NAME_INVALID, ErrorEval.NAME_INVALID); + confirmError(ErrorEval.NAME_INVALID, areaEvalX, ErrorEval.NAME_INVALID); + + // array sizes must match + confirmError(areaEvalX, areaEvalZ, ErrorEval.NA); + confirmError(areaEvalZ, areaEvalY, ErrorEval.NA); + + // any error in an array item propagates up + confirmError(areaEvalX, areaEvalX, ErrorEval.REF_INVALID); + + // search for errors array by array, not pair by pair + confirmError(areaEvalX, areaEvalY, ErrorEval.REF_INVALID); + confirmError(areaEvalY, areaEvalX, ErrorEval.NULL_INTERSECTION); + } + + /** + * Example from + * http://office.microsoft.com/en-us/excel-help/intercept-function-HP010062512.aspx?CTT=5&origin=HA010277524 + */ + public void testFromFile() { + + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("intercept.xls"); + HSSFFormulaEvaluator fe = new HSSFFormulaEvaluator(wb); + + HSSFSheet example1 = wb.getSheet("Example 1"); + HSSFCell a8 = example1.getRow(7).getCell(0); + assertEquals("INTERCEPT(A2:A6,B2:B6)", a8.getCellFormula()); + fe.evaluate(a8); + assertEquals(0.048387097, a8.getNumericCellValue(), 0.000000001); + + } +} \ No newline at end of file diff --git a/test-data/spreadsheet/intercept.xls b/test-data/spreadsheet/intercept.xls new file mode 100644 index 0000000000..b734887c39 Binary files /dev/null and b/test-data/spreadsheet/intercept.xls differ