]> source.dussan.org Git - poi.git/commitdiff
Bugzilla 54356: Support of statistical function INTERCEPT
authorYegor Kozlov <yegor@apache.org>
Fri, 28 Dec 2012 12:50:15 +0000 (12:50 +0000)
committerYegor Kozlov <yegor@apache.org>
Fri, 28 Dec 2012 12:50:15 +0000 (12:50 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1426485 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/ss/formula/eval/FunctionEval.java
src/java/org/apache/poi/ss/formula/functions/Intercept.java [new file with mode: 0644]
src/testcases/org/apache/poi/ss/formula/functions/TestIntercept.java [new file with mode: 0644]
test-data/spreadsheet/intercept.xls [new file with mode: 0644]

index 8fef9acb234ad1275cd748dff5df5a79f6063531..2016c13d2d7bdf3c660af8e693ee07a6371c80d7 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="4.0-beta1" date="2013-??-??">
+          <action dev="poi-developers" type="add">54356 - Support for INTERCEPT function</action>
           <action dev="poi-developers" type="fix">54282 - Improve the performance of ColumnHelper addCleanColIntoCols, speeds up some .xlsx file loading</action>
           <action dev="poi-developers" type="fix">53650 - Prevent unreadable content and disalow to overwrite rows from input template in SXSSF</action>
           <action dev="poi-developers" type="fix">54228,53672 - Fixed XSSF to read cells with missing R attribute</action>
index c76da52c17807d2f9da5ba071e15123071a3be6e..021d6980f08623c0cef21f378d4559db2d49632f 100644 (file)
@@ -28,6 +28,7 @@ import java.util.TreeSet;
 
 /**
  * @author Amol S. Deshmukh &lt; amolweb at ya hoo dot com &gt;
+ * @author Johan Karlsteen - added Intercept
  */
 public final class FunctionEval {
        /**
@@ -208,6 +209,8 @@ public final class FunctionEval {
                retval[304] = new Sumx2my2();
                retval[305] = new Sumx2py2();
 
+               retval[311] = new Intercept();
+
                retval[318] = AggregateFunction.DEVSQ;
 
                retval[321] = AggregateFunction.SUMSQ;
diff --git a/src/java/org/apache/poi/ss/formula/functions/Intercept.java b/src/java/org/apache/poi/ss/formula/functions/Intercept.java
new file mode 100644 (file)
index 0000000..06bb6f9
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.ss.formula.functions;
+
+import org.apache.poi.ss.formula.TwoDEval;
+import org.apache.poi.ss.formula.eval.ErrorEval;
+import org.apache.poi.ss.formula.eval.EvaluationException;
+import org.apache.poi.ss.formula.eval.NumberEval;
+import org.apache.poi.ss.formula.eval.RefEval;
+import org.apache.poi.ss.formula.eval.ValueEval;
+import org.apache.poi.ss.formula.functions.LookupUtils.ValueVector;
+
+/**
+ * Implementation of Excel function INTERCEPT()<p/>
+ *
+ * Calculates the INTERCEPT of the linear regression line that is used to predict y values from x values<br/>
+ * (http://introcs.cs.princeton.edu/java/97data/LinearRegression.java.html)
+ * <b>Syntax</b>:<br/>
+ * <b>INTERCEPT</b>(<b>arrayX</b>, <b>arrayY</b>)<p/>
+ *
+ *
+ * @author Johan Karlsteen
+ */
+public final class Intercept extends Fixed2ArgFunction {
+
+       private static abstract class ValueArray implements ValueVector {
+               private final int _size;
+               protected ValueArray(int size) {
+                       _size = size;
+               }
+
+               public ValueEval getItem(int index) {
+                       if (index < 0 || index > _size) {
+                               throw new IllegalArgumentException("Specified index " + index
+                                               + " is outside range (0.." + (_size - 1) + ")");
+                       }
+                       return getItemInternal(index);
+               }
+               protected abstract ValueEval getItemInternal(int index);
+
+               public final int getSize() {
+                       return _size;
+               }
+       }
+
+       private static final class SingleCellValueArray extends ValueArray {
+               private final ValueEval _value;
+               public SingleCellValueArray(ValueEval value) {
+                       super(1);
+                       _value = value;
+               }
+               @Override
+               protected ValueEval getItemInternal(int index) {
+                       return _value;
+               }
+       }
+
+       private static final class RefValueArray extends ValueArray {
+               private final RefEval _ref;
+               public RefValueArray(RefEval ref) {
+                       super(1);
+                       _ref = ref;
+               }
+               @Override
+               protected ValueEval getItemInternal(int index) {
+                       return _ref.getInnerValueEval();
+               }
+       }
+
+       private static final class AreaValueArray extends ValueArray {
+               private final TwoDEval _ae;
+               private final int _width;
+
+               public AreaValueArray(TwoDEval ae) {
+                       super(ae.getWidth() * ae.getHeight());
+                       _ae = ae;
+                       _width = ae.getWidth();
+               }
+               @Override
+               protected ValueEval getItemInternal(int index) {
+                       int rowIx = index / _width;
+                       int colIx = index % _width;
+                       return _ae.getValue(rowIx, colIx);
+               }
+       }
+
+       
+       public ValueEval evaluate(int srcRowIndex, int srcColumnIndex,
+                       ValueEval arg0, ValueEval arg1) {
+               double result;
+               try {
+                       ValueVector vvX = createValueVector(arg0);
+                       ValueVector vvY = createValueVector(arg1);
+                       int size = vvX.getSize();
+                       if (size == 0 || vvY.getSize() != size) {
+                               return ErrorEval.NA;
+                       }
+                       result = evaluateInternal(vvX, vvY, size);
+               } catch (EvaluationException e) {
+                       return e.getErrorEval();
+               }
+               if (Double.isNaN(result) || Double.isInfinite(result)) {
+                       return ErrorEval.NUM_ERROR;
+               }
+               return new NumberEval(result);
+       }
+       
+       private double evaluateInternal(ValueVector x, ValueVector y, int size)
+                       throws EvaluationException {
+
+               // error handling is as if the x is fully evaluated before y
+               ErrorEval firstXerr = null;
+               ErrorEval firstYerr = null;
+               boolean accumlatedSome = false;
+               double result = 0.0;
+        // first pass: read in data, compute xbar and ybar
+        double sumx = 0.0, sumy = 0.0;
+        
+               for (int i = 0; i < size; i++) {
+                       ValueEval vx = x.getItem(i);
+                       ValueEval vy = y.getItem(i);
+                       if (vx instanceof ErrorEval) {
+                               if (firstXerr == null) {
+                                       firstXerr = (ErrorEval) vx;
+                                       continue;
+                               }
+                       }
+                       if (vy instanceof ErrorEval) {
+                               if (firstYerr == null) {
+                                       firstYerr = (ErrorEval) vy;
+                                       continue;
+                               }
+                       }
+                       // only count pairs if both elements are numbers
+                       if (vx instanceof NumberEval && vy instanceof NumberEval) {
+                               accumlatedSome = true;
+                               NumberEval nx = (NumberEval) vx;
+                               NumberEval ny = (NumberEval) vy;
+                               sumx  += nx.getNumberValue();
+                   sumy  += ny.getNumberValue();
+                       } else {
+                               // all other combinations of value types are silently ignored
+                       }
+               }
+               double xbar = sumx / size;
+        double ybar = sumy / size;
+               
+                // second pass: compute summary statistics
+        double xxbar = 0.0, xybar = 0.0;
+        for (int i = 0; i < size; i++) {
+                       ValueEval vx = x.getItem(i);
+                       ValueEval vy = y.getItem(i);
+                       
+                       if (vx instanceof ErrorEval) {
+                               if (firstXerr == null) {
+                                       firstXerr = (ErrorEval) vx;
+                                       continue;
+                               }
+                       }
+                       if (vy instanceof ErrorEval) {
+                               if (firstYerr == null) {
+                                       firstYerr = (ErrorEval) vy;
+                                       continue;
+                               }
+                       }
+                       
+                       // only count pairs if both elements are numbers
+                       if (vx instanceof NumberEval && vy instanceof NumberEval) {
+                               NumberEval nx = (NumberEval) vx;
+                               NumberEval ny = (NumberEval) vy;
+                   xxbar += (nx.getNumberValue() - xbar) * (nx.getNumberValue() - xbar);
+                   xybar += (nx.getNumberValue() - xbar) * (ny.getNumberValue() - ybar);
+                       } else {
+                               // all other combinations of value types are silently ignored
+                       }
+        }
+        double beta1 = xybar / xxbar;
+        double beta0 = ybar - beta1 * xbar;
+               
+               if (firstXerr != null) {
+                       throw new EvaluationException(firstXerr);
+               }
+               if (firstYerr != null) {
+                       throw new EvaluationException(firstYerr);
+               }
+               if (!accumlatedSome) {
+                       throw new EvaluationException(ErrorEval.DIV_ZERO);
+               }
+               
+               result = beta0;
+               return result;
+       }
+
+       private static ValueVector createValueVector(ValueEval arg) throws EvaluationException {
+               if (arg instanceof ErrorEval) {
+                       throw new EvaluationException((ErrorEval) arg);
+               }
+               if (arg instanceof TwoDEval) {
+                       return new AreaValueArray((TwoDEval) arg);
+               }
+               if (arg instanceof RefEval) {
+                       return new RefValueArray((RefEval) arg);
+               }
+               return new SingleCellValueArray(arg);
+       }
+}
\ No newline at end of file
diff --git a/src/testcases/org/apache/poi/ss/formula/functions/TestIntercept.java b/src/testcases/org/apache/poi/ss/formula/functions/TestIntercept.java
new file mode 100644 (file)
index 0000000..942ab6b
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.ss.formula.functions;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.formula.eval.ErrorEval;
+import org.apache.poi.ss.formula.eval.NumberEval;
+import org.apache.poi.ss.formula.eval.ValueEval;
+/**
+ * Test for Excel function INTERCEPT()
+ *
+ * @author Johan Karlsteen
+ */
+public final class TestIntercept extends TestCase {
+       private static final Function INTERCEPT = new Intercept();
+
+       private static ValueEval invoke(Function function, ValueEval xArray, ValueEval yArray) {
+               ValueEval[] args = new ValueEval[] { xArray, yArray, };
+               return function.evaluate(args, -1, (short)-1);
+       }
+
+       private void confirm(Function function, ValueEval xArray, ValueEval yArray, double expected) {
+               ValueEval result = invoke(function, xArray, yArray);
+               assertEquals(NumberEval.class, result.getClass());
+               assertEquals(expected, ((NumberEval)result).getNumberValue(), 0);
+       }
+       private void confirmError(Function function, ValueEval xArray, ValueEval yArray, ErrorEval expectedError) {
+               ValueEval result = invoke(function, xArray, yArray);
+               assertEquals(ErrorEval.class, result.getClass());
+               assertEquals(expectedError.getErrorCode(), ((ErrorEval)result).getErrorCode());
+       }
+
+       private void confirmError(ValueEval xArray, ValueEval yArray, ErrorEval expectedError) {
+               confirmError(INTERCEPT, xArray, yArray, expectedError);
+       }
+
+       public void testBasic() {
+               Double exp = Math.pow(10, 7.5);
+               ValueEval[] xValues = {
+                       new NumberEval(3+exp),
+                       new NumberEval(4+exp),
+                       new NumberEval(2+exp),
+                       new NumberEval(5+exp),
+                       new NumberEval(4+exp),
+                       new NumberEval(7+exp),
+               };
+               ValueEval areaEvalX = createAreaEval(xValues);
+
+               ValueEval[] yValues = {
+                       new NumberEval(1),
+                       new NumberEval(2),
+                       new NumberEval(3),
+                       new NumberEval(4),
+                       new NumberEval(5),
+                       new NumberEval(6),
+               };
+               ValueEval areaEvalY = createAreaEval(yValues);
+               confirm(INTERCEPT, areaEvalX, areaEvalY, -24516534.39905822);
+               // Excel 2010 gives -24516534.3990583
+       }
+
+       /**
+        * number of items in array is not limited to 30
+        */
+       public void testLargeArrays() {
+               ValueEval[] xValues = createMockNumberArray(100, 3); // [1,2,0,1,2,0,...,0,1]
+               xValues[0] = new NumberEval(2.0); // Changes first element to 2
+               ValueEval[] yValues = createMockNumberArray(100, 101); // [1,2,3,4,...,99,100]
+
+               confirm(INTERCEPT, createAreaEval(xValues), createAreaEval(yValues), 51.74384236453202);
+               // Excel 2010 gives 51.74384236453200
+       }
+
+       private ValueEval[] createMockNumberArray(int size, double value) {
+               ValueEval[] result = new ValueEval[size];
+               for (int i = 0; i < result.length; i++) {
+                       result[i] = new NumberEval((i+1)%value);
+               }
+               return result;
+       }
+
+       private static ValueEval createAreaEval(ValueEval[] values) {
+               String refStr = "A1:A" + values.length;
+               return EvalFactory.createAreaEval(refStr, values);
+       }
+
+       public void testErrors() {
+               ValueEval[] xValues = {
+                               ErrorEval.REF_INVALID,
+                               new NumberEval(2),
+               };
+               ValueEval areaEvalX = createAreaEval(xValues);
+               ValueEval[] yValues = {
+                               new NumberEval(2),
+                               ErrorEval.NULL_INTERSECTION,
+               };
+               ValueEval areaEvalY = createAreaEval(yValues);
+               ValueEval[] zValues = { // wrong size
+                               new NumberEval(2),
+               };
+               ValueEval areaEvalZ = createAreaEval(zValues);
+
+               // if either arg is an error, that error propagates
+               confirmError(ErrorEval.REF_INVALID, ErrorEval.NAME_INVALID, ErrorEval.REF_INVALID);
+               confirmError(areaEvalX, ErrorEval.NAME_INVALID, ErrorEval.NAME_INVALID);
+               confirmError(ErrorEval.NAME_INVALID, areaEvalX, ErrorEval.NAME_INVALID);
+
+               // array sizes must match
+               confirmError(areaEvalX, areaEvalZ, ErrorEval.NA);
+               confirmError(areaEvalZ, areaEvalY, ErrorEval.NA);
+
+               // any error in an array item propagates up
+               confirmError(areaEvalX, areaEvalX, ErrorEval.REF_INVALID);
+
+               // search for errors array by array, not pair by pair
+               confirmError(areaEvalX, areaEvalY, ErrorEval.REF_INVALID);
+               confirmError(areaEvalY, areaEvalX, ErrorEval.NULL_INTERSECTION);
+       }
+
+    /**
+     *  Example from
+     *  http://office.microsoft.com/en-us/excel-help/intercept-function-HP010062512.aspx?CTT=5&origin=HA010277524
+     */
+    public void testFromFile() {
+
+        HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("intercept.xls");
+        HSSFFormulaEvaluator fe = new HSSFFormulaEvaluator(wb);
+
+        HSSFSheet example1 = wb.getSheet("Example 1");
+        HSSFCell a8 = example1.getRow(7).getCell(0);
+        assertEquals("INTERCEPT(A2:A6,B2:B6)", a8.getCellFormula());
+        fe.evaluate(a8);
+        assertEquals(0.048387097, a8.getNumericCellValue(), 0.000000001);
+
+    }
+}
\ No newline at end of file
diff --git a/test-data/spreadsheet/intercept.xls b/test-data/spreadsheet/intercept.xls
new file mode 100644 (file)
index 0000000..b734887
Binary files /dev/null and b/test-data/spreadsheet/intercept.xls differ