]> source.dussan.org Git - poi.git/commitdiff
Bug 60405: Add initial support for cetab functions so some macros can be
authorDominik Stadler <centic@apache.org>
Sun, 27 Jan 2019 09:57:39 +0000 (09:57 +0000)
committerDominik Stadler <centic@apache.org>
Sun, 27 Jan 2019 09:57:39 +0000 (09:57 +0000)
parsed
Add some function-definitions for Excel 4 Macros and missing functions
found in regression tests

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1852277 13f79535-47bb-0310-9956-ffa450edef68

13 files changed:
src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
src/java/org/apache/poi/ss/formula/function/FunctionDataBuilder.java
src/java/org/apache/poi/ss/formula/function/FunctionMetadata.java
src/java/org/apache/poi/ss/formula/function/FunctionMetadataReader.java
src/java/org/apache/poi/ss/formula/function/FunctionMetadataRegistry.java
src/java/org/apache/poi/ss/formula/ptg/AbstractFunctionPtg.java
src/java/org/apache/poi/ss/formula/ptg/ExpPtg.java
src/java/org/apache/poi/ss/formula/ptg/FuncVarPtg.java
src/resources/main/org/apache/poi/ss/formula/function/functionMetadata.txt
src/resources/main/org/apache/poi/ss/formula/function/functionMetadataCetab.txt [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java [new file with mode: 0644]
test-data/spreadsheet/60405.xls [new file with mode: 0644]

index fbe37a4b0b006e97b20acc01fcb20ba1318cec9f..2b1114ab46c5ce99256d46d4429a6cd4f05eb27b 100644 (file)
@@ -31,9 +31,11 @@ import java.util.Set;
 import org.apache.poi.EncryptedDocumentException;
 import org.apache.poi.extractor.POIOLE2TextExtractor;
 import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
 import org.apache.poi.ooxml.extractor.ExtractorFactory;
 import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.ss.extractor.ExcelExtractor;
 import org.apache.poi.util.IOUtils;
 import org.apache.xmlbeans.XmlException;
 
@@ -83,6 +85,7 @@ public abstract class AbstractFileHandler implements FileHandler {
         long modified = file.lastModified();
         
         POITextExtractor extractor = null;
+        String fileAndParentName = file.getParentFile().getName() + "/" + file.getName();
         try {
             extractor = ExtractorFactory.createExtractor(file);
             assertNotNull("Should get a POITextExtractor but had none for file " + file, extractor);
@@ -95,7 +98,7 @@ public abstract class AbstractFileHandler implements FileHandler {
             assertNotNull(metadataExtractor.getText());
 
             assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
-                    EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName()));
+                    EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName));
 
             assertEquals("File should not be modified by extractor", length, file.length());
             assertEquals("File should not be modified by extractor", modified, file.lastModified());
@@ -111,8 +114,24 @@ public abstract class AbstractFileHandler implements FileHandler {
                     assertNotNull(text);
                 }
             }
+
+            // test again with including formulas and cell-comments as this caused some bugs
+            if(extractor instanceof ExcelExtractor &&
+                    // comment-extraction and formula extraction are not well supported in event based extraction
+                    !(extractor instanceof EventBasedExcelExtractor)) {
+                ((ExcelExtractor)extractor).setFormulasNotResults(true);
+
+                String text = extractor.getText();
+                assertNotNull(text);
+                // */
+
+                ((ExcelExtractor) extractor).setIncludeCellComments(true);
+
+                text = extractor.getText();
+                assertNotNull(text);
+            }
         } catch (IllegalArgumentException e) {
-            if(!EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
+            if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) {
                 throw e;
             }
         } catch (EncryptedDocumentException e) {
index c535bf4ca2c3c22f11249c498b80c9ebc23f98dc..0be53f555e2cac174cefd4b3cde16eaf8803d5f2 100644 (file)
@@ -81,8 +81,7 @@ final class FunctionDataBuilder {
                FunctionMetadata[] jumbledArray =  new FunctionMetadata[_functionDataByName.size()];
                _functionDataByName.values().toArray(jumbledArray);
                FunctionMetadata[] fdIndexArray = new FunctionMetadata[_maxFunctionIndex+1];
-               for (int i = 0; i < jumbledArray.length; i++) {
-                       FunctionMetadata fd = jumbledArray[i];
+               for (FunctionMetadata fd : jumbledArray) {
                        fdIndexArray[fd.getIndex()] = fd;
                }
 
index 6ab06f0ad8711ce264f4812c9fb98b8566d35556..fa8ca34aca1d9c7e62d411e56f531837c4cb94a6 100644 (file)
@@ -31,6 +31,7 @@ public final class FunctionMetadata {
         * to make that file more version neutral.
         * @see org.apache.poi.ss.formula.FormulaParser#validateNumArgs(int, FunctionMetadata)
         */
+       @SuppressWarnings("JavadocReference")
        private static final short FUNCTION_MAX_PARAMS = 30;
 
        private final int _index;
@@ -49,27 +50,35 @@ public final class FunctionMetadata {
                _returnClassCode = returnClassCode;
                _parameterClassCodes = (parameterClassCodes == null) ? null : parameterClassCodes.clone();
        }
+
        public int getIndex() {
                return _index;
        }
+
        public String getName() {
                return _name;
        }
+
        public int getMinParams() {
                return _minParams;
        }
+
        public int getMaxParams() {
                return _maxParams;
        }
+
        public boolean hasFixedArgsLength() {
                return _minParams == _maxParams;
        }
+
        public byte getReturnClassCode() {
                return _returnClassCode;
        }
+
        public byte[] getParameterClassCodes() {
                return _parameterClassCodes.clone();
        }
+
        /**
         * Some varags functions (like VLOOKUP) have a specific limit to the number of arguments that 
         * can be passed.  Other functions (like SUM) don't have such a limit.  For those functions,
@@ -80,11 +89,8 @@ public final class FunctionMetadata {
        public boolean hasUnlimitedVarags() {
                return FUNCTION_MAX_PARAMS == _maxParams;
        }
+
        public String toString() {
-               StringBuffer sb = new StringBuffer(64);
-               sb.append(getClass().getName()).append(" [");
-               sb.append(_index).append(" ").append(_name);
-               sb.append("]");
-               return sb.toString();
+               return getClass().getName() + " [" + _index + " " + _name + "]";
        }
 }
index dc6a8c025538d060099ff0263ce10470cded02af..dc83e9c6010c690ba67887ae51330b2e04643d5c 100644 (file)
@@ -41,6 +41,7 @@ final class FunctionMetadataReader {
        private static final int MAX_RECORD_LENGTH = 100_000;
 
        private static final String METADATA_FILE_NAME = "functionMetadata.txt";
+       private static final String METADATA_FILE_NAME_CETAB = "functionMetadataCetab.txt";
 
        /** plain ASCII text metadata file uses three dots for ellipsis */
        private static final String ELLIPSIS = "...";
@@ -52,51 +53,56 @@ final class FunctionMetadataReader {
        private static final String[] DIGIT_ENDING_FUNCTION_NAMES = {
                // Digits at the end of a function might be due to a left-over footnote marker.
                // except in these cases
-               "LOG10", "ATAN2", "DAYS360", "SUMXMY2", "SUMX2MY2", "SUMX2PY2",
+               "LOG10", "ATAN2", "DAYS360", "SUMXMY2", "SUMX2MY2", "SUMX2PY2", "A1.R1C1",
        };
        private static final Set<String> DIGIT_ENDING_FUNCTION_NAMES_SET = new HashSet<>(Arrays.asList(DIGIT_ENDING_FUNCTION_NAMES));
 
        public static FunctionMetadataRegistry createRegistry() {
-           try {
-               InputStream is = FunctionMetadataReader.class.getResourceAsStream(METADATA_FILE_NAME);
-               if (is == null) {
-                       throw new RuntimeException("resource '" + METADATA_FILE_NAME + "' not found");
-               }
-    
-               try {
-                       try(BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
-                           FunctionDataBuilder fdb = new FunctionDataBuilder(400);
-        
-                               while (true) {
-                                       String line = br.readLine();
-                                       if (line == null) {
-                                               break;
-                                       }
-                                       if (line.length() < 1 || line.charAt(0) == '#') {
-                                               continue;
-                                       }
-                                       String trimLine = line.trim();
-                                       if (trimLine.length() < 1) {
-                                               continue;
-                                       }
-                                       processLine(fdb, line);
-                               }
-
-                               return fdb.build();
-                       }
-               } finally {
-                   is.close();
-               }
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-           } 
+               FunctionDataBuilder fdb = new FunctionDataBuilder(800);
+               readResourceFile(fdb, METADATA_FILE_NAME);
+               return fdb.build();
+       }
+
+       public static FunctionMetadataRegistry createRegistryCetab() {
+               FunctionDataBuilder fdb = new FunctionDataBuilder(800);
+               readResourceFile(fdb, METADATA_FILE_NAME_CETAB);
+               return fdb.build();
+       }
+
+       private static void readResourceFile(FunctionDataBuilder fdb, String resourceFile) {
+               try (InputStream is = FunctionMetadataReader.class.getResourceAsStream(resourceFile)) {
+                       if (is == null) {
+                               throw new RuntimeException("resource '" + resourceFile + "' not found");
+                       }
+
+                       try(BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+
+                               while (true) {
+                                       String line = br.readLine();
+                                       if (line == null) {
+                                               break;
+                                       }
+                                       if (line.length() < 1 || line.charAt(0) == '#') {
+                                               continue;
+                                       }
+                                       String trimLine = line.trim();
+                                       if (trimLine.length() < 1) {
+                                               continue;
+                                       }
+                                       processLine(fdb, line);
+                               }
+                       }
+               } catch (IOException e) {
+                       throw new RuntimeException(e);
+               }
        }
 
        private static void processLine(FunctionDataBuilder fdb, String line) {
 
                String[] parts = TAB_DELIM_PATTERN.split(line, -2);
                if(parts.length != 8) {
-                       throw new RuntimeException("Bad line format '" + line + "' - expected 8 data fields");
+                       throw new RuntimeException("Bad line format '" + line + "' - expected 8 data fields delimited by tab, " +
+                                       "but had " + parts.length + ": " + Arrays.toString(parts));
                }
                int functionIndex = parseInt(parts[0]);
                String functionName = parts[1];
index 34317fe6d8f8448d140e3ba3105fc1441b6e8579..bb01b4116306197ca2dc7308b3146d547a089d99 100644 (file)
@@ -37,6 +37,7 @@ public final class FunctionMetadataRegistry {
        public static final short FUNCTION_INDEX_EXTERNAL = 255;
 
        private static FunctionMetadataRegistry _instance;
+       private static FunctionMetadataRegistry _instanceCetab;
 
        private final FunctionMetadata[] _functionDataByIndex;
        private final Map<String, FunctionMetadata> _functionDataByName;
@@ -48,6 +49,13 @@ public final class FunctionMetadataRegistry {
                return _instance;
        }
 
+       private static FunctionMetadataRegistry getInstanceCetab() {
+               if (_instanceCetab == null) {
+                       _instanceCetab = FunctionMetadataReader.createRegistryCetab();
+               }
+               return _instanceCetab;
+       }
+
        /* package */ FunctionMetadataRegistry(FunctionMetadata[] functionDataByIndex, Map<String, FunctionMetadata> functionDataByName) {
                _functionDataByIndex = (functionDataByIndex == null) ? null : functionDataByIndex.clone();
                _functionDataByName = functionDataByName;
@@ -62,6 +70,10 @@ public final class FunctionMetadataRegistry {
                return getInstance().getFunctionByIndexInternal(index);
        }
 
+       public static FunctionMetadata getCetabFunctionByIndex(int index) {
+               return getInstanceCetab().getFunctionByIndexInternal(index);
+       }
+
        private FunctionMetadata getFunctionByIndexInternal(int index) {
                return _functionDataByIndex[index];
        }
@@ -74,7 +86,11 @@ public final class FunctionMetadataRegistry {
        public static short lookupIndexByName(String name) {
                FunctionMetadata fd = getInstance().getFunctionByNameInternal(name);
                if (fd == null) {
-                       return -1;
+                       // also try the cetab functions
+                       fd = getInstanceCetab().getFunctionByNameInternal(name);
+                       if (fd == null) {
+                               return -1;
+                       }
                }
                return (short) fd.getIndex();
        }
@@ -83,8 +99,12 @@ public final class FunctionMetadataRegistry {
                return _functionDataByName.get(name);
        }
 
-
        public static FunctionMetadata getFunctionByName(String name) {
-               return getInstance().getFunctionByNameInternal(name);
+               FunctionMetadata fm = getInstance().getFunctionByNameInternal(name);
+               if(fm == null) {
+                       return getInstanceCetab().getFunctionByNameInternal(name);
+               }
+
+               return fm;
        }
 }
index 28b5e7d9e54028584dd5d09cf4291c3191c33312..bfb7aeba6a54e6b5b9d873b1e0853f00e50d1430 100644 (file)
@@ -123,13 +123,22 @@ public abstract class AbstractFunctionPtg extends OperationPtg {
         return ix >= 0;
     }
 
-    protected final String lookupName(short index) {
+    protected String lookupName(short index) {
+        return lookupName(index, false);
+    }
+
+    protected final String lookupName(short index, boolean isCetab) {
         if(index == FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL) {
             return "#external#";
         }
-        FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByIndex(index);
+        final FunctionMetadata fm;
+        if(isCetab) {
+            fm = FunctionMetadataRegistry.getCetabFunctionByIndex(index);
+        } else {
+            fm = FunctionMetadataRegistry.getFunctionByIndex(index);
+        }
         if(fm == null) {
-            throw new RuntimeException("bad function index (" + index + ")");
+            throw new RuntimeException("bad function index (" + index + ", " + isCetab + ")");
         }
         return fm.getName();
     }
index d128be995170eee5f622c15b8b43fddc78b8f228..83ebc2044d4247395071f889c1d1d5fae5de69d6 100644 (file)
@@ -69,9 +69,6 @@ public final class ExpPtg extends ControlPtg {
 
     @Override
     public String toString() {
-        StringBuffer buffer = new StringBuffer("[Array Formula or Shared Formula]\n");
-        buffer.append("row = ").append(getRow()).append("\n");
-        buffer.append("col = ").append(getColumn()).append("\n");
-        return buffer.toString();
+        return "[Array Formula or Shared Formula]\n" + "row = " + getRow() + "\n" + "col = " + getColumn() + "\n";
     }
 }
index 3392d04a76be3fdda4d68efad9d031c4743aed4c..a1cacd8ba53eefd0f3d1caaddbd04fef1faf7f67 100644 (file)
@@ -18,6 +18,8 @@
 package org.apache.poi.ss.formula.ptg;
 import org.apache.poi.ss.formula.function.FunctionMetadata;
 import org.apache.poi.ss.formula.function.FunctionMetadataRegistry;
+import org.apache.poi.util.BitField;
+import org.apache.poi.util.BitFieldFactory;
 import org.apache.poi.util.LittleEndianInput;
 import org.apache.poi.util.LittleEndianOutput;
 
@@ -25,24 +27,29 @@ import org.apache.poi.util.LittleEndianOutput;
  * @author Jason Height (jheight at chariot dot net dot au)
  */
 public final class FuncVarPtg extends AbstractFunctionPtg{
-
     public final static byte sid  = 0x22;
     private final static int  SIZE = 4;
 
+    // See spec at 2.5.198.63 PtgFuncVar
+    private static final BitField ceFunc = BitFieldFactory.getInstance(0xF000);
+
     /**
      * Single instance of this token for 'sum() taking a single argument'
      */
     public static final OperationPtg SUM = FuncVarPtg.create("SUM", 1);
 
-    private FuncVarPtg(int functionIndex, int returnClass, byte[] paramClasses, int numArgs) {
+    private final boolean _isCetab;
+
+    private FuncVarPtg(int functionIndex, int returnClass, byte[] paramClasses, int numArgs, boolean isCetab) {
         super(functionIndex, returnClass, paramClasses, numArgs);
+        _isCetab = isCetab;
     }
 
     /**Creates new function pointer from a byte array
      * usually called while reading an excel file.
      */
     public static FuncVarPtg create(LittleEndianInput in)  {
-        return create(in.readByte(), in.readShort());
+        return create(in.readByte(), in.readUShort());
     }
 
     /**
@@ -53,12 +60,25 @@ public final class FuncVarPtg extends AbstractFunctionPtg{
     }
 
     private static FuncVarPtg create(int numArgs, int functionIndex) {
-        FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByIndex(functionIndex);
-        if(fm == null) {
+        final FunctionMetadata fm;
+        boolean isCetab = ceFunc.isSet(functionIndex);
+        if(isCetab) {
+            functionIndex = ceFunc.clear(functionIndex);
+            fm = FunctionMetadataRegistry.getCetabFunctionByIndex(functionIndex);
+        } else {
+            fm = FunctionMetadataRegistry.getFunctionByIndex(functionIndex);
+        }
+
+        if (fm == null) {
             // Happens only as a result of a call to FormulaParser.parse(), with a non-built-in function name
-            return new FuncVarPtg(functionIndex, Ptg.CLASS_VALUE, new byte[] {Ptg.CLASS_VALUE}, numArgs);
+            return new FuncVarPtg(functionIndex, Ptg.CLASS_VALUE, new byte[]{Ptg.CLASS_VALUE}, numArgs, isCetab);
         }
-        return new FuncVarPtg(functionIndex, fm.getReturnClassCode(), fm.getParameterClassCodes(), numArgs);
+        return new FuncVarPtg(functionIndex, fm.getReturnClassCode(), fm.getParameterClassCodes(), numArgs, isCetab);
+    }
+
+    @Override
+    protected String lookupName(short index) {
+        return lookupName(index, _isCetab);
     }
 
     public void write(LittleEndianOutput out) {
index f9e91a1e3bb4fe20b3b2e2bdeb16cc32bfc6b395..e3c8fc4d74111cb3bbe073d4f987a55ffec2de7a 100644 (file)
@@ -74,6 +74,8 @@
 50     TREND   1       3       A       R R R           x
 51     LOGEST  1       2       A       R R             x
 52     GROWTH  1       3       A       R R R           x
+53     GOTO    1       1       R       R               
+55     RETURN  1       1       V       V               
 56     PV      3       5       V       V V V V V               
 # Built-In Sheet Functions in BIFF2
 57     FV      3       5       V       V V V V V               
 76     ROWS    1       1       V       A               
 77     COLUMNS 1       1       V       A               
 78     OFFSET  3       5       R       R V V V V       x       
+79     ABSREF  2       2       R       V R             
+80     RELREF  2       2       R       V V             
+81     ARGUMENT        0       3       V       V V R           
 82     SEARCH  2       3       V       V V V           
 83     TRANSPOSE       1       1       A       A               
+84     ERROR   0       2       V       V R             
+85     STEP    0       0                               
 86     TYPE    1       1       V       V               
 97     ATAN2   2       2       V       V V             
 98     ASIN    1       1       V       V               
 102    VLOOKUP 3       3       V       V R R           x
 105    ISREF   1       1       V       R               
 109    LOG     1       2       V       V V             
+110    EXEC    1       4       V       V V V V         
 111    CHAR    1       1       V       V               
 112    LOWER   1       1       V       V               
 113    UPPER   1       1       V       V               
 143    SYD     4       4       V       V V V V         
 144    DDB     4       5       V       V V V V V               
 148    INDIRECT        1       2       R       V V     x       
+150    CALL    1       3       V       V R R           
 162    CLEAN   1       1       V       V               
 163    MDETERM 1       1       V       A               
 164    MINVERSE        1       1       A       A               
 169    COUNTA  0       30      V       R               
 183    PRODUCT 0       30      V       R               
 184    FACT    1       1       V       V               
+185    GET.CELL        1       2       V       V R             
+186    GET.WORKSPACE   1       1       V       V               
+187    GET.WINDOW      1       2       V       V V             
+188    GET.DOCUMENT    1       2       V       V V             
 189    DPRODUCT        3       3       V       R R R           
 190    ISNONTEXT       1       1       V       V               
 193    STDEVP  1       30      V       R               
 220    DAYS360 2       2       V       V V             x
 221    TODAY   0       0       V       -       x       
 222    VDB     5       7       V       V V V V V V V           
+225    END.IF  0       0                               
 227    MEDIAN  1       30      V       R ...           
 228    SUMPRODUCT      1       30      V       A ...           
 229    SINH    1       1       V       V               
 # New Built-In Sheet Functions in BIFF4
 14     FIXED   1       3       V       V V V           x
 204    USDOLLAR        1       2       V       V V             x
+238    LAST.ERROR      0       0                               
 215    DBCS    1       1       V       V               x
 216    RANK    2       3       V       V R V           
 247    DB      4       5       V       V V V V V               
 252    FREQUENCY       2       2       A       R R             
+257    EVALUATE        1       1       V       V               
 261    ERROR.TYPE      1       1       V       V               
+262    APP.TITLE       0       1       V       V               
+263    WINDOW.TITLE    0       1       V       V               
+264    SAVE.TOOLBAR    0       2       V       V V             
+265    ENABLE.TOOL     3       3       V       V V V           
+266    PRESS.TOOL      3       3       V       V V V           
+267    REGISTER.ID     2       3       V       V V V           
+268    GET.WORKBOOK    1       2       V       V V             
 269    AVEDEV  1       30      V       R ...           
 270    BETADIST        3       5       V       V V V V V               
 271    GAMMALN 1       1       V       V               
 350    ISPMT   4       4       V       V V V V         
 351    DATEDIF 3       3       V       V V V           
 352    DATESTRING      1       1       V       V               
-353    NUMBERSTRING    2       2       V       V V             \r
-354    ROMAN   1       2       V       V V             \r
-# New Built-In Sheet Functions in BIFF8\r
-358    GETPIVOTDATA    2       30      V       V R ...         \r
-359    HYPERLINK       1       2       V       V V             \r
-360    PHONETIC        1       1       V       R               \r
-361    AVERAGEA        1       30      V       R ...           \r
+353    NUMBERSTRING    2       2       V       V V             
+354    ROMAN   1       2       V       V V             
+# New Built-In Sheet Functions in BIFF8
+358    GETPIVOTDATA    2       30      V       V R ...         
+359    HYPERLINK       1       2       V       V V             
+360    PHONETIC        1       1       V       R               
+361    AVERAGEA        1       30      V       R ...           
 362    MAXA    1       30      V       R ...           
 363    MINA    1       30      V       R ...           
 364    STDEVPA 1       30      V       R ...           
diff --git a/src/resources/main/org/apache/poi/ss/formula/function/functionMetadataCetab.txt b/src/resources/main/org/apache/poi/ss/formula/function/functionMetadataCetab.txt
new file mode 100644 (file)
index 0000000..b0cf329
--- /dev/null
@@ -0,0 +1,417 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created by (org.apache.poi.ss.formula.function.ExcelCetabFunctionExtractor)
+# from source file 'org/apache/poi/ss/formula/function/functionMetadataCetab-PDF.txt'
+#
+#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )
+
+# 
+0      BEEP    0       0                               
+1      OPEN    0       0                               
+2      OPEN.LINKS      0       0                               
+3      CLOSE.ALL       0       0                               
+4      SAVE    0       0                               
+5      SAVE.AS 0       0                               
+6      FILE.DELETE     0       0                               
+7      PAGE.SETUP      0       0                               
+8      PRINT   0       0                               
+9      PRINTER.SETUP   0       0                               
+10     QUIT    0       0                               
+11     NEW.WINDOW      0       0                               
+12     ARRANGE.ALL     0       0                               
+13     WINDOW.SIZE     0       0                               
+14     WINDOW.MOVE     0       0                               
+15     FULL    0       0                               
+16     CLOSE   0       0                               
+17     RUN     0       0                               
+22     SET.PRINT.AREA  0       0                               
+23     SET.PRINT.TITLES        0       0                               
+24     SET.PAGE.BREAK  0       0                               
+25     REMOVE.PAGE.BREAK       0       0                               
+26     FONT    0       0                               
+27     DISPLAY 0       0                               
+28     PROTECT.DOCUMENT        0       0                               
+29     PRECISION       0       0                               
+30     A1.R1C1 0       0                               
+31     CALCULATE.NOW   0       0                               
+32     CALCULATION     0       0                               
+34     DATA.FIND       0       0                               
+35     EXTRACT 0       0                               
+36     DATA.DELETE     0       0                               
+37     SET.DATABASE    0       0                               
+38     SET.CRITERIA    0       0                               
+39     SORT    0       0                               
+40     DATA.SERIES     0       0                               
+41     TABLE   0       0                               
+42     FORMAT.NUMBER   0       0                               
+43     ALIGNMENT       0       0                               
+44     STYLE   0       0                               
+45     BORDER  0       0                               
+46     CELL.PROTECTION 0       0                               
+47     COLUMN.WIDTH    0       0                               
+48     UNDO    0       0                               
+49     CUT     0       0                               
+50     COPY    0       0                               
+51     PASTE   0       0                               
+52     CLEAR   0       0                               
+53     PASTE.SPECIAL   0       0                               
+54     EDIT.DELETE     0       0                               
+55     INSERT  0       0                               
+56     FILL.RIGHT      0       0                               
+57     FILL.DOWN       0       0                               
+61     DEFINE.NAME     0       0                               
+62     CREATE.NAMES    0       0                               
+63     FORMULA.GOTO    0       0                               
+64     FORMULA.FIND    0       0                               
+65     SELECT.LAST.CELL        0       0                               
+66     SHOW.ACTIVE.CELL        0       0                               
+67     GALLERY.AREA    0       0                               
+68     GALLERY.BAR     0       0                               
+69     GALLERY.COLUMN  0       0                               
+70     GALLERY.LINE    0       0                               
+71     GALLERY.PIE     0       0                               
+72     GALLERY.SCATTER 0       0                               
+73     COMBINATION     0       0                               
+74     PREFERRED       0       0                               
+75     ADD.OVERLAY     0       0                               
+76     GRIDLINES       0       0                               
+77     SET.PREFERRED   0       0                               
+78     AXES    0       0                               
+79     LEGEND  0       0                               
+80     ATTACH.TEXT     0       0                               
+81     ADD.ARROW       0       0                               
+82     SELECT.CHART    0       0                               
+83     SELECT.PLOT.AREA        0       0                               
+84     PATTERNS        0       0                               
+85     MAIN.CHART      0       0                               
+86     OVERLAY 0       0                               
+87     SCALE   0       0                               
+88     FORMAT.LEGEND   0       0                               
+89     FORMAT.TEXT     0       0                               
+90     EDIT.REPEAT     0       0                               
+91     PARSE   0       0                               
+92     JUSTIFY 0       0                               
+93     HIDE    0       0                               
+94     UNHIDE  0       0                               
+95     WORKSPACE       0       0                               
+96     FORMULA 0       0                               
+97     FORMULA.FILL    0       0                               
+98     FORMULA.ARRAY   0       0                               
+99     DATA.FIND.NEXT  0       0                               
+100    DATA.FIND.PREV  0       0                               
+101    FORMULA.FIND.NEXT       0       0                               
+102    FORMULA.FIND.PREV       0       0                               
+103    ACTIVATE        0       0                               
+104    ACTIVATE.NEXT   0       0                               
+105    ACTIVATE.PREV   0       0                               
+106    UNLOCKED.NEXT   0       0                               
+107    UNLOCKED.PREV   0       0                               
+108    COPY.PICTURE    0       0                               
+109    SELECT  0       0                               
+110    DELETE.NAME     0       0                               
+111    DELETE.FORMAT   0       0                               
+112    VLINE   0       0                               
+113    HLINE   0       0                               
+114    VPAGE   0       0                               
+115    HPAGE   0       0                               
+116    VSCROLL 0       0                               
+117    HSCROLL 0       0                               
+118    ALERT   0       0                               
+119    NEW     0       0                               
+120    CANCEL.COPY     0       0                               
+121    SHOW.CLIPBOARD  0       0                               
+122    MESSAGE 0       0                               
+124    PASTE.LINK      0       0                               
+125    APP.ACTIVATE    0       0                               
+126    DELETE.ARROW    0       0                               
+127    ROW.HEIGHT      0       0                               
+128    FORMAT.MOVE     0       0                               
+129    FORMAT.SIZE     0       0                               
+130    FORMULA.REPLACE 0       0                               
+131    SEND.KEYS       0       0                               
+132    SELECT.SPECIAL  0       0                               
+133    APPLY.NAMES     0       0                               
+134    REPLACE.FONT    0       0                               
+135    FREEZE.PANES    0       0                               
+136    SHOW.INFO       0       0                               
+137    SPLIT   0       0                               
+138    ON.WINDOW       0       0                               
+139    ON.DATA 0       0                               
+140    DISABLE.INPUT   0       0                               
+142    OUTLINE 0       0                               
+143    LIST.NAMES      0       0                               
+144    FILE.CLOSE      0       0                               
+145    SAVE.WORKBOOK   0       0                               
+146    DATA.FORM       0       0                               
+147    COPY.CHART      0       0                               
+148    ON.TIME 0       0                               
+149    WAIT    0       0                               
+150    FORMAT.FONT     0       0                               
+151    FILL.UP 0       0                               
+152    FILL.LEFT       0       0                               
+153    DELETE.OVERLAY  0       0                               
+155    SHORT.MENUS     0       0                               
+159    SET.UPDATE.STATUS       0       0                               
+161    COLOR.PALETTE   0       0                               
+162    DELETE.STYLE    0       0                               
+163    WINDOW.RESTORE  0       0                               
+164    WINDOW.MAXIMIZE 0       0                               
+166    CHANGE.LINK     0       0                               
+167    CALCULATE.DOCUMENT      0       0                               
+168    ON.KEY  0       0                               
+169    APP.RESTORE     0       0                               
+170    APP.MOVE        0       0                               
+171    APP.SIZE        0       0                               
+172    APP.MINIMIZE    0       0                               
+173    APP.MAXIMIZE    0       0                               
+174    BRING.TO.FRONT  0       0                               
+175    SEND.TO.BACK    0       0                               
+185    MAIN.CHART.TYPE 0       0                               
+186    OVERLAY.CHART.TYPE      0       0                               
+187    SELECT.END      0       0                               
+188    OPEN.MAIL       0       0                               
+189    SEND.MAIL       0       0                               
+190    STANDARD.FONT   0       0                               
+191    CONSOLIDATE     0       0                               
+192    SORT.SPECIAL    0       0                               
+193    GALLERY.3D.AREA 0       0                               
+194    GALLERY.3D.COLUMN       0       0                               
+195    GALLERY.3D.LINE 0       0                               
+196    GALLERY.3D.PIE  0       0                               
+197    VIEW.3D 0       0                               
+198    GOAL.SEEK       0       0                               
+199    WORKGROUP       0       0                               
+200    FILL.GROUP      0       0                               
+201    UPDATE.LINK     0       0                               
+202    PROMOTE 0       0                               
+203    DEMOTE  0       0                               
+204    SHOW.DETAIL     0       0                               
+206    UNGROUP 0       0                               
+207    OBJECT.PROPERTIES       0       0                               
+208    SAVE.NEW.OBJECT 0       0                               
+209    SHARE   0       0                               
+210    SHARE.NAME      0       0                               
+211    DUPLICATE       0       0                               
+212    APPLY.STYLE     0       0                               
+213    ASSIGN.TO.OBJECT        0       0                               
+214    OBJECT.PROTECTION       0       0                               
+215    HIDE.OBJECT     0       0                               
+216    SET.EXTRACT     0       0                               
+217    CREATE.PUBLISHER        0       0                               
+218    SUBSCRIBE.TO    0       0                               
+219    ATTRIBUTES      0       0                               
+220    SHOW.TOOLBAR    0       0                               
+222    PRINT.PREVIEW   0       0                               
+223    EDIT.COLOR      0       0                               
+224    SHOW.LEVELS     0       0                               
+225    FORMAT.MAIN     0       0                               
+226    FORMAT.OVERLAY  0       0                               
+227    ON.RECALC       0       0                               
+228    EDIT.SERIES     0       0                               
+229    DEFINE.STYLE    0       0                               
+240    LINE.PRINT      0       0                               
+243    ENTER.DATA      0       0                               
+249    GALLERY.RADAR   0       0                               
+250    MERGE.STYLES    0       0                               
+251    EDITION.OPTIONS 0       0                               
+252    PASTE.PICTURE   0       0                               
+253    PASTE.PICTURE.LINK      0       0                               
+254    SPELLING        0       0                               
+256    ZOOM    0       0                               
+259    INSERT.OBJECT   0       0                               
+260    WINDOW.MINIMIZE 0       0                               
+265    SOUND.NOTE      0       0                               
+266    SOUND.PLAY      0       0                               
+267    FORMAT.SHAPE    0       0                               
+268    EXTEND.POLYGON  0       0                               
+269    FORMAT.AUTO     0       0                               
+272    GALLERY.3D.BAR  0       0                               
+273    GALLERY.3D.SURFACE      0       0                               
+274    FILL.AUTO       0       0                               
+276    CUSTOMIZE.TOOLBAR       0       0                               
+277    ADD.TOOL        0       0                               
+278    EDIT.OBJECT     0       0                               
+279    ON.DOUBLECLICK  0       0                               
+280    ON.ENTRY        0       0                               
+281    WORKBOOK.ADD    0       0                               
+282    WORKBOOK.MOVE   0       0                               
+283    WORKBOOK.COPY   0       0                               
+284    WORKBOOK.OPTIONS        0       0                               
+285    SAVE.WORKSPACE  0       0                               
+288    CHART.WIZARD    0       0                               
+289    DELETE.TOOL     0       0                               
+290    MOVE.TOOL       0       0                               
+291    WORKBOOK.SELECT 0       0                               
+292    WORKBOOK.ACTIVATE       0       0                               
+293    ASSIGN.TO.TOOL  0       0                               
+295    COPY.TOOL       0       0                               
+296    RESET.TOOL      0       0                               
+297    CONSTRAIN.NUMERIC       0       0                               
+298    PASTE.TOOL      0       0                               
+302    WORKBOOK.NEW    0       0                               
+305    SCENARIO.CELLS  0       0                               
+306    SCENARIO.DELETE 0       0                               
+307    SCENARIO.ADD    0       0                               
+308    SCENARIO.EDIT   0       0                               
+309    SCENARIO.SHOW   0       0                               
+310    SCENARIO.SHOW.NEXT      0       0                               
+311    SCENARIO.SUMMARY        0       0                               
+312    PIVOT.TABLE.WIZARD      0       0                               
+313    PIVOT.FIELD.PROPERTIES  0       0                               
+314    PIVOT.FIELD     0       0                               
+315    PIVOT.ITEM      0       0                               
+316    PIVOT.ADD.FIELDS        0       0                               
+318    OPTIONS.CALCULATION     0       0                               
+319    OPTIONS.EDIT    0       0                               
+320    OPTIONS.VIEW    0       0                               
+321    ADDIN.MANAGER   0       0                               
+322    MENU.EDITOR     0       0                               
+323    ATTACH.TOOLBARS 0       0                               
+324    VBAActivate     0       0                               
+325    OPTIONS.CHART   0       0                               
+328    VBA.INSERT.FILE 0       0                               
+330    VBA.PROCEDURE.DEFINITION        0       0                               
+336    ROUTING.SLIP    0       0                               
+338    ROUTE.DOCUMENT  0       0                               
+339    MAIL.LOGON      0       0                               
+342    INSERT.PICTURE  0       0                               
+343    EDIT.TOOL       0       0                               
+344    GALLERY.DOUGHNUT        0       0                               
+350    CHART.TREND     0       0                               
+352    PIVOT.ITEM.PROPERTIES   0       0                               
+354    WORKBOOK.INSERT 0       0                               
+355    OPTIONS.TRANSITION      0       0                               
+356    OPTIONS.GENERAL 0       0                               
+370    FILTER.ADVANCED 0       0                               
+373    MAIL.ADD.MAILER 0       0                               
+374    MAIL.DELETE.MAILER      0       0                               
+375    MAIL.REPLY      0       0                               
+376    MAIL.REPLY.ALL  0       0                               
+377    MAIL.FORWARD    0       0                               
+378    MAIL.NEXT.LETTER        0       0                               
+379    DATA.LABEL      0       0                               
+380    INSERT.TITLE    0       0                               
+381    FONT.PROPERTIES 0       0                               
+382    MACRO.OPTIONS   0       0                               
+383    WORKBOOK.HIDE   0       0                               
+384    WORKBOOK.UNHIDE 0       0                               
+385    WORKBOOK.DELETE 0       0                               
+386    WORKBOOK.NAME   0       0                               
+388    GALLERY.CUSTOM  0       0                               
+390    ADD.CHART.AUTOFORMAT    0       0                               
+391    DELETE.CHART.AUTOFORMAT 0       0                               
+392    CHART.ADD.DATA  0       0                               
+393    AUTO.OUTLINE    0       0                               
+394    TAB.ORDER       0       0                               
+395    SHOW.DIALOG     0       0                               
+396    SELECT.ALL      0       0                               
+397    UNGROUP.SHEETS  0       0                               
+398    SUBTOTAL.CREATE 0       0                               
+399    SUBTOTAL.REMOVE 0       0                               
+400    RENAME.OBJECT   0       0                               
+412    WORKBOOK.SCROLL 0       0                               
+413    WORKBOOK.NEXT   0       0                               
+414    WORKBOOK.PREV   0       0                               
+415    WORKBOOK.TAB.SPLIT      0       0                               
+416    FULL.SCREEN     0       0                               
+417    WORKBOOK.PROTECT        0       0                               
+420    SCROLLBAR.PROPERTIES    0       0                               
+421    PIVOT.SHOW.PAGES        0       0                               
+422    TEXT.TO.COLUMNS 0       0                               
+423    FORMAT.CHARTTYPE        0       0                               
+424    LINK.FORMAT     0       0                               
+425    TRACER.DISPLAY  0       0                               
+430    TRACER.NAVIGATE 0       0                               
+431    TRACER.CLEAR    0       0                               
+432    TRACER.ERROR    0       0                               
+433    PIVOT.FIELD.GROUP       0       0                               
+434    PIVOT.FIELD.UNGROUP     0       0                               
+435    CHECKBOX.PROPERTIES     0       0                               
+436    LABEL.PROPERTIES        0       0                               
+437    LISTBOX.PROPERTIES      0       0                               
+438    EDITBOX.PROPERTIES      0       0                               
+439    PIVOT.REFRESH   0       0                               
+440    LINK.COMBO      0       0                               
+441    OPEN.TEXT       0       0                               
+442    HIDE.DIALOG     0       0                               
+443    SET.DIALOG.FOCUS        0       0                               
+444    ENABLE.OBJECT   0       0                               
+445    PUSHBUTTON.PROPERTIES   0       0                               
+446    SET.DIALOG.DEFAULT      0       0                               
+447    FILTER  0       0                               
+448    FILTER.SHOW.ALL 0       0                               
+449    CLEAR.OUTLINE   0       0                               
+450    FUNCTION.WIZARD 0       0                               
+451    ADD.LIST.ITEM   0       0                               
+452    SET.LIST.ITEM   0       0                               
+453    REMOVE.LIST.ITEM        0       0                               
+454    SELECT.LIST.ITEM        0       0                               
+455    SET.CONTROL.VALUE       0       0                               
+456    SAVE.COPY.AS    0       0                               
+458    OPTIONS.LISTS.ADD       0       0                               
+459    OPTIONS.LISTS.DELETE    0       0                               
+460    SERIES.AXES     0       0                               
+461    SERIES.X        0       0                               
+462    SERIES.Y        0       0                               
+463    ERRORBAR.X      0       0                               
+464    ERRORBAR.Y      0       0                               
+465    FORMAT.CHART    0       0                               
+466    SERIES.ORDER    0       0                               
+467    MAIL.LOGOFF     0       0                               
+468    CLEAR.ROUTING.SLIP      0       0                               
+469    APP.ACTIVATE.MICROSOFT  0       0                               
+470    MAIL.EDIT.MAILER        0       0                               
+471    ON.SHEET        0       0                               
+472    STANDARD.WIDTH  0       0                               
+473    SCENARIO.MERGE  0       0                               
+474    SUMMARY.INFO    0       0                               
+475    FIND.FILE       0       0                               
+476    ACTIVE.CELL.FONT        0       0                               
+477    ENABLE.TIPWIZARD        0       0                               
+478    VBA.MAKE.ADDIN  0       0                               
+480    INSERTDATATABLE 0       0                               
+481    WORKGROUP.OPTIONS       0       0                               
+482    MAIL.SEND.MAILER        0       0                               
+485    AUTOCORRECT     0       0                               
+489    POST.DOCUMENT   0       0                               
+491    PICKLIST        0       0                               
+493    VIEW.SHOW       0       0                               
+494    VIEW.DEFINE     0       0                               
+495    VIEW.DELETE     0       0                               
+509    SHEET.BACKGROUND        0       0                               
+510    INSERT.MAP.OBJECT       0       0                               
+511    OPTIONS.MENONO  0       0                               
+517    MSOCHECKS       0       0                               
+518    NORMAL  0       0                               
+519    LAYOUT  0       0                               
+520    RM.PRINT.AREA   0       0                               
+521    CLEAR.PRINT.AREA        0       0                               
+522    ADD.PRINT.AREA  0       0                               
+523    MOVE.BRK        0       0                               
+545    HIDECURR.NOTE   0       0                               
+546    HIDEALL.NOTES   0       0                               
+547    DELETE.NOTE     0       0                               
+548    TRAVERSE.NOTES  0       0                               
+549    ACTIVATE.NOTES  0       0                               
+620    PROTECT.REVISIONS       0       0                               
+621    UNPROTECT.REVISIONS     0       0                               
+647    OPTIONS.ME      0       0                               
+653    WEB.PUBLISH     0       0                               
+667    NEWWEBQUERY     0       0                               
+673    PIVOT.TABLE.CHART       0       0                               
+753    OPTIONS.SAVE    0       0                               
+755    OPTIONS.SPELL   0       0                               
+808    HIDEALL.INKANNOTS       0       0                               
index fd9c49dc12a157233147b2e21bac0c7ff465fbc9..ce4f2b7be88060f27e4228cda0db6cf041124db4 100644 (file)
@@ -34,7 +34,6 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.util.LocaleUtil;
-import org.junit.After;
 import org.junit.Test;
 
 /**
@@ -130,50 +129,42 @@ public final class TestExcelExtractor {
     public void testEventExtractor() throws Exception {
                // First up, a simple file with string
                //  based formulas in it
-               EventBasedExcelExtractor extractor1 = null;
-               try {
-               extractor1 = new EventBasedExcelExtractor(
-                       new POIFSFileSystem(
-                               HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
-                       )
-               );
-               extractor1.setIncludeSheetNames(true);
-        
-               String text = extractor1.getText();
-               assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
-    
-               extractor1.setIncludeSheetNames(false);
-               extractor1.setFormulasNotResults(true);
-    
-               text = extractor1.getText();
-               assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
-               } finally {
-                   if (extractor1 != null) extractor1.close();
+               try (EventBasedExcelExtractor extractor1 = new EventBasedExcelExtractor(
+                               new POIFSFileSystem(
+                                               HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
+                               )
+               )) {
+                       extractor1.setIncludeSheetNames(true);
+
+                       String text = extractor1.getText();
+                       assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
+
+                       extractor1.setIncludeSheetNames(false);
+                       extractor1.setFormulasNotResults(true);
+
+                       text = extractor1.getText();
+                       assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
                }
 
                // Now, a slightly longer file with numeric formulas
-               EventBasedExcelExtractor extractor2 = null;
-               try {
-                   extractor2 = new EventBasedExcelExtractor(
-                       new POIFSFileSystem(
-                               HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
-                       )
-               );                  
-                   
-               extractor2.setIncludeSheetNames(false);
-               extractor2.setFormulasNotResults(true);
-    
-               String text = extractor2.getText();
-               assertEquals(
-                               "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
-                               "2000\t2\n" +
-                               "3000\t3\n" +
-                               "4000\t4\n" +
-                               "5000\t5\n",
-                               text
-               );
-               } finally {
-                   if (extractor2 != null) extractor2.close();
+               try (EventBasedExcelExtractor extractor2 = new EventBasedExcelExtractor(
+                               new POIFSFileSystem(
+                                               HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
+                               )
+               )) {
+
+                       extractor2.setIncludeSheetNames(false);
+                       extractor2.setFormulasNotResults(true);
+
+                       String text = extractor2.getText();
+                       assertEquals(
+                                       "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
+                                                       "2000\t2\n" +
+                                                       "3000\t3\n" +
+                                                       "4000\t4\n" +
+                                                       "5000\t5\n",
+                                       text
+                       );
                }
        }
 
@@ -372,4 +363,25 @@ public final class TestExcelExtractor {
                        assertContains(txt, "NONBUSINESS");
                }
        }
+
+       @Test
+       public void test60405a() throws IOException {
+               //bug 61045. File is govdocs1 626534
+               try (ExcelExtractor extractor = createExtractor("60405.xls")) {
+                       String txt = extractor.getText();
+                       assertContains(txt, "Macro1");
+                       assertContains(txt, "Macro2");
+               }
+       }
+
+       @Test
+       public void test60405b() throws IOException {
+               //bug 61045. File is govdocs1 626534
+               try (ExcelExtractor extractor = createExtractor("60405.xls")) {
+                       extractor.setFormulasNotResults(true);
+                       String txt = extractor.getText();
+                       assertContains(txt, "Macro1");
+                       assertContains(txt, "Macro2");
+               }
+       }
 }
diff --git a/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java b/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java
new file mode 100644 (file)
index 0000000..db87e07
--- /dev/null
@@ -0,0 +1,378 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.formula.function;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * This class is not used during normal POI run-time but is used at development time to generate
+ * the file 'functionMetadataCetab.txt'.  There are more than 300 built-in functions in Excel in
+ * the Cetab and the intention of this class is to make it easier to maintain the metadata, by extracting
+ * it from a reliable source.
+ */
+public final class ExcelCetabFunctionExtractor {
+
+       private static final String SOURCE_DOC_FILE_NAME = "functionMetadataCetab-PDF.txt";
+
+       /**
+        * For simplicity, the output file is strictly simple ASCII.
+        * This method detects any unexpected characters.
+        */
+       /* package */ static boolean isSimpleAscii(char c) {
+
+               if (c>=0x21 && c<=0x7E) {
+                       // everything from '!' to '~' (includes letters, digits, punctuation
+                       return true;
+               }
+               // some specific whitespace chars below 0x21:
+               switch(c) {
+                       case ' ':
+                       case '\t':
+                       case '\r':
+                       case '\n':
+                               return true;
+               }
+               return false;
+       }
+
+
+       private static final class FunctionData {
+               // special characters from the ooo document
+               private static final int CHAR_ELLIPSIS_8230 = 8230;
+               private static final int CHAR_NDASH_8211 = 8211;
+
+               private final int _index;
+               private final boolean _hasFootnote;
+               private final String _name;
+               private final int _minParams;
+               private final int _maxParams;
+               private final String _returnClass;
+               private final String _paramClasses;
+               private final boolean _isVolatile;
+
+               public FunctionData(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
+                                       String returnClass, String paramClasses, boolean isVolatile) {
+                       _index = funcIx;
+                       _hasFootnote = hasFootnote;
+                       _name = funcName;
+                       _minParams = minParams;
+                       _maxParams = maxParams;
+                       _returnClass = convertSpecialChars(returnClass);
+                       _paramClasses = convertSpecialChars(paramClasses);
+                       _isVolatile = isVolatile;
+               }
+               private static String convertSpecialChars(String ss) {
+                       StringBuilder sb = new StringBuilder(ss.length() + 4);
+                       for(int i=0; i<ss.length(); i++) {
+                               char c = ss.charAt(i);
+                               if (isSimpleAscii(c)) {
+                                       sb.append(c);
+                                       continue;
+                               }
+                               switch (c) {
+                                       case CHAR_NDASH_8211:
+                                               sb.append('-');
+                                               continue;
+                                       case CHAR_ELLIPSIS_8230:
+                                               sb.append("...");
+                                               continue;
+                               }
+                               throw new RuntimeException("bad char (" + ((int)c) + ") in string '" + ss + "'");
+                       }
+                       return sb.toString();
+               }
+               public int getIndex() {
+                       return _index;
+               }
+               public String getName() {
+                       return _name;
+               }
+               public boolean hasFootnote() {
+                       return _hasFootnote;
+               }
+               public String formatAsDataLine() {
+                       return _index + "\t" + _name + "\t" + _minParams + "\t"
+                                       + _maxParams + "\t" + _returnClass + "\t" + _paramClasses
+                                       + "\t" + checkMark(_isVolatile) + "\t" + checkMark(_hasFootnote);
+               }
+               private static String checkMark(boolean b) {
+                       return b ? "x" : "";
+               }
+       }
+
+       private static final class FunctionDataCollector {
+
+               private final Map<Integer, FunctionData> _allFunctionsByIndex;
+               private final Map<String, FunctionData> _allFunctionsByName;
+               private final Set<Integer> _groupFunctionIndexes;
+               private final Set<String> _groupFunctionNames;
+               private final PrintStream _ps;
+
+               public FunctionDataCollector(PrintStream ps) {
+                       _ps = ps;
+                       _allFunctionsByIndex = new HashMap<>();
+                       _allFunctionsByName = new HashMap<>();
+                       _groupFunctionIndexes = new HashSet<>();
+                       _groupFunctionNames = new HashSet<>();
+               }
+
+               public void addFunction(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
+                                                               String returnClass, String paramClasses, String volatileFlagStr) {
+                       boolean isVolatile = volatileFlagStr.length() > 0;
+
+                       Integer funcIxKey = Integer.valueOf(funcIx);
+                       if(!_groupFunctionIndexes.add(funcIxKey)) {
+                               throw new RuntimeException("Duplicate function index (" + funcIx + ")");
+                       }
+                       if(!_groupFunctionNames.add(funcName)) {
+                               throw new RuntimeException("Duplicate function name '" + funcName + "'");
+                       }
+
+                       checkRedefinedFunction(hasFootnote, funcName, funcIxKey);
+                       FunctionData fd = new FunctionData(funcIx, hasFootnote, funcName,
+                                       minParams, maxParams, returnClass, paramClasses, isVolatile);
+
+                       _allFunctionsByIndex.put(funcIxKey, fd);
+                       _allFunctionsByName.put(funcName, fd);
+               }
+
+               /**
+                * Some extra validation here.
+                * Any function which changes definition will have a footnote in the source document
+                */
+               private void checkRedefinedFunction(boolean hasNote, String funcName, Integer funcIxKey) {
+                       FunctionData fdPrev;
+                       // check by index
+                       fdPrev = _allFunctionsByIndex.get(funcIxKey);
+                       if(fdPrev != null) {
+                               if(!fdPrev.hasFootnote() || !hasNote) {
+                                       throw new RuntimeException("changing function ["
+                                                       + funcIxKey + "] definition without foot-note");
+                               }
+                               _allFunctionsByName.remove(fdPrev.getName());
+                       }
+                       // check by name
+                       fdPrev = _allFunctionsByName.get(funcName);
+                       if(fdPrev != null) {
+                               if(!fdPrev.hasFootnote() || !hasNote) {
+                                       throw new RuntimeException("changing function '"
+                                                       + funcName + "' definition without foot-note");
+                               }
+                               _allFunctionsByIndex.remove(Integer.valueOf(fdPrev.getIndex()));
+                       }
+               }
+
+               public void endTableGroup(String headingText) {
+                       Integer[] keys = new Integer[_groupFunctionIndexes.size()];
+                       _groupFunctionIndexes.toArray(keys);
+                       _groupFunctionIndexes.clear();
+                       _groupFunctionNames.clear();
+                       Arrays.sort(keys);
+
+                       _ps.println("# " + headingText);
+                       for (Integer key : keys) {
+                               FunctionData fd = _allFunctionsByIndex.get(key);
+                               _ps.println(fd.formatAsDataLine());
+                       }
+               }
+       }
+
+       private static final Pattern ID_MATCH = Pattern.compile("0x([\\dA-F]+)");
+       private static final Pattern NAME_MATCH = Pattern.compile("([0-9A-Z.]+)");
+       private static final Pattern ID_NAME_MATCH = Pattern.compile("0x([\\dA-F]+)\\s+([0-9A-Z.]+)");
+
+       private static final Set<String> IGNORED_LINES = new HashSet<>();
+       static {
+               IGNORED_LINES.add("[MS-XLS] — v20141018");
+               IGNORED_LINES.add("Excel Binary File Format (.xls) Structure");
+               IGNORED_LINES.add("Copyright © 2014 Microsoft Corporation.");
+               IGNORED_LINES.add("Release: October 30, 2014Value Meaning");
+               IGNORED_LINES.add("Release: October 30, 2014Value");
+               IGNORED_LINES.add("Meaning");
+       }
+
+       private static void extractFunctionData(FunctionDataCollector fdc, InputStream is) throws IOException {
+               try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+
+                       String id = null;
+                       String name = null;
+                       while (true) {
+                               String line = reader.readLine();
+                               if(line == null) {
+                                       break;
+                               }
+
+                               if(IGNORED_LINES.contains(line) || line.matches("\\d+ / \\d+")) {
+                                       continue;
+                               }
+
+                               Matcher idMatcher = ID_MATCH.matcher(line);
+                               boolean foundID = idMatcher.matches();
+                               Matcher nameMatcher = NAME_MATCH.matcher(line);
+                               boolean foundName = nameMatcher.matches();
+                               Matcher idAndNameMatcher = ID_NAME_MATCH.matcher(line);
+                               boolean foundIDandName = idAndNameMatcher.matches();
+                               if(foundID && foundName ||
+                                               foundName && foundIDandName ||
+                                               foundID && foundIDandName) {
+                                       throw new IllegalStateException("Should not find id and name: " +
+                                                       foundID + "/" + foundName + "/" + foundIDandName +
+                                                       ", line: " + line);
+                               }
+
+                               if(foundID && id != null) {
+                                       throw new IllegalStateException("Found ID, but already had one: " + id + ", line: " + line);
+                               }
+                               if(foundName && name != null) {
+                                       throw new IllegalStateException("Found name, but already had one: " + name + ", line: " + line);
+                               }
+                               if(foundIDandName && (name != null || id != null)) {
+                                       throw new IllegalStateException("Found name and id, but already had one: id: " + id + ", name: " + name + ", line: " + line);
+                               }
+
+                               if(foundID) {
+                                       id = idMatcher.group(1);
+                               } else if (foundName) {
+                                       name = nameMatcher.group(1);
+                               } else if (foundIDandName) {
+                                       id = idAndNameMatcher.group(1);
+                                       name = idAndNameMatcher.group(2);
+                                       // manual override for one function name which contains lowercase characters
+                               } else if(line.equals("VBAActivate")) {
+                                       name = line;
+                               } else if (id == null || name == null) {
+                                       throw new IllegalStateException("Found params, but had empty id or name, id: " + id +
+                                                       ", name: " + name + ", line: " + line);
+                               } else {
+                                       System.out.println("Found function " + id + " " + name + " " + line);
+                                       fdc.addFunction(Integer.parseInt(id, 16), false, name, 0, 0,
+                                                       "", "", "");
+
+                                       id = null;
+                                       name = null;
+                               }
+                       }
+               }
+
+               fdc.endTableGroup("");
+       }
+       /**
+        * To be sure that no tricky unicode chars make it through to the output file.
+        */
+       private static final class SimpleAsciiOutputStream extends OutputStream {
+
+               private final OutputStream _os;
+
+               public SimpleAsciiOutputStream(OutputStream os) {
+                       _os = os;
+               }
+               
+               @Override
+        public void write(int b) throws IOException {
+                       checkByte(b);
+                       _os.write(b);
+               }
+
+               private static void checkByte(int b) {
+                       if (!isSimpleAscii((char)b)) {
+                               throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected");
+                       }
+               }
+
+               @Override
+        public void write(byte[] b, int off, int len) throws IOException {
+                       for (int i = 0; i < len; i++) {
+                               checkByte(b[i + off]);
+
+                       }
+                       _os.write(b, off, len);
+               }
+       }
+
+       private static void processFile(InputStream input, File outFile) throws IOException {
+               try (OutputStream os = new SimpleAsciiOutputStream(new FileOutputStream(outFile));
+               PrintStream ps = new PrintStream(os, true, "UTF-8")) {
+
+                       outputLicenseHeader(ps);
+                       Class<?> genClass = ExcelCetabFunctionExtractor.class;
+                       ps.println("# Created by (" + genClass.getName() + ")");
+                       // identify the source file
+                       ps.println("# from source file '" + SOURCE_DOC_FILE_NAME + "'");
+                       ps.println("#");
+                       ps.println("#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )");
+                       ps.println();
+                       extractFunctionData(new FunctionDataCollector(ps), input);
+                       ps.close();
+
+                       String canonicalOutputFileName = outFile.getCanonicalPath();
+                       System.out.println("Successfully output to '" + canonicalOutputFileName + "'");
+               }
+       }
+
+       private static void outputLicenseHeader(PrintStream ps) {
+               String[] lines= {
+                       "Licensed to the Apache Software Foundation (ASF) under one or more",
+                       "contributor license agreements.  See the NOTICE file distributed with",
+                       "this work for additional information regarding copyright ownership.",
+                       "The ASF licenses this file to You under the Apache License, Version 2.0",
+                       "(the \"License\"); you may not use this file except in compliance with",
+                       "the License.  You may obtain a copy of the License at",
+                       "",
+                       "    http://www.apache.org/licenses/LICENSE-2.0",
+                       "",
+                       "Unless required by applicable law or agreed to in writing, software",
+                       "distributed under the License is distributed on an \"AS IS\" BASIS,",
+                       "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
+                       "See the License for the specific language governing permissions and",
+                       "limitations under the License.",
+               };
+               for (String line : lines) {
+                       ps.print("# ");
+                       ps.println(line);
+               }
+               ps.println();
+       }
+
+       public static void main(String[] args) throws IOException {
+               if(!new File(SOURCE_DOC_FILE_NAME).exists()) {
+                       throw new IllegalStateException("Did not find file " + SOURCE_DOC_FILE_NAME + " in the resources");
+               }
+
+               try (InputStream stream = new FileInputStream(SOURCE_DOC_FILE_NAME)) {
+                       File outFile = new File("functionMetadataCetab.txt");
+
+                       processFile(stream, outFile);
+               }
+       }
+}
diff --git a/test-data/spreadsheet/60405.xls b/test-data/spreadsheet/60405.xls
new file mode 100644 (file)
index 0000000..a637104
Binary files /dev/null and b/test-data/spreadsheet/60405.xls differ