aboutsummaryrefslogtreecommitdiffstats
path: root/src/testcases
diff options
context:
space:
mode:
authorDominik Stadler <centic@apache.org>2019-01-27 09:57:39 +0000
committerDominik Stadler <centic@apache.org>2019-01-27 09:57:39 +0000
commitc376d662f92577aa6226e454bb3eb5f113ec817d (patch)
tree3903acfe8eb5f863bb4fc1dac1218a7e2b177989 /src/testcases
parente59f9c6461f4e6ee34129970b94c51c4098bd710 (diff)
downloadpoi-c376d662f92577aa6226e454bb3eb5f113ec817d.tar.gz
poi-c376d662f92577aa6226e454bb3eb5f113ec817d.zip
Bug 60405: Add initial support for cetab functions so some macros can be
parsed Add some function-definitions for Excel 4 Macros and missing functions found in regression tests git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1852277 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/testcases')
-rw-r--r--src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java96
-rw-r--r--src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java378
2 files changed, 432 insertions, 42 deletions
diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
index fd9c49dc12..ce4f2b7be8 100644
--- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
+++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
@@ -34,7 +34,6 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LocaleUtil;
-import org.junit.After;
import org.junit.Test;
/**
@@ -130,50 +129,42 @@ public final class TestExcelExtractor {
public void testEventExtractor() throws Exception {
// First up, a simple file with string
// based formulas in it
- EventBasedExcelExtractor extractor1 = null;
- try {
- extractor1 = new EventBasedExcelExtractor(
- new POIFSFileSystem(
- HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
- )
- );
- extractor1.setIncludeSheetNames(true);
-
- String text = extractor1.getText();
- assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
-
- extractor1.setIncludeSheetNames(false);
- extractor1.setFormulasNotResults(true);
-
- text = extractor1.getText();
- assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
- } finally {
- if (extractor1 != null) extractor1.close();
+ try (EventBasedExcelExtractor extractor1 = new EventBasedExcelExtractor(
+ new POIFSFileSystem(
+ HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
+ )
+ )) {
+ extractor1.setIncludeSheetNames(true);
+
+ String text = extractor1.getText();
+ assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
+
+ extractor1.setIncludeSheetNames(false);
+ extractor1.setFormulasNotResults(true);
+
+ text = extractor1.getText();
+ assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
}
// Now, a slightly longer file with numeric formulas
- EventBasedExcelExtractor extractor2 = null;
- try {
- extractor2 = new EventBasedExcelExtractor(
- new POIFSFileSystem(
- HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
- )
- );
-
- extractor2.setIncludeSheetNames(false);
- extractor2.setFormulasNotResults(true);
-
- String text = extractor2.getText();
- assertEquals(
- "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
- "2000\t2\n" +
- "3000\t3\n" +
- "4000\t4\n" +
- "5000\t5\n",
- text
- );
- } finally {
- if (extractor2 != null) extractor2.close();
+ try (EventBasedExcelExtractor extractor2 = new EventBasedExcelExtractor(
+ new POIFSFileSystem(
+ HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
+ )
+ )) {
+
+ extractor2.setIncludeSheetNames(false);
+ extractor2.setFormulasNotResults(true);
+
+ String text = extractor2.getText();
+ assertEquals(
+ "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
+ "2000\t2\n" +
+ "3000\t3\n" +
+ "4000\t4\n" +
+ "5000\t5\n",
+ text
+ );
}
}
@@ -372,4 +363,25 @@ public final class TestExcelExtractor {
assertContains(txt, "NONBUSINESS");
}
}
+
+ @Test
+ public void test60405a() throws IOException {
+ //bug 61045. File is govdocs1 626534
+ try (ExcelExtractor extractor = createExtractor("60405.xls")) {
+ String txt = extractor.getText();
+ assertContains(txt, "Macro1");
+ assertContains(txt, "Macro2");
+ }
+ }
+
+ @Test
+ public void test60405b() throws IOException {
+ //bug 61045. File is govdocs1 626534
+ try (ExcelExtractor extractor = createExtractor("60405.xls")) {
+ extractor.setFormulasNotResults(true);
+ String txt = extractor.getText();
+ assertContains(txt, "Macro1");
+ assertContains(txt, "Macro2");
+ }
+ }
}
diff --git a/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java b/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java
new file mode 100644
index 0000000000..db87e07377
--- /dev/null
+++ b/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java
@@ -0,0 +1,378 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.formula.function;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * This class is not used during normal POI run-time but is used at development time to generate
+ * the file 'functionMetadataCetab.txt'. There are more than 300 built-in functions in Excel in
+ * the Cetab and the intention of this class is to make it easier to maintain the metadata, by extracting
+ * it from a reliable source.
+ */
+public final class ExcelCetabFunctionExtractor {
+
+ private static final String SOURCE_DOC_FILE_NAME = "functionMetadataCetab-PDF.txt";
+
+ /**
+ * For simplicity, the output file is strictly simple ASCII.
+ * This method detects any unexpected characters.
+ */
+ /* package */ static boolean isSimpleAscii(char c) {
+
+ if (c>=0x21 && c<=0x7E) {
+ // everything from '!' to '~' (includes letters, digits, punctuation
+ return true;
+ }
+ // some specific whitespace chars below 0x21:
+ switch(c) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ return true;
+ }
+ return false;
+ }
+
+
+ private static final class FunctionData {
+ // special characters from the ooo document
+ private static final int CHAR_ELLIPSIS_8230 = 8230;
+ private static final int CHAR_NDASH_8211 = 8211;
+
+ private final int _index;
+ private final boolean _hasFootnote;
+ private final String _name;
+ private final int _minParams;
+ private final int _maxParams;
+ private final String _returnClass;
+ private final String _paramClasses;
+ private final boolean _isVolatile;
+
+ public FunctionData(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
+ String returnClass, String paramClasses, boolean isVolatile) {
+ _index = funcIx;
+ _hasFootnote = hasFootnote;
+ _name = funcName;
+ _minParams = minParams;
+ _maxParams = maxParams;
+ _returnClass = convertSpecialChars(returnClass);
+ _paramClasses = convertSpecialChars(paramClasses);
+ _isVolatile = isVolatile;
+ }
+ private static String convertSpecialChars(String ss) {
+ StringBuilder sb = new StringBuilder(ss.length() + 4);
+ for(int i=0; i<ss.length(); i++) {
+ char c = ss.charAt(i);
+ if (isSimpleAscii(c)) {
+ sb.append(c);
+ continue;
+ }
+ switch (c) {
+ case CHAR_NDASH_8211:
+ sb.append('-');
+ continue;
+ case CHAR_ELLIPSIS_8230:
+ sb.append("...");
+ continue;
+ }
+ throw new RuntimeException("bad char (" + ((int)c) + ") in string '" + ss + "'");
+ }
+ return sb.toString();
+ }
+ public int getIndex() {
+ return _index;
+ }
+ public String getName() {
+ return _name;
+ }
+ public boolean hasFootnote() {
+ return _hasFootnote;
+ }
+ public String formatAsDataLine() {
+ return _index + "\t" + _name + "\t" + _minParams + "\t"
+ + _maxParams + "\t" + _returnClass + "\t" + _paramClasses
+ + "\t" + checkMark(_isVolatile) + "\t" + checkMark(_hasFootnote);
+ }
+ private static String checkMark(boolean b) {
+ return b ? "x" : "";
+ }
+ }
+
+ private static final class FunctionDataCollector {
+
+ private final Map<Integer, FunctionData> _allFunctionsByIndex;
+ private final Map<String, FunctionData> _allFunctionsByName;
+ private final Set<Integer> _groupFunctionIndexes;
+ private final Set<String> _groupFunctionNames;
+ private final PrintStream _ps;
+
+ public FunctionDataCollector(PrintStream ps) {
+ _ps = ps;
+ _allFunctionsByIndex = new HashMap<>();
+ _allFunctionsByName = new HashMap<>();
+ _groupFunctionIndexes = new HashSet<>();
+ _groupFunctionNames = new HashSet<>();
+ }
+
+ public void addFunction(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
+ String returnClass, String paramClasses, String volatileFlagStr) {
+ boolean isVolatile = volatileFlagStr.length() > 0;
+
+ Integer funcIxKey = Integer.valueOf(funcIx);
+ if(!_groupFunctionIndexes.add(funcIxKey)) {
+ throw new RuntimeException("Duplicate function index (" + funcIx + ")");
+ }
+ if(!_groupFunctionNames.add(funcName)) {
+ throw new RuntimeException("Duplicate function name '" + funcName + "'");
+ }
+
+ checkRedefinedFunction(hasFootnote, funcName, funcIxKey);
+ FunctionData fd = new FunctionData(funcIx, hasFootnote, funcName,
+ minParams, maxParams, returnClass, paramClasses, isVolatile);
+
+ _allFunctionsByIndex.put(funcIxKey, fd);
+ _allFunctionsByName.put(funcName, fd);
+ }
+
+ /**
+ * Some extra validation here.
+ * Any function which changes definition will have a footnote in the source document
+ */
+ private void checkRedefinedFunction(boolean hasNote, String funcName, Integer funcIxKey) {
+ FunctionData fdPrev;
+ // check by index
+ fdPrev = _allFunctionsByIndex.get(funcIxKey);
+ if(fdPrev != null) {
+ if(!fdPrev.hasFootnote() || !hasNote) {
+ throw new RuntimeException("changing function ["
+ + funcIxKey + "] definition without foot-note");
+ }
+ _allFunctionsByName.remove(fdPrev.getName());
+ }
+ // check by name
+ fdPrev = _allFunctionsByName.get(funcName);
+ if(fdPrev != null) {
+ if(!fdPrev.hasFootnote() || !hasNote) {
+ throw new RuntimeException("changing function '"
+ + funcName + "' definition without foot-note");
+ }
+ _allFunctionsByIndex.remove(Integer.valueOf(fdPrev.getIndex()));
+ }
+ }
+
+ public void endTableGroup(String headingText) {
+ Integer[] keys = new Integer[_groupFunctionIndexes.size()];
+ _groupFunctionIndexes.toArray(keys);
+ _groupFunctionIndexes.clear();
+ _groupFunctionNames.clear();
+ Arrays.sort(keys);
+
+ _ps.println("# " + headingText);
+ for (Integer key : keys) {
+ FunctionData fd = _allFunctionsByIndex.get(key);
+ _ps.println(fd.formatAsDataLine());
+ }
+ }
+ }
+
+ private static final Pattern ID_MATCH = Pattern.compile("0x([\\dA-F]+)");
+ private static final Pattern NAME_MATCH = Pattern.compile("([0-9A-Z.]+)");
+ private static final Pattern ID_NAME_MATCH = Pattern.compile("0x([\\dA-F]+)\\s+([0-9A-Z.]+)");
+
+ private static final Set<String> IGNORED_LINES = new HashSet<>();
+ static {
+ IGNORED_LINES.add("[MS-XLS] — v20141018");
+ IGNORED_LINES.add("Excel Binary File Format (.xls) Structure");
+ IGNORED_LINES.add("Copyright © 2014 Microsoft Corporation.");
+ IGNORED_LINES.add("Release: October 30, 2014Value Meaning");
+ IGNORED_LINES.add("Release: October 30, 2014Value");
+ IGNORED_LINES.add("Meaning");
+ }
+
+ private static void extractFunctionData(FunctionDataCollector fdc, InputStream is) throws IOException {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+
+ String id = null;
+ String name = null;
+ while (true) {
+ String line = reader.readLine();
+ if(line == null) {
+ break;
+ }
+
+ if(IGNORED_LINES.contains(line) || line.matches("\\d+ / \\d+")) {
+ continue;
+ }
+
+ Matcher idMatcher = ID_MATCH.matcher(line);
+ boolean foundID = idMatcher.matches();
+ Matcher nameMatcher = NAME_MATCH.matcher(line);
+ boolean foundName = nameMatcher.matches();
+ Matcher idAndNameMatcher = ID_NAME_MATCH.matcher(line);
+ boolean foundIDandName = idAndNameMatcher.matches();
+ if(foundID && foundName ||
+ foundName && foundIDandName ||
+ foundID && foundIDandName) {
+ throw new IllegalStateException("Should not find id and name: " +
+ foundID + "/" + foundName + "/" + foundIDandName +
+ ", line: " + line);
+ }
+
+ if(foundID && id != null) {
+ throw new IllegalStateException("Found ID, but already had one: " + id + ", line: " + line);
+ }
+ if(foundName && name != null) {
+ throw new IllegalStateException("Found name, but already had one: " + name + ", line: " + line);
+ }
+ if(foundIDandName && (name != null || id != null)) {
+ throw new IllegalStateException("Found name and id, but already had one: id: " + id + ", name: " + name + ", line: " + line);
+ }
+
+ if(foundID) {
+ id = idMatcher.group(1);
+ } else if (foundName) {
+ name = nameMatcher.group(1);
+ } else if (foundIDandName) {
+ id = idAndNameMatcher.group(1);
+ name = idAndNameMatcher.group(2);
+ // manual override for one function name which contains lowercase characters
+ } else if(line.equals("VBAActivate")) {
+ name = line;
+ } else if (id == null || name == null) {
+ throw new IllegalStateException("Found params, but had empty id or name, id: " + id +
+ ", name: " + name + ", line: " + line);
+ } else {
+ System.out.println("Found function " + id + " " + name + " " + line);
+ fdc.addFunction(Integer.parseInt(id, 16), false, name, 0, 0,
+ "", "", "");
+
+ id = null;
+ name = null;
+ }
+ }
+ }
+
+ fdc.endTableGroup("");
+ }
+ /**
+ * To be sure that no tricky unicode chars make it through to the output file.
+ */
+ private static final class SimpleAsciiOutputStream extends OutputStream {
+
+ private final OutputStream _os;
+
+ public SimpleAsciiOutputStream(OutputStream os) {
+ _os = os;
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ checkByte(b);
+ _os.write(b);
+ }
+
+ private static void checkByte(int b) {
+ if (!isSimpleAscii((char)b)) {
+ throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected");
+ }
+ }
+
+ @Override
+ public void write(byte[] b, int off, int len) throws IOException {
+ for (int i = 0; i < len; i++) {
+ checkByte(b[i + off]);
+
+ }
+ _os.write(b, off, len);
+ }
+ }
+
+ private static void processFile(InputStream input, File outFile) throws IOException {
+ try (OutputStream os = new SimpleAsciiOutputStream(new FileOutputStream(outFile));
+ PrintStream ps = new PrintStream(os, true, "UTF-8")) {
+
+ outputLicenseHeader(ps);
+ Class<?> genClass = ExcelCetabFunctionExtractor.class;
+ ps.println("# Created by (" + genClass.getName() + ")");
+ // identify the source file
+ ps.println("# from source file '" + SOURCE_DOC_FILE_NAME + "'");
+ ps.println("#");
+ ps.println("#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )");
+ ps.println();
+ extractFunctionData(new FunctionDataCollector(ps), input);
+ ps.close();
+
+ String canonicalOutputFileName = outFile.getCanonicalPath();
+ System.out.println("Successfully output to '" + canonicalOutputFileName + "'");
+ }
+ }
+
+ private static void outputLicenseHeader(PrintStream ps) {
+ String[] lines= {
+ "Licensed to the Apache Software Foundation (ASF) under one or more",
+ "contributor license agreements. See the NOTICE file distributed with",
+ "this work for additional information regarding copyright ownership.",
+ "The ASF licenses this file to You under the Apache License, Version 2.0",
+ "(the \"License\"); you may not use this file except in compliance with",
+ "the License. You may obtain a copy of the License at",
+ "",
+ " http://www.apache.org/licenses/LICENSE-2.0",
+ "",
+ "Unless required by applicable law or agreed to in writing, software",
+ "distributed under the License is distributed on an \"AS IS\" BASIS,",
+ "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
+ "See the License for the specific language governing permissions and",
+ "limitations under the License.",
+ };
+ for (String line : lines) {
+ ps.print("# ");
+ ps.println(line);
+ }
+ ps.println();
+ }
+
+ public static void main(String[] args) throws IOException {
+ if(!new File(SOURCE_DOC_FILE_NAME).exists()) {
+ throw new IllegalStateException("Did not find file " + SOURCE_DOC_FILE_NAME + " in the resources");
+ }
+
+ try (InputStream stream = new FileInputStream(SOURCE_DOC_FILE_NAME)) {
+ File outFile = new File("functionMetadataCetab.txt");
+
+ processFile(stream, outFile);
+ }
+ }
+}