From 9d0ca2d5734d9a3d87e7766409ff35988b996329 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Tue, 6 May 2008 19:48:55 +0000 Subject: [PATCH] Another attempt to fix unicode problems in functionMetadata.txt. Made that file pure ascii. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@653892 13f79535-47bb-0310-9956-ffa450edef68 --- .../function/FunctionMetadataReader.java | 18 ++-- .../function/functionMetadata-asGenerated.txt | 46 +++++----- .../formula/function/functionMetadata.txt | 46 +++++----- .../ExcelFileFormatDocFunctionExtractor.java | 85 +++++++++++++++++-- 4 files changed, 136 insertions(+), 59 deletions(-) diff --git a/src/java/org/apache/poi/hssf/record/formula/function/FunctionMetadataReader.java b/src/java/org/apache/poi/hssf/record/formula/function/FunctionMetadataReader.java index 9ea428c8df..5ce7327199 100644 --- a/src/java/org/apache/poi/hssf/record/formula/function/FunctionMetadataReader.java +++ b/src/java/org/apache/poi/hssf/record/formula/function/FunctionMetadataReader.java @@ -37,15 +37,14 @@ import org.apache.poi.hssf.record.formula.Ptg; final class FunctionMetadataReader { private static final String METADATA_FILE_NAME = "functionMetadata.txt"; + + /** plain ASCII text metadata file uses three dots for ellipsis */ + private static final String ELLIPSIS = "..."; private static final Pattern TAB_DELIM_PATTERN = Pattern.compile("\t"); private static final Pattern SPACE_DELIM_PATTERN = Pattern.compile(" "); private static final byte[] EMPTY_BYTE_ARRAY = { }; - // special characters from the ooo document - private static final int CHAR_ELLIPSIS_8230 = 8230; - private static final int CHAR_NDASH_8211 = 8211; - private static final String[] DIGIT_ENDING_FUNCTION_NAMES = { // Digits at the end of a function might be due to a left-over footnote marker. 
// except in these cases @@ -59,10 +58,12 @@ final class FunctionMetadataReader { throw new RuntimeException("resource '" + METADATA_FILE_NAME + "' not found"); } - BufferedReader br = null; + BufferedReader br; try { br = new BufferedReader(new InputStreamReader(is,"UTF-8")); - } catch(UnsupportedEncodingException e) { /* never happens */ } + } catch(UnsupportedEncodingException e) { + throw new RuntimeException(e); + } FunctionDataBuilder fdb = new FunctionDataBuilder(400); try { @@ -127,7 +128,9 @@ final class FunctionMetadataReader { } String[] array = SPACE_DELIM_PATTERN.split(codes); int nItems = array.length; - if(array[nItems-1].charAt(0) == CHAR_ELLIPSIS_8230) { + if(ELLIPSIS.equals(array[nItems-1])) { + // final ellipsis is optional, and ignored + // (all unspecified params are assumed to be the same as the last) nItems --; } byte[] result = new byte[nItems]; @@ -141,7 +144,6 @@ final class FunctionMetadataReader { if(codes.length() == 1) { switch (codes.charAt(0)) { case '-': - case CHAR_NDASH_8211: // this is what the ooo doc has return true; } } diff --git a/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata-asGenerated.txt b/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata-asGenerated.txt index 8ec4bc62d3..475131e1c0 100644 --- a/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata-asGenerated.txt +++ b/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata-asGenerated.txt @@ -29,7 +29,7 @@ 7 MAX 1 30 V R 8 ROW 0 1 V R 9 COLUMN 0 1 V R -10 NA 0 0 V – +10 NA 0 0 V - 11 NPV 2 30 V V R 12 STDEV 1 30 V R 13 DOLLAR 1 2 V V V @@ -38,7 +38,7 @@ 16 COS 1 1 V V 17 TAN 1 1 V V 18 ARCTAN 1 1 V V -19 PI 0 0 V – +19 PI 0 0 V - 20 SQRT 1 1 V V 21 EXP 1 1 V V 22 LN 1 1 V V @@ -53,8 +53,8 @@ 31 MID 3 3 V V V V 32 LEN 1 1 V V 33 VALUE 1 1 V V -34 TRUE 0 0 V – -35 FALSE 0 0 V – +34 TRUE 0 0 V - +35 FALSE 0 0 V - 36 AND 1 30 V R 37 OR 1 30 V R 38 NOT 1 1 V V 
@@ -80,7 +80,7 @@ 60 RATE 3 6 V V V V V V V 61 MIRR 3 3 V R V V 62 IRR 1 2 V R V -63 RAND 0 0 V – x +63 RAND 0 0 V - x 64 MATCH 2 3 V V R R 65 DATE 3 3 V V V V 66 TIME 3 3 V V V V @@ -91,7 +91,7 @@ 71 HOUR 1 1 V V 72 MINUTE 1 1 V V 73 SECOND 1 1 V V -74 NOW 0 0 V – x +74 NOW 0 0 V - x 75 AREAS 1 1 V R 76 ROWS 1 1 V R 77 COLUMNS 1 1 V R @@ -170,10 +170,10 @@ 215 JIS 1 1 V V x 219 ADDRESS 2 5 V V V V V V 220 DAYS360 2 2 V V V x -221 TODAY 0 0 V – x +221 TODAY 0 0 V - x 222 VDB 5 7 V V V V V V V V -227 MEDIAN 1 30 V R … -228 SUMPRODUCT 1 30 V A … +227 MEDIAN 1 30 V R ... +228 SUMPRODUCT 1 30 V A ... 229 SINH 1 1 V V 230 COSH 1 1 V V 231 TANH 1 1 V V @@ -188,7 +188,7 @@ 247 DB 4 5 V V V V V V 252 FREQUENCY 2 2 A R R 261 ERROR.TYPE 1 1 V V -269 AVEDEV 1 30 V R … +269 AVEDEV 1 30 V R ... 270 BETADIST 3 5 V V V V V V 271 GAMMALN 1 1 V V 272 BETAINV 3 5 V V V V V V @@ -237,12 +237,12 @@ 315 SLOPE 2 2 V A A 316 TTEST 4 4 V A A V V 317 PROB 3 4 V A A V V -318 DEVSQ 1 30 V R … -319 GEOMEAN 1 30 V R … -320 HARMEAN 1 30 V R … -321 SUMSQ 0 30 V R … -322 KURT 1 30 V R … -323 SKEW 1 30 V R … +318 DEVSQ 1 30 V R ... +319 GEOMEAN 1 30 V R ... +320 HARMEAN 1 30 V R ... +321 SUMSQ 0 30 V R ... +322 KURT 1 30 V R ... +323 SKEW 1 30 V R ... 324 ZTEST 2 3 V R V V 325 LARGE 2 2 V R V 326 SMALL 2 2 V R V @@ -274,10 +274,10 @@ 358 GETPIVOTDATA 2 30 359 HYPERLINK 1 2 V V V 360 PHONETIC 1 1 V R -361 AVERAGEA 1 30 V R … -362 MAXA 1 30 V R … -363 MINA 1 30 V R … -364 STDEVPA 1 30 V R … -365 VARPA 1 30 V R … -366 STDEVA 1 30 V R … -367 VARA 1 30 V R … +361 AVERAGEA 1 30 V R ... +362 MAXA 1 30 V R ... +363 MINA 1 30 V R ... +364 STDEVPA 1 30 V R ... +365 VARPA 1 30 V R ... +366 STDEVA 1 30 V R ... +367 VARA 1 30 V R ... 
diff --git a/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata.txt b/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata.txt index e46e4174c4..6902027de9 100644 --- a/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata.txt +++ b/src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata.txt @@ -31,7 +31,7 @@ 7 MAX 1 30 V R 8 ROW 0 1 V R 9 COLUMN 0 1 V R -10 NA 0 0 V – +10 NA 0 0 V - 11 NPV 2 30 V V R 12 STDEV 1 30 V R 13 DOLLAR 1 2 V V V @@ -40,7 +40,7 @@ 16 COS 1 1 V V 17 TAN 1 1 V V 18 ATAN 1 1 V V -19 PI 0 0 V – +19 PI 0 0 V - 20 SQRT 1 1 V V 21 EXP 1 1 V V 22 LN 1 1 V V @@ -55,8 +55,8 @@ 31 MID 3 3 V V V V 32 LEN 1 1 V V 33 VALUE 1 1 V V -34 TRUE 0 0 V – -35 FALSE 0 0 V – +34 TRUE 0 0 V - +35 FALSE 0 0 V - 36 AND 1 30 V R 37 OR 1 30 V R 38 NOT 1 1 V V @@ -82,7 +82,7 @@ 60 RATE 3 6 V V V V V V V 61 MIRR 3 3 V R V V 62 IRR 1 2 V R V -63 RAND 0 0 V – x +63 RAND 0 0 V - x 64 MATCH 2 3 V V R R 65 DATE 3 3 V V V V 66 TIME 3 3 V V V V @@ -93,7 +93,7 @@ 71 HOUR 1 1 V V 72 MINUTE 1 1 V V 73 SECOND 1 1 V V -74 NOW 0 0 V – x +74 NOW 0 0 V - x 75 AREAS 1 1 V R 76 ROWS 1 1 V R 77 COLUMNS 1 1 V R @@ -172,10 +172,10 @@ 215 JIS 1 1 V V x 219 ADDRESS 2 5 V V V V V V 220 DAYS360 2 2 V V V x -221 TODAY 0 0 V – x +221 TODAY 0 0 V - x 222 VDB 5 7 V V V V V V V V -227 MEDIAN 1 30 V R … -228 SUMPRODUCT 1 30 V A … +227 MEDIAN 1 30 V R ... +228 SUMPRODUCT 1 30 V A ... 229 SINH 1 1 V V 230 COSH 1 1 V V 231 TANH 1 1 V V @@ -192,7 +192,7 @@ 247 DB 4 5 V V V V V V 252 FREQUENCY 2 2 A R R 261 ERROR.TYPE 1 1 V V -269 AVEDEV 1 30 V R … +269 AVEDEV 1 30 V R ... 270 BETADIST 3 5 V V V V V V 271 GAMMALN 1 1 V V 272 BETAINV 3 5 V V V V V V @@ -241,12 +241,12 @@ 315 SLOPE 2 2 V A A 316 TTEST 4 4 V A A V V 317 PROB 3 4 V A A V V -318 DEVSQ 1 30 V R … -319 GEOMEAN 1 30 V R … -320 HARMEAN 1 30 V R … -321 SUMSQ 0 30 V R … -322 KURT 1 30 V R … -323 SKEW 1 30 V R … +318 DEVSQ 1 30 V R ... 
+319 GEOMEAN 1 30 V R ... +320 HARMEAN 1 30 V R ... +321 SUMSQ 0 30 V R ... +322 KURT 1 30 V R ... +323 SKEW 1 30 V R ... 324 ZTEST 2 3 V R V V 325 LARGE 2 2 V R V 326 SMALL 2 2 V R V @@ -278,10 +278,10 @@ 358 GETPIVOTDATA 2 30 359 HYPERLINK 1 2 V V V 360 PHONETIC 1 1 V R -361 AVERAGEA 1 30 V R … -362 MAXA 1 30 V R … -363 MINA 1 30 V R … -364 STDEVPA 1 30 V R … -365 VARPA 1 30 V R … -366 STDEVA 1 30 V R … -367 VARA 1 30 V R … +361 AVERAGEA 1 30 V R ... +362 MAXA 1 30 V R ... +363 MINA 1 30 V R ... +364 STDEVPA 1 30 V R ... +365 VARPA 1 30 V R ... +366 STDEVA 1 30 V R ... +367 VARA 1 30 V R ... diff --git a/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java b/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java index 351e85cf85..47137df4f6 100644 --- a/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java +++ b/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java @@ -61,7 +61,32 @@ public final class ExcelFileFormatDocFunctionExtractor { private static final String SOURCE_DOC_FILE_NAME = "excelfileformat.odt"; + /** + * For simplicity, the output file is strictly simple ASCII. + * This method detects any unexpected characters. + */ + /* package */ static boolean isSimpleAscii(char c) { + + if (c>=0x21 && c<=0x7E) { + // everything from '!' 
to '~' (includes letters, digits, punctuation) + return true; + } + // some specific whitespace chars below 0x21: + switch(c) { + case ' ': + case '\t': + case '\r': + case '\n': + return true; + } + return false; + } + + private static final class FunctionData { + // special characters from the ooo document + private static final int CHAR_ELLIPSIS_8230 = 8230; + private static final int CHAR_NDASH_8211 = 8211; private final int _index; private final boolean _hasFootnote; @@ -79,10 +104,30 @@ public final class ExcelFileFormatDocFunctionExtractor { _name = funcName; _minParams = minParams; _maxParams = maxParams; - _returnClass = returnClass; - _paramClasses = paramClasses; + _returnClass = convertSpecialChars(returnClass); + _paramClasses = convertSpecialChars(paramClasses); _isVolatile = isVolatile; } + private static String convertSpecialChars(String ss) { + StringBuffer sb = new StringBuffer(ss.length() + 4); + for(int i=0; i