final class FunctionMetadataReader {
private static final String METADATA_FILE_NAME = "functionMetadata.txt";
+
+ /** plain ASCII text metadata file uses three dots for ellipsis */
+ private static final String ELLIPSIS = "...";
private static final Pattern TAB_DELIM_PATTERN = Pattern.compile("\t");
private static final Pattern SPACE_DELIM_PATTERN = Pattern.compile(" ");
private static final byte[] EMPTY_BYTE_ARRAY = { };
- // special characters from the ooo document
- private static final int CHAR_ELLIPSIS_8230 = 8230;
- private static final int CHAR_NDASH_8211 = 8211;
-
private static final String[] DIGIT_ENDING_FUNCTION_NAMES = {
// A digit at the end of a function name might be a left-over footnote marker,
// except for the names listed here, which are expected to end in a digit.
throw new RuntimeException("resource '" + METADATA_FILE_NAME + "' not found");
}
- BufferedReader br = null;
+ BufferedReader br;
try {
br = new BufferedReader(new InputStreamReader(is,"UTF-8"));
- } catch(UnsupportedEncodingException e) { /* never happens */ }
+ } catch(UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
FunctionDataBuilder fdb = new FunctionDataBuilder(400);
try {
}
String[] array = SPACE_DELIM_PATTERN.split(codes);
int nItems = array.length;
- if(array[nItems-1].charAt(0) == CHAR_ELLIPSIS_8230) {
+ if(ELLIPSIS.equals(array[nItems-1])) {
+ // final ellipsis is optional, and ignored
+ // (all unspecified params are assumed to be the same as the last)
nItems --;
}
byte[] result = new byte[nItems];
if(codes.length() == 1) {
switch (codes.charAt(0)) {
case '-':
- case CHAR_NDASH_8211: // this is what the ooo doc has
return true;
}
}
7 MAX 1 30 V R \r
8 ROW 0 1 V R \r
9 COLUMN 0 1 V R \r
-10 NA 0 0 V – \r
+10 NA 0 0 V - \r
11 NPV 2 30 V V R \r
12 STDEV 1 30 V R \r
13 DOLLAR 1 2 V V V \r
16 COS 1 1 V V \r
17 TAN 1 1 V V \r
18 ARCTAN 1 1 V V \r
-19 PI 0 0 V – \r
+19 PI 0 0 V - \r
20 SQRT 1 1 V V \r
21 EXP 1 1 V V \r
22 LN 1 1 V V \r
31 MID 3 3 V V V V \r
32 LEN 1 1 V V \r
33 VALUE 1 1 V V \r
-34 TRUE 0 0 V – \r
-35 FALSE 0 0 V – \r
+34 TRUE 0 0 V - \r
+35 FALSE 0 0 V - \r
36 AND 1 30 V R \r
37 OR 1 30 V R \r
38 NOT 1 1 V V \r
60 RATE 3 6 V V V V V V V \r
61 MIRR 3 3 V R V V \r
62 IRR 1 2 V R V \r
-63 RAND 0 0 V – x \r
+63 RAND 0 0 V - x \r
64 MATCH 2 3 V V R R \r
65 DATE 3 3 V V V V \r
66 TIME 3 3 V V V V \r
71 HOUR 1 1 V V \r
72 MINUTE 1 1 V V \r
73 SECOND 1 1 V V \r
-74 NOW 0 0 V – x \r
+74 NOW 0 0 V - x \r
75 AREAS 1 1 V R \r
76 ROWS 1 1 V R \r
77 COLUMNS 1 1 V R \r
215 JIS 1 1 V V x\r
219 ADDRESS 2 5 V V V V V V \r
220 DAYS360 2 2 V V V x\r
-221 TODAY 0 0 V – x \r
+221 TODAY 0 0 V - x \r
222 VDB 5 7 V V V V V V V V \r
-227 MEDIAN 1 30 V R … \r
-228 SUMPRODUCT 1 30 V A … \r
+227 MEDIAN 1 30 V R ... \r
+228 SUMPRODUCT 1 30 V A ... \r
229 SINH 1 1 V V \r
230 COSH 1 1 V V \r
231 TANH 1 1 V V \r
247 DB 4 5 V V V V V V \r
252 FREQUENCY 2 2 A R R \r
261 ERROR.TYPE 1 1 V V \r
-269 AVEDEV 1 30 V R … \r
+269 AVEDEV 1 30 V R ... \r
270 BETADIST 3 5 V V V V V V \r
271 GAMMALN 1 1 V V \r
272 BETAINV 3 5 V V V V V V \r
315 SLOPE 2 2 V A A \r
316 TTEST 4 4 V A A V V \r
317 PROB 3 4 V A A V V \r
-318 DEVSQ 1 30 V R … \r
-319 GEOMEAN 1 30 V R … \r
-320 HARMEAN 1 30 V R … \r
-321 SUMSQ 0 30 V R … \r
-322 KURT 1 30 V R … \r
-323 SKEW 1 30 V R … \r
+318 DEVSQ 1 30 V R ... \r
+319 GEOMEAN 1 30 V R ... \r
+320 HARMEAN 1 30 V R ... \r
+321 SUMSQ 0 30 V R ... \r
+322 KURT 1 30 V R ... \r
+323 SKEW 1 30 V R ... \r
324 ZTEST 2 3 V R V V \r
325 LARGE 2 2 V R V \r
326 SMALL 2 2 V R V \r
358 GETPIVOTDATA 2 30 \r
359 HYPERLINK 1 2 V V V \r
360 PHONETIC 1 1 V R \r
-361 AVERAGEA 1 30 V R … \r
-362 MAXA 1 30 V R … \r
-363 MINA 1 30 V R … \r
-364 STDEVPA 1 30 V R … \r
-365 VARPA 1 30 V R … \r
-366 STDEVA 1 30 V R … \r
-367 VARA 1 30 V R … \r
+361 AVERAGEA 1 30 V R ... \r
+362 MAXA 1 30 V R ... \r
+363 MINA 1 30 V R ... \r
+364 STDEVPA 1 30 V R ... \r
+365 VARPA 1 30 V R ... \r
+366 STDEVA 1 30 V R ... \r
+367 VARA 1 30 V R ... \r
7 MAX 1 30 V R \r
8 ROW 0 1 V R \r
9 COLUMN 0 1 V R \r
-10 NA 0 0 V – \r
+10 NA 0 0 V - \r
11 NPV 2 30 V V R \r
12 STDEV 1 30 V R \r
13 DOLLAR 1 2 V V V \r
16 COS 1 1 V V \r
17 TAN 1 1 V V \r
18 ATAN 1 1 V V \r
-19 PI 0 0 V – \r
+19 PI 0 0 V - \r
20 SQRT 1 1 V V \r
21 EXP 1 1 V V \r
22 LN 1 1 V V \r
31 MID 3 3 V V V V \r
32 LEN 1 1 V V \r
33 VALUE 1 1 V V \r
-34 TRUE 0 0 V – \r
-35 FALSE 0 0 V – \r
+34 TRUE 0 0 V - \r
+35 FALSE 0 0 V - \r
36 AND 1 30 V R \r
37 OR 1 30 V R \r
38 NOT 1 1 V V \r
60 RATE 3 6 V V V V V V V \r
61 MIRR 3 3 V R V V \r
62 IRR 1 2 V R V \r
-63 RAND 0 0 V – x \r
+63 RAND 0 0 V - x \r
64 MATCH 2 3 V V R R \r
65 DATE 3 3 V V V V \r
66 TIME 3 3 V V V V \r
71 HOUR 1 1 V V \r
72 MINUTE 1 1 V V \r
73 SECOND 1 1 V V \r
-74 NOW 0 0 V – x \r
+74 NOW 0 0 V - x \r
75 AREAS 1 1 V R \r
76 ROWS 1 1 V R \r
77 COLUMNS 1 1 V R \r
215 JIS 1 1 V V x\r
219 ADDRESS 2 5 V V V V V V \r
220 DAYS360 2 2 V V V x\r
-221 TODAY 0 0 V – x \r
+221 TODAY 0 0 V - x \r
222 VDB 5 7 V V V V V V V V \r
-227 MEDIAN 1 30 V R … \r
-228 SUMPRODUCT 1 30 V A … \r
+227 MEDIAN 1 30 V R ... \r
+228 SUMPRODUCT 1 30 V A ... \r
229 SINH 1 1 V V \r
230 COSH 1 1 V V \r
231 TANH 1 1 V V \r
247 DB 4 5 V V V V V V \r
252 FREQUENCY 2 2 A R R \r
261 ERROR.TYPE 1 1 V V \r
-269 AVEDEV 1 30 V R … \r
+269 AVEDEV 1 30 V R ... \r
270 BETADIST 3 5 V V V V V V \r
271 GAMMALN 1 1 V V \r
272 BETAINV 3 5 V V V V V V \r
315 SLOPE 2 2 V A A \r
316 TTEST 4 4 V A A V V \r
317 PROB 3 4 V A A V V \r
-318 DEVSQ 1 30 V R … \r
-319 GEOMEAN 1 30 V R … \r
-320 HARMEAN 1 30 V R … \r
-321 SUMSQ 0 30 V R … \r
-322 KURT 1 30 V R … \r
-323 SKEW 1 30 V R … \r
+318 DEVSQ 1 30 V R ... \r
+319 GEOMEAN 1 30 V R ... \r
+320 HARMEAN 1 30 V R ... \r
+321 SUMSQ 0 30 V R ... \r
+322 KURT 1 30 V R ... \r
+323 SKEW 1 30 V R ... \r
324 ZTEST 2 3 V R V V \r
325 LARGE 2 2 V R V \r
326 SMALL 2 2 V R V \r
358 GETPIVOTDATA 2 30 \r
359 HYPERLINK 1 2 V V V \r
360 PHONETIC 1 1 V R \r
-361 AVERAGEA 1 30 V R … \r
-362 MAXA 1 30 V R … \r
-363 MINA 1 30 V R … \r
-364 STDEVPA 1 30 V R … \r
-365 VARPA 1 30 V R … \r
-366 STDEVA 1 30 V R … \r
-367 VARA 1 30 V R … \r
+361 AVERAGEA 1 30 V R ... \r
+362 MAXA 1 30 V R ... \r
+363 MINA 1 30 V R ... \r
+364 STDEVPA 1 30 V R ... \r
+365 VARPA 1 30 V R ... \r
+366 STDEVA 1 30 V R ... \r
+367 VARA 1 30 V R ... \r
private static final String SOURCE_DOC_FILE_NAME = "excelfileformat.odt";
+ /**
+ * For simplicity, the output file is strictly simple ASCII.
+ * This method detects any unexpected characters.
+ *
+ * @param c the character to test
+ * @return <code>true</code> if <code>c</code> lies in the range 0x21 to 0x7E inclusive, or is a
+ * space, tab, carriage return or line feed; <code>false</code> for every other character
+ */
+ /* package */ static boolean isSimpleAscii(char c) {
+
+ if (c>=0x21 && c<=0x7E) {
+ // everything from '!' to '~' (includes letters, digits, punctuation)
+ return true;
+ }
+ // some specific whitespace chars below 0x21:
+ switch(c) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ return true;
+ }
+ return false; // any other control char, DEL (0x7F), and everything above 0x7E
+ }
+
+
private static final class FunctionData {
+ // special characters from the ooo document
+ private static final int CHAR_ELLIPSIS_8230 = 8230;
+ private static final int CHAR_NDASH_8211 = 8211;
private final int _index;
private final boolean _hasFootnote;
_name = funcName;
_minParams = minParams;
_maxParams = maxParams;
- _returnClass = returnClass;
- _paramClasses = paramClasses;
+ _returnClass = convertSpecialChars(returnClass);
+ _paramClasses = convertSpecialChars(paramClasses);
_isVolatile = isVolatile;
}
+ private static String convertSpecialChars(String ss) { // returns a copy of ss with n-dash and ellipsis chars replaced by ASCII; throws RuntimeException on any other non-simple-ASCII char
+ StringBuffer sb = new StringBuffer(ss.length() + 4); // NOTE(review): the extra 4 is presumably headroom for ellipsis expansion - confirm
+ for(int i=0; i<ss.length(); i++) {
+ char c = ss.charAt(i);
+ if (isSimpleAscii(c)) {
+ sb.append(c); // simple ASCII is copied through unchanged
+ continue;
+ }
+ switch (c) { // each 'continue' below resumes the enclosing for loop, skipping the throw
+ case CHAR_NDASH_8211:
+ sb.append('-'); // n-dash becomes a plain hyphen
+ continue;
+ case CHAR_ELLIPSIS_8230:
+ sb.append("..."); // single ellipsis char becomes three dots
+ continue;
+ }
+ throw new RuntimeException("bad char (" + ((int)c) + ") in string '" + ss + "'"); // reached only for chars that are neither simple ASCII nor one of the two handled cases
+ }
+ return sb.toString();
+ }
/** Returns the value of this object's final <code>_index</code> field. */
public int getIndex() {
return _index;
}
throw new RuntimeException(e);
}
}
+ /**
+  * To be sure that no tricky unicode chars make it through to the output file.
+  * <p>
+  * Every byte written is checked with <code>isSimpleAscii</code> before it is passed on to
+  * the wrapped stream; a byte that fails the check causes a <code>RuntimeException</code>.
+  * <p>
+  * <code>flush()</code> and <code>close()</code> are forwarded to the wrapped stream. Without
+  * these overrides the no-op defaults of <code>java.io.OutputStream</code> would apply, and
+  * closing a <code>PrintStream</code> built on top of this wrapper would leave the
+  * underlying stream open and unflushed.
+  */
+ private static final class SimpleAsciiOutputStream extends OutputStream {
+
+     /** the stream that receives the checked bytes */
+     private final OutputStream _os;
+
+     /**
+      * @param os the stream to delegate to; it is closed when this wrapper is closed
+      */
+     public SimpleAsciiOutputStream(OutputStream os) {
+         _os = os;
+     }
+     /**
+      * Checks a single byte and forwards it to the wrapped stream.
+      * @throws RuntimeException if the value is not simple ASCII
+      */
+     public void write(int b) throws IOException {
+         checkByte(b);
+         _os.write(b);
+     }
+     /** Throws if <code>b</code>, viewed as a char, is not simple ASCII. */
+     private static void checkByte(int b) {
+         if (!isSimpleAscii((char)b)) {
+             throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected");
+         }
+     }
+     /**
+      * Checks all <code>len</code> bytes starting at <code>off</code> first, then forwards
+      * the whole range in one call, so nothing from a bad range reaches the wrapped stream.
+      * @throws RuntimeException if any byte in the range is not simple ASCII
+      */
+     public void write(byte[] b, int off, int len) throws IOException {
+         for (int i = 0; i < len; i++) {
+             checkByte(b[i + off]);
+         }
+         _os.write(b, off, len);
+     }
+     /** Forwards the flush to the wrapped stream. */
+     public void flush() throws IOException {
+         _os.flush();
+     }
+     /** Closes the wrapped stream, releasing the resource it holds. */
+     public void close() throws IOException {
+         _os.close();
+     }
+ }
private static void processFile(File effDocFile, File outFile) {
OutputStream os;
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
- PrintStream ps = null;
+ os = new SimpleAsciiOutputStream(os);
+ PrintStream ps;
try {
- ps = new PrintStream(os,true, "UTF-8");
- } catch(UnsupportedEncodingException e) {}
+ ps = new PrintStream(os, true, "UTF-8");
+ } catch(UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
outputLicenseHeader(ps);
Class genClass = ExcelFileFormatDocFunctionExtractor.class;