diff options
author | Josh Micich <josh@apache.org> | 2008-05-06 19:48:55 +0000 |
---|---|---|
committer | Josh Micich <josh@apache.org> | 2008-05-06 19:48:55 +0000 |
commit | 9d0ca2d5734d9a3d87e7766409ff35988b996329 (patch) | |
tree | 792bfad1e07435a16bf94bef9d4cadf961dc3182 /src/testcases/org/apache | |
parent | 21cbfa84405326a04e3dc0516a07c78297c634e6 (diff) | |
download | poi-9d0ca2d5734d9a3d87e7766409ff35988b996329.tar.gz poi-9d0ca2d5734d9a3d87e7766409ff35988b996329.zip |
Another attempt to fix unicode problems in functionMetadata.txt. Made that file pure ascii.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@653892 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/testcases/org/apache')
-rw-r--r-- | src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java | 85 |
1 files changed, 80 insertions, 5 deletions
```diff
diff --git a/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java b/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java
index 351e85cf85..47137df4f6 100644
--- a/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java
+++ b/src/testcases/org/apache/poi/hssf/record/formula/function/ExcelFileFormatDocFunctionExtractor.java
@@ -61,7 +61,32 @@ public final class ExcelFileFormatDocFunctionExtractor {
 
     private static final String SOURCE_DOC_FILE_NAME = "excelfileformat.odt";
 
+    /**
+     * For simplicity, the output file is strictly simple ASCII.
+     * This method detects any unexpected characters.
+     */
+    /* package */ static boolean isSimpleAscii(char c) {
+
+        if (c>=0x21 && c<=0x7E) {
+            // everything from '!' to '~' (includes letters, digits, punctuation
+            return true;
+        }
+        // some specific whitespace chars below 0x21:
+        switch(c) {
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+                return true;
+        }
+        return false;
+    }
+
+
     private static final class FunctionData {
+        // special characters from the ooo document
+        private static final int CHAR_ELLIPSIS_8230 = 8230;
+        private static final int CHAR_NDASH_8211 = 8211;
 
         private final int _index;
         private final boolean _hasFootnote;
@@ -79,10 +104,30 @@ public final class ExcelFileFormatDocFunctionExtractor {
             _name = funcName;
             _minParams = minParams;
             _maxParams = maxParams;
-            _returnClass = returnClass;
-            _paramClasses = paramClasses;
+            _returnClass = convertSpecialChars(returnClass);
+            _paramClasses = convertSpecialChars(paramClasses);
             _isVolatile = isVolatile;
         }
+        private static String convertSpecialChars(String ss) {
+            StringBuffer sb = new StringBuffer(ss.length() + 4);
+            for(int i=0; i<ss.length(); i++) {
+                char c = ss.charAt(i);
+                if (isSimpleAscii(c)) {
+                    sb.append(c);
+                    continue;
+                }
+                switch (c) {
+                    case CHAR_NDASH_8211:
+                        sb.append('-');
+                        continue;
+                    case CHAR_ELLIPSIS_8230:
+                        sb.append("...");
+                        continue;
+                }
+                throw new RuntimeException("bad char (" + ((int)c) + ") in string '" + ss + "'");
+            }
+            return sb.toString();
+        }
         public int getIndex() {
             return _index;
         }
@@ -382,6 +427,33 @@ public final class ExcelFileFormatDocFunctionExtractor {
             throw new RuntimeException(e);
         }
     }
+    /**
+     * To be sure that no tricky unicode chars make it through to the output file.
+     */
+    private static final class SimpleAsciiOutputStream extends OutputStream {
+
+        private final OutputStream _os;
+
+        public SimpleAsciiOutputStream(OutputStream os) {
+            _os = os;
+        }
+        public void write(int b) throws IOException {
+            checkByte(b);
+            _os.write(b);
+        }
+        private static void checkByte(int b) {
+            if (!isSimpleAscii((char)b)) {
+                throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected");
+            }
+        }
+        public void write(byte[] b, int off, int len) throws IOException {
+            for (int i = 0; i < len; i++) {
+                checkByte(b[i + off]);
+
+            }
+            _os.write(b, off, len);
+        }
+    }
 
     private static void processFile(File effDocFile, File outFile) {
         OutputStream os;
@@ -390,10 +462,13 @@ public final class ExcelFileFormatDocFunctionExtractor {
         } catch (FileNotFoundException e) {
             throw new RuntimeException(e);
         }
-        PrintStream ps = null;
+        os = new SimpleAsciiOutputStream(os);
+        PrintStream ps;
         try {
-            ps = new PrintStream(os,true, "UTF-8");
-        } catch(UnsupportedEncodingException e) {}
+            ps = new PrintStream(os, true, "UTF-8");
+        } catch(UnsupportedEncodingException e) {
+            throw new RuntimeException(e);
+        }
 
         outputLicenseHeader(ps);
         Class genClass = ExcelFileFormatDocFunctionExtractor.class;
```