From: Javen O'Neal Date: Thu, 12 Jan 2017 10:39:26 +0000 (+0000) Subject: bug 60260: parse unicode sheet names X-Git-Tag: REL_3_16_BETA2~41 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=bdc2e2fe04bcf322d790c1c633fcc4c392b07458;p=poi.git bug 60260: parse unicode sheet names git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1778418 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/ss/formula/FormulaParser.java b/src/java/org/apache/poi/ss/formula/FormulaParser.java index 745836ef73..27499501fd 100644 --- a/src/java/org/apache/poi/ss/formula/FormulaParser.java +++ b/src/java/org/apache/poi/ss/formula/FormulaParser.java @@ -96,7 +96,7 @@ public final class FormulaParser { private final static POILogger log = POILogFactory.getLogger(FormulaParser.class); private final String _formulaString; private final int _formulaLength; - /** points at the next character to be read (after the {@link #look} char) */ + /** points at the next character to be read (after the {@link #look} codepoint) */ private int _pointer; private ParseNode _rootNode; @@ -106,10 +106,10 @@ public final class FormulaParser { private final static char LF = '\n'; // Normally just XSSF /** - * Lookahead Character. + * Lookahead unicode codepoint * gets value '\0' when the input string is exhausted */ - private char look; + private int look; /** * Tracks whether the run of whitespace preceding "look" could be an @@ -226,20 +226,20 @@ public final class FormulaParser { throw new RuntimeException("too far"); } if (_pointer < _formulaLength) { - look=_formulaString.charAt(_pointer); + look=_formulaString.codePointAt(_pointer); } else { // Just return if so and reset 'look' to something to keep // SkipWhitespace from spinning look = (char)0; _inIntersection = false; } - _pointer++; - //System.out.println("Got char: "+ look); + _pointer += Character.charCount(look); + //System.out.println(new StringBuilder("Got char: ").appendCodePoint(look)).toString(); } private void resetPointer(int ptr) { _pointer = ptr; if (_pointer <= _formulaLength) { - look=_formulaString.charAt(_pointer-1); + look=_formulaString.codePointAt(_pointer - Character.charCount(look)); } else { // Just return if so and reset 'look' to something to keep // SkipWhitespace from spinning @@ -255,25 +255,32 @@ public final class FormulaParser { msg = "The specified formula '" + _formulaString + "' starts with an equals sign which is not allowed."; } else { - msg = "Parse error near char " + (_pointer-1) + " '" + look + "'" - + " in specified formula '" + _formulaString + "'. Expected " - + s; + msg = new StringBuilder("Parse error near char ") + .append(_pointer-1) //this is the codepoint index, not char index, which may be larger if there are multi-byte chars + .append(" '") + .appendCodePoint(look) + .append("'") + .append(" in specified formula '") + .append(_formulaString) + .append("'. Expected ") + .append(s) + .toString(); } return new FormulaParseException(msg); } /** Recognize an Alpha Character */ - private static boolean IsAlpha(char c) { + private static boolean IsAlpha(int c) { return Character.isLetter(c) || c == '$' || c=='_'; } /** Recognize a Decimal Digit */ - private static boolean IsDigit(char c) { + private static boolean IsDigit(int c) { return Character.isDigit(c); } /** Recognize White Space */ - private static boolean IsWhite( char c) { + private static boolean IsWhite(int c) { return c ==' ' || c== TAB || c == CR || c == LF; } @@ -289,9 +296,13 @@ public final class FormulaParser { * unchecked exception. This method does not consume whitespace (before or after the * matched character). */ - private void Match(char x) { + private void Match(int x) { if (look != x) { - throw expected("'" + x + "'"); + throw expected(new StringBuilder() + .append("'") + .appendCodePoint(x) + .append("'") + .toString()); } GetChar(); } @@ -301,7 +312,7 @@ public final class FormulaParser { StringBuilder value = new StringBuilder(); while (IsDigit(this.look)){ - value.append(this.look); + value.appendCodePoint(this.look); GetChar(); } return value.length() == 0 ? null : value.toString(); @@ -826,7 +837,7 @@ public final class FormulaParser { } StringBuilder name = new StringBuilder(); while (look!=']') { - name.append(look); + name.appendCodePoint(look); GetChar(); } Match(']'); @@ -914,7 +925,7 @@ public final class FormulaParser { throw expected("number, string, defined name, or data table"); } while (isValidDefinedNameChar(look)) { - sb.append(look); + sb.appendCodePoint(look); GetChar(); } SkipWhite(); @@ -923,13 +934,18 @@ public final class FormulaParser { } /** - * + * @param ch unicode codepoint * @return true if the specified character may be used in a defined name */ - private static boolean isValidDefinedNameChar(char ch) { + private static boolean isValidDefinedNameChar(int ch) { if (Character.isLetterOrDigit(ch)) { return true; } + // the sheet naming rules are vague on whether unicode characters are allowed + // assume they're allowed. + if (ch > 128) { + return true; + } switch (ch) { case '.': case '_': @@ -937,6 +953,7 @@ public final class FormulaParser { case '\\': // of all things return true; } + // includes special non-name control characters like ! $ : , ( ) [ ] and space return false; } @@ -1120,7 +1137,7 @@ public final class FormulaParser { StringBuilder sb = new StringBuilder(); GetChar(); while (look != ']') { - sb.append(look); + sb.appendCodePoint(look); GetChar(); } GetChar(); @@ -1148,7 +1165,7 @@ public final class FormulaParser { StringBuilder sb = new StringBuilder(); boolean done = look == '\''; while(!done) { - sb.append(look); + sb.appendCodePoint(look); GetChar(); if(look == '\'') { @@ -1176,7 +1193,7 @@ public final class FormulaParser { StringBuilder sb = new StringBuilder(); // can concatenate idens with dots while (isUnquotedSheetNameChar(look)) { - sb.append(look); + sb.appendCodePoint(look); GetChar(); } NameIdentifier iden = new NameIdentifier(sb.toString(), false); @@ -1214,11 +1231,17 @@ public final class FormulaParser { /** * very similar to {@link SheetNameFormatter#isSpecialChar(char)} + * @param ch unicode codepoint */ - private static boolean isUnquotedSheetNameChar(char ch) { + private static boolean isUnquotedSheetNameChar(int ch) { if(Character.isLetterOrDigit(ch)) { return true; } + // the sheet naming rules are vague on whether unicode characters are allowed + // assume they're allowed. + if (ch > 128) { + return true; + } switch(ch) { case '.': // dot is OK case '_': // underscore is OK @@ -1413,7 +1436,11 @@ public final class FormulaParser { } } - private static boolean isArgumentDelimiter(char ch) { + /** + * @param ch unicode codepoint + * + */ + private static boolean isArgumentDelimiter(int ch) { return ch == ',' || ch == ')'; } @@ -1754,7 +1781,7 @@ public final class FormulaParser { } StringBuilder sb = new StringBuilder(); while (Character.isLetterOrDigit(look) || look == '.') { - sb.append(look); + sb.appendCodePoint(look); GetChar(); } if (sb.length() < 1) { @@ -1819,7 +1846,7 @@ public final class FormulaParser { break; } } - token.append(look); + token.appendCodePoint(look); GetChar(); } return token.toString(); diff --git a/src/ooxml/testcases/org/apache/poi/ss/formula/TestFormulaParser.java b/src/ooxml/testcases/org/apache/poi/ss/formula/TestFormulaParser.java index 421913fe7c..a9d7397965 100644 --- a/src/ooxml/testcases/org/apache/poi/ss/formula/TestFormulaParser.java +++ b/src/ooxml/testcases/org/apache/poi/ss/formula/TestFormulaParser.java @@ -23,6 +23,8 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.io.IOException; + import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.formula.ptg.AbstractFunctionPtg; @@ -30,6 +32,7 @@ import org.apache.poi.ss.formula.ptg.NameXPxg; import org.apache.poi.ss.formula.ptg.Ptg; import org.apache.poi.ss.formula.ptg.Ref3DPxg; import org.apache.poi.ss.formula.ptg.StringPtg; +import org.apache.poi.util.IOUtils; import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xssf.usermodel.XSSFEvaluationWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; @@ -55,16 +58,31 @@ public class TestFormulaParser { } } + private static void checkHSSFFormula(String formula) { + HSSFWorkbook wb = new HSSFWorkbook(); + FormulaParsingWorkbook workbook = HSSFEvaluationWorkbook.create(wb); + FormulaParser.parse(formula, workbook, FormulaType.CELL, 0); + IOUtils.closeQuietly(wb); + } + private static void checkXSSFFormula(String formula) { + XSSFWorkbook wb = new XSSFWorkbook(); + FormulaParsingWorkbook workbook = XSSFEvaluationWorkbook.create(wb); + FormulaParser.parse(formula, workbook, FormulaType.CELL, 0); + IOUtils.closeQuietly(wb); + } + private static void checkFormula(String formula) { + checkHSSFFormula(formula); + checkXSSFFormula(formula); + } + @Test public void testHSSFPassCase() { - FormulaParsingWorkbook workbook = HSSFEvaluationWorkbook.create(new HSSFWorkbook()); - FormulaParser.parse("Sheet1!1:65536", workbook, FormulaType.CELL, 0); + checkHSSFFormula("Sheet1!1:65536"); } @Test public void testXSSFWorksForOver65536() { - FormulaParsingWorkbook workbook = XSSFEvaluationWorkbook.create(new XSSFWorkbook()); - FormulaParser.parse("Sheet1!1:65537", workbook, FormulaType.CELL, 0); + checkXSSFFormula("Sheet1!1:65537"); } @Test @@ -203,4 +221,10 @@ public class TestFormulaParser { assertEquals("Column", 0, pxg.getColumn()); wb.close(); } + + // bug 60260 + @Test + public void testUnicodeSheetName() { + checkFormula("'Sheet\u30FB1'!A1:A6"); + } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFSheetShiftRows.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFSheetShiftRows.java index fa4c77fbf8..84a2dfdbca 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFSheetShiftRows.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFSheetShiftRows.java @@ -35,6 +35,7 @@ import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.util.CellAddress; import org.apache.poi.ss.util.CellUtil; +import org.apache.poi.util.IOUtils; import org.apache.poi.xssf.XSSFITestDataProvider; import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.xmlbeans.impl.values.XmlValueDisconnectedException; @@ -449,4 +450,14 @@ public final class TestXSSFSheetShiftRows extends BaseTestSheetShiftRows { wb.close(); } + + // bug 60260: shift rows or rename a sheet containing a named range + // that refers to formula with a unicode (non-ASCII) sheet name formula + @Test + public void shiftRowsWithUnicodeNamedRange() throws IOException { + XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("unicodeSheetName.xlsx"); + XSSFSheet sheet = wb.getSheetAt(0); + sheet.shiftRows(1, 2, 3); + IOUtils.closeQuietly(wb); + } } diff --git a/src/testcases/org/apache/poi/ss/usermodel/BaseTestNamedRange.java b/src/testcases/org/apache/poi/ss/usermodel/BaseTestNamedRange.java index 1bb73f9f5e..74b5a5fac1 100644 --- a/src/testcases/org/apache/poi/ss/usermodel/BaseTestNamedRange.java +++ b/src/testcases/org/apache/poi/ss/usermodel/BaseTestNamedRange.java @@ -31,6 +31,7 @@ import java.util.List; import org.apache.poi.ss.ITestDataProvider; import org.apache.poi.ss.util.AreaReference; import org.apache.poi.ss.util.CellReference; +import org.apache.poi.util.IOUtils; import org.junit.Test; /** @@ -737,4 +738,18 @@ public abstract class BaseTestNamedRange { } } + + // bug 60260: renaming a sheet with a named range referring to a unicode (non-ASCII) sheet name + @Test + public void renameSheetWithNamedRangeReferringToUnicodeSheetName() { + Workbook wb = _testDataProvider.createWorkbook(); + wb.createSheet("Sheet\u30FB1"); + + Name name = wb.createName(); + name.setNameName("test_named_range"); + name.setRefersToFormula("'Sheet\u30FB201'!A1:A6"); + + wb.setSheetName(0, "Sheet 1"); + IOUtils.closeQuietly(wb); + } } diff --git a/test-data/spreadsheet/unicodeSheetName.xlsx b/test-data/spreadsheet/unicodeSheetName.xlsx new file mode 100644 index 0000000000..8c0fa8c4d1 Binary files /dev/null and b/test-data/spreadsheet/unicodeSheetName.xlsx differ