source.dussan.org Git - poi.git/commitdiff
bug 60260: parse unicode sheet names
author: Javen O'Neal <onealj@apache.org>
Thu, 12 Jan 2017 10:39:26 +0000 (10:39 +0000)
committer: Javen O'Neal <onealj@apache.org>
Thu, 12 Jan 2017 10:39:26 +0000 (10:39 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1778418 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/ss/formula/FormulaParser.java
src/ooxml/testcases/org/apache/poi/ss/formula/TestFormulaParser.java
src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFSheetShiftRows.java
src/testcases/org/apache/poi/ss/usermodel/BaseTestNamedRange.java
test-data/spreadsheet/unicodeSheetName.xlsx [new file with mode: 0644]

index 745836ef73dbae84fd2517e4646f77b8b9bed2bd..27499501fded6b94aebeff66700ed460b10b8a0a 100644 (file)
@@ -96,7 +96,7 @@ public final class FormulaParser {
     private final static POILogger log = POILogFactory.getLogger(FormulaParser.class);
     private final String _formulaString;
     private final int _formulaLength;
-    /** points at the next character to be read (after the {@link #look} char) */
+    /** points at the next character to be read (after the {@link #look} codepoint) */
     private int _pointer;
 
     private ParseNode _rootNode;
@@ -106,10 +106,10 @@ public final class FormulaParser {
     private final static char LF = '\n';  // Normally just XSSF
 
     /**
-     * Lookahead Character.
+     * Lookahead unicode codepoint
      * gets value '\0' when the input string is exhausted
      */
-    private char look;
+    private int look;
 
     /**
      * Tracks whether the run of whitespace preceding "look" could be an
@@ -226,20 +226,20 @@ public final class FormulaParser {
             throw new RuntimeException("too far");
         }
         if (_pointer < _formulaLength) {
-            look=_formulaString.charAt(_pointer);
+            look=_formulaString.codePointAt(_pointer);
         } else {
             // Just return if so and reset 'look' to something to keep
             // SkipWhitespace from spinning
             look = (char)0;
             _inIntersection = false;
         }
-        _pointer++;
-        //System.out.println("Got char: "+ look);
+        _pointer += Character.charCount(look);
+        //System.out.println(new StringBuilder("Got char: ").appendCodePoint(look).toString());
     }
     private void resetPointer(int ptr) {
         _pointer = ptr;
         if (_pointer <= _formulaLength) {
-            look=_formulaString.charAt(_pointer-1);
+            look=_formulaString.codePointAt(_pointer - Character.charCount(look));
         } else {
             // Just return if so and reset 'look' to something to keep
             // SkipWhitespace from spinning
@@ -255,25 +255,32 @@ public final class FormulaParser {
             msg = "The specified formula '" + _formulaString
                 + "' starts with an equals sign which is not allowed.";
         } else {
-            msg = "Parse error near char " + (_pointer-1) + " '" + look + "'"
-                + " in specified formula '" + _formulaString + "'. Expected "
-                + s;
+            msg = new StringBuilder("Parse error near char ")
+                .append(_pointer-1) // UTF-16 char offset, not a codepoint index; lands on the second char when look is a supplementary (two-char) codepoint
+                .append(" '")
+                .appendCodePoint(look)
+                .append("'")
+                .append(" in specified formula '")
+                .append(_formulaString)
+                .append("'. Expected ")
+                .append(s)
+                .toString();
         }
         return new FormulaParseException(msg);
     }
 
     /** Recognize an Alpha Character */
-    private static boolean IsAlpha(char c) {
+    private static boolean IsAlpha(int c) {
         return Character.isLetter(c) || c == '$' || c=='_';
     }
 
     /** Recognize a Decimal Digit */
-    private static boolean IsDigit(char c) {
+    private static boolean IsDigit(int c) {
         return Character.isDigit(c);
     }
 
     /** Recognize White Space */
-    private static boolean IsWhite( char c) {
+    private static boolean IsWhite(int c) {
         return  c ==' ' || c== TAB || c == CR || c == LF;
     }
 
@@ -289,9 +296,13 @@ public final class FormulaParser {
      *  unchecked exception. This method does <b>not</b> consume whitespace (before or after the
      *  matched character).
      */
-    private void Match(char x) {
+    private void Match(int x) {
         if (look != x) {
-            throw expected("'" + x + "'");
+            throw expected(new StringBuilder()
+                    .append("'")
+                    .appendCodePoint(x)
+                    .append("'")
+                    .toString());
         }
         GetChar();
     }
@@ -301,7 +312,7 @@ public final class FormulaParser {
         StringBuilder value = new StringBuilder();
 
         while (IsDigit(this.look)){
-            value.append(this.look);
+            value.appendCodePoint(this.look);
             GetChar();
         }
         return value.length() == 0 ? null : value.toString();
@@ -826,7 +837,7 @@ public final class FormulaParser {
         }
         StringBuilder name = new StringBuilder();
         while (look!=']') {
-           name.append(look);
+           name.appendCodePoint(look);
            GetChar();
         }
         Match(']');
@@ -914,7 +925,7 @@ public final class FormulaParser {
             throw expected("number, string, defined name, or data table");
         }
         while (isValidDefinedNameChar(look)) {
-            sb.append(look);
+            sb.appendCodePoint(look);
             GetChar();
         }
         SkipWhite();
@@ -923,13 +934,18 @@ public final class FormulaParser {
     }
 
     /**
-     *
+     * @param ch unicode codepoint
      * @return <code>true</code> if the specified character may be used in a defined name
      */
-    private static boolean isValidDefinedNameChar(char ch) {
+    private static boolean isValidDefinedNameChar(int ch) {
         if (Character.isLetterOrDigit(ch)) {
             return true;
         }
+        // the sheet naming rules are vague on whether non-ASCII unicode characters are allowed
+        // assume every codepoint above 128 is allowed (this check does not cover U+0080 itself).
+        if (ch > 128) {
+            return true;
+        }
         switch (ch) {
             case '.':
             case '_':
@@ -937,6 +953,7 @@ public final class FormulaParser {
             case '\\': // of all things
                 return true;
         }
+        // includes special non-name control characters like ! $ : , ( ) [ ] and space
         return false;
     }
     
@@ -1120,7 +1137,7 @@ public final class FormulaParser {
         StringBuilder sb = new StringBuilder();
         GetChar();
         while (look != ']') {
-            sb.append(look);
+            sb.appendCodePoint(look);
             GetChar();
         }
         GetChar();
@@ -1148,7 +1165,7 @@ public final class FormulaParser {
             StringBuilder sb = new StringBuilder();
             boolean done = look == '\'';
             while(!done) {
-                sb.append(look);
+                sb.appendCodePoint(look);
                 GetChar();
                 if(look == '\'')
                 {
@@ -1176,7 +1193,7 @@ public final class FormulaParser {
             StringBuilder sb = new StringBuilder();
             // can concatenate idens with dots
             while (isUnquotedSheetNameChar(look)) {
-                sb.append(look);
+                sb.appendCodePoint(look);
                 GetChar();
             }
             NameIdentifier iden = new NameIdentifier(sb.toString(), false);
@@ -1214,11 +1231,17 @@ public final class FormulaParser {
 
     /**
      * very similar to {@link SheetNameFormatter#isSpecialChar(char)}
+     * @param ch unicode codepoint
      */
-    private static boolean isUnquotedSheetNameChar(char ch) {
+    private static boolean isUnquotedSheetNameChar(int ch) {
         if(Character.isLetterOrDigit(ch)) {
             return true;
         }
+        // the sheet naming rules are vague on whether non-ASCII unicode characters are allowed
+        // assume every codepoint above 128 is allowed (this check does not cover U+0080 itself).
+        if (ch > 128) {
+            return true;
+        }
         switch(ch) {
             case '.': // dot is OK
             case '_': // underscore is OK
@@ -1413,7 +1436,11 @@ public final class FormulaParser {
        }
     }
 
-    private static boolean isArgumentDelimiter(char ch) {
+    /**
+     * @param ch unicode codepoint
+     * @return <code>true</code> if <code>ch</code> is <code>','</code> or <code>')'</code>
+     */
+    private static boolean isArgumentDelimiter(int ch) {
         return ch ==  ',' || ch == ')';
     }
 
@@ -1754,7 +1781,7 @@ public final class FormulaParser {
         }
         StringBuilder sb = new StringBuilder();
         while (Character.isLetterOrDigit(look) || look == '.') {
-            sb.append(look);
+            sb.appendCodePoint(look);
             GetChar();
         }
         if (sb.length() < 1) {
@@ -1819,7 +1846,7 @@ public final class FormulaParser {
                     break;
                 }
              }
-            token.append(look);
+            token.appendCodePoint(look);
             GetChar();
         }
         return token.toString();
index 421913fe7ccb51b97d3e8bb248189b2574893456..a9d739796531efa297a71b1d36cebb52dce9ce6b 100644 (file)
@@ -23,6 +23,8 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.IOException;
+
 import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.ss.formula.ptg.AbstractFunctionPtg;
@@ -30,6 +32,7 @@ import org.apache.poi.ss.formula.ptg.NameXPxg;
 import org.apache.poi.ss.formula.ptg.Ptg;
 import org.apache.poi.ss.formula.ptg.Ref3DPxg;
 import org.apache.poi.ss.formula.ptg.StringPtg;
+import org.apache.poi.util.IOUtils;
 import org.apache.poi.xssf.XSSFTestDataSamples;
 import org.apache.poi.xssf.usermodel.XSSFEvaluationWorkbook;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
@@ -55,16 +58,31 @@ public class TestFormulaParser {
         }
     }
 
+    /**
+     * Parses <code>formula</code> as a cell formula (sheet index 0) against a new, empty
+     * {@link HSSFWorkbook}. The workbook is closed even if parsing throws.
+     *
+     * @param formula the formula text to parse
+     */
+    private static void checkHSSFFormula(String formula) {
+        HSSFWorkbook wb = new HSSFWorkbook();
+        try {
+            FormulaParsingWorkbook workbook = HSSFEvaluationWorkbook.create(wb);
+            FormulaParser.parse(formula, workbook, FormulaType.CELL, 0);
+        } finally {
+            // close in finally so a parse failure does not leak the workbook
+            IOUtils.closeQuietly(wb);
+        }
+    }
+    /**
+     * Parses <code>formula</code> as a cell formula (sheet index 0) against a new, empty
+     * {@link XSSFWorkbook}. The workbook is closed even if parsing throws.
+     *
+     * @param formula the formula text to parse
+     */
+    private static void checkXSSFFormula(String formula) {
+        XSSFWorkbook wb = new XSSFWorkbook();
+        try {
+            FormulaParsingWorkbook workbook = XSSFEvaluationWorkbook.create(wb);
+            FormulaParser.parse(formula, workbook, FormulaType.CELL, 0);
+        } finally {
+            // close in finally so a parse failure does not leak the workbook
+            IOUtils.closeQuietly(wb);
+        }
+    }
+    /**
+     * Runs the same formula through both {@link #checkHSSFFormula(String)} and
+     * {@link #checkXSSFFormula(String)}, HSSF first.
+     *
+     * @param formula the formula text to parse
+     */
+    private static void checkFormula(String formula) {
+        checkHSSFFormula(formula);
+        checkXSSFFormula(formula);
+    }
+
     @Test
     public void testHSSFPassCase() {
-        FormulaParsingWorkbook workbook = HSSFEvaluationWorkbook.create(new HSSFWorkbook());
-        FormulaParser.parse("Sheet1!1:65536", workbook, FormulaType.CELL, 0);
+        checkHSSFFormula("Sheet1!1:65536");
     }
 
     @Test
     public void testXSSFWorksForOver65536() {
-        FormulaParsingWorkbook workbook = XSSFEvaluationWorkbook.create(new XSSFWorkbook());
-        FormulaParser.parse("Sheet1!1:65537", workbook, FormulaType.CELL, 0);
+        checkXSSFFormula("Sheet1!1:65537");
     }
 
     @Test
@@ -203,4 +221,10 @@ public class TestFormulaParser {
         assertEquals("Column", 0, pxg.getColumn());
         wb.close();
     }
+
+    /** bug 60260: parses a reference whose quoted sheet name contains the non-ASCII character U+30FB. */
+    @Test
+    public void testUnicodeSheetName() {
+        final String unicodeSheetRef = "'Sheet\u30FB1'!A1:A6";
+        checkFormula(unicodeSheetRef);
+    }
 }
index fa4c77fbf869b774a17acf30260bf540ad5a0f7f..84a2dfdbcaa0c749090aab96993270bd53008522 100644 (file)
@@ -35,6 +35,7 @@ import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.ss.util.CellAddress;
 import org.apache.poi.ss.util.CellUtil;
+import org.apache.poi.util.IOUtils;
 import org.apache.poi.xssf.XSSFITestDataProvider;
 import org.apache.poi.xssf.XSSFTestDataSamples;
 import org.apache.xmlbeans.impl.values.XmlValueDisconnectedException;
@@ -449,4 +450,14 @@ public final class TestXSSFSheetShiftRows extends BaseTestSheetShiftRows {
         
         wb.close();\r
     }
+    
+    // bug 60260: shift rows or rename a sheet containing a named range
+    // whose formula refers to a unicode (non-ASCII) sheet name
+    @Test
+    public void shiftRowsWithUnicodeNamedRange() throws IOException {
+        XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("unicodeSheetName.xlsx");
+        try {
+            XSSFSheet sheet = wb.getSheetAt(0);
+            sheet.shiftRows(1, 2, 3);
+        } finally {
+            // close in finally so a failure in shiftRows does not leak the workbook
+            IOUtils.closeQuietly(wb);
+        }
+    }
 }
index 1bb73f9f5e56b402e92d3a0ada038ea07c722d26..74b5a5fac19e245618c787b4fec3fa5946b8f530 100644 (file)
@@ -31,6 +31,7 @@ import java.util.List;
 import org.apache.poi.ss.ITestDataProvider;
 import org.apache.poi.ss.util.AreaReference;
 import org.apache.poi.ss.util.CellReference;
+import org.apache.poi.util.IOUtils;
 import org.junit.Test;
 
 /**
@@ -737,4 +738,18 @@ public abstract class BaseTestNamedRange {
         }
         
     }
+    
+    // bug 60260: renaming a sheet with a named range referring to a unicode (non-ASCII) sheet name
+    @Test
+    public void renameSheetWithNamedRangeReferringToUnicodeSheetName() {
+        Workbook wb = _testDataProvider.createWorkbook();
+        try {
+            wb.createSheet("Sheet\u30FB1");
+
+            Name name = wb.createName();
+            name.setNameName("test_named_range");
+            // NOTE(review): the formula names 'Sheet\u30FB201' while the sheet created above is
+            // 'Sheet\u30FB1' - confirm whether the mismatch is intentional
+            name.setRefersToFormula("'Sheet\u30FB201'!A1:A6");
+
+            wb.setSheetName(0, "Sheet 1");
+        } finally {
+            // close in finally so a failure while renaming does not leak the workbook
+            IOUtils.closeQuietly(wb);
+        }
+    }
 }
diff --git a/test-data/spreadsheet/unicodeSheetName.xlsx b/test-data/spreadsheet/unicodeSheetName.xlsx
new file mode 100644 (file)
index 0000000..8c0fa8c
Binary files /dev/null and b/test-data/spreadsheet/unicodeSheetName.xlsx differ