From 73f52d0f4bec5ed838deb4a81b259a0d4f63a22b Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Tue, 8 Jul 2008 21:00:13 +0000 Subject: [PATCH] Fix for bug 45354 - Proper distinguishing of cell references and named ranges within formulas git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@674953 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../apache/poi/hssf/model/FormulaParser.java | 25 ++- .../record/formula/SheetNameFormatter.java | 120 ++++---------- .../apache/poi/hssf/util/CellReference.java | 150 +++++++++++++++++- .../poi/hssf/model/TestFormulaParser.java | 34 ++++ .../formula/TestSheetNameFormatter.java | 9 +- .../poi/hssf/util/TestCellReference.java | 23 ++- 8 files changed, 260 insertions(+), 103 deletions(-) diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index c03b89e795..54bfcfee85 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45354 - Fixed recognition of named ranges within formulas 45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts 45336 - Fix HSSFColor.getTripletHash() 45334 - Fixed formula parser to handle dots in identifiers diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index f706bed1a6..bde398a9c4 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45354 - Fixed recognition of named ranges within formulas 45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts 45336 - Fix HSSFColor.getTripletHash() 45334 - Fixed formula parser to handle dots in identifiers diff --git a/src/java/org/apache/poi/hssf/model/FormulaParser.java b/src/java/org/apache/poi/hssf/model/FormulaParser.java index 6798bf2a40..75b3aeb42e 100644 --- a/src/java/org/apache/poi/hssf/model/FormulaParser.java +++ b/src/java/org/apache/poi/hssf/model/FormulaParser.java @@ -29,6 +29,7 @@ import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.util.AreaReference; import org.apache.poi.hssf.util.CellReference; +import org.apache.poi.hssf.util.CellReference.NameType; /** * This class parses a formula string into a List of tokens in RPN order. @@ -293,9 +294,14 @@ public final class FormulaParser { // This can be either a cell ref or a named range // Try to spot which it is - if (isValidCellReference(name)) { + int nameType = CellReference.classifyCellReference(name); + if (nameType == NameType.CELL) { return new RefPtg(name); } + if (nameType != NameType.NAMED_RANGE) { + new FormulaParseException("Name '" + name + + "' does not look like a cell reference or named range"); + } for(int i = 0; i < book.getNumberOfNames(); i++) { // named range name matching is case insensitive @@ -303,11 +309,12 @@ public final class FormulaParser { return new NamePtg(name, book); } } - throw new FormulaParseException("Found reference to named range \"" - + name + "\", but that named range wasn't defined!"); + throw new FormulaParseException("Specified named range '" + + name + "' does not exist in the current workbook."); } /** + * @param name an 'identifier' like string (i.e. contains alphanums, and dots) * @return null if name cannot be split at a dot */ private AreaReference parseArea(String name) { @@ -323,6 +330,8 @@ public final class FormulaParser { return null; } } + // This expression is only valid as an area ref, if the LHS and RHS of the dot(s) are both + // cell refs. Otherwise, this expression must be a named range name String partA = name.substring(0, dotPos); if (!isValidCellReference(partA)) { return null; @@ -336,12 +345,14 @@ public final class FormulaParser { return new AreaReference(topLeft, bottomRight); } + /** + * @return true if the specified name is a valid cell reference + */ private static boolean isValidCellReference(String str) { - // TODO - exact rules for recognising cell references may be too complicated for regex - return CELL_REFERENCE_PATTERN.matcher(str).matches(); + return CellReference.classifyCellReference(str) == NameType.CELL; } - - + + /** * Note - Excel function names are 'case aware but not case sensitive'. This method may end * up creating a defined name record in the workbook if the specified name is not an internal diff --git a/src/java/org/apache/poi/hssf/record/formula/SheetNameFormatter.java b/src/java/org/apache/poi/hssf/record/formula/SheetNameFormatter.java index 8e47cbe7a0..ace857da1e 100755 --- a/src/java/org/apache/poi/hssf/record/formula/SheetNameFormatter.java +++ b/src/java/org/apache/poi/hssf/record/formula/SheetNameFormatter.java @@ -14,13 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - package org.apache.poi.hssf.record.formula; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.poi.hssf.util.CellReference; + /** * Formats sheet names for use in formula expressions. * @@ -28,14 +29,12 @@ import java.util.regex.Pattern; */ public final class SheetNameFormatter { - private static final String BIFF8_LAST_COLUMN = "IV"; - private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length(); - private static final String BIFF8_LAST_ROW = String.valueOf(0x10000); - private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length(); - private static final char DELIMITER = '\''; - private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z])+[0-9]+"); + /** + * Matches a single cell ref with no absolute ('$') markers + */ + private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z]+)([0-9]+)"); private SheetNameFormatter() { // no instances of this class @@ -105,27 +104,27 @@ public final class SheetNameFormatter { return false; } - /** - * @return true if the presence of the specified character in a sheet name would - * require the sheet name to be delimited in formulas. This includes every non-alphanumeric - * character besides underscore '_'. - */ - /* package */ static boolean isSpecialChar(char ch) { - // note - Character.isJavaIdentifierPart() would allow dollars '$' - if(Character.isLetterOrDigit(ch)) { - return false; - } - switch(ch) { - case '_': // underscore is ok - return false; - case '\n': - case '\r': - case '\t': - throw new RuntimeException("Illegal character (0x" - + Integer.toHexString(ch) + ") found in sheet name"); - } - return true; - } + /** + * @return true if the presence of the specified character in a sheet name would + * require the sheet name to be delimited in formulas. This includes every non-alphanumeric + * character besides underscore '_'. + */ + /* package */ static boolean isSpecialChar(char ch) { + // note - Character.isJavaIdentifierPart() would allow dollars '$' + if(Character.isLetterOrDigit(ch)) { + return false; + } + switch(ch) { + case '_': // underscore is ok + return false; + case '\n': + case '\r': + case '\t': + throw new RuntimeException("Illegal character (0x" + + Integer.toHexString(ch) + ") found in sheet name"); + } + return true; + } /** @@ -149,64 +148,11 @@ public final class SheetNameFormatter { *

* For better or worse this implementation attempts to replicate Excel's formula renderer. * Excel uses range checking on the apparent 'row' and 'column' components. Note however that - * the maximum sheet size varies across versions: - *

- *

- * - * - * - * - *
Version  File Format  Last Column  Last Row
97-2003BIFF8"IV" (2^8)65536 (2^14)
2007BIFF12"XFD" (2^14)1048576 (2^20)
- * POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for - * this method: - *
- * - * - * - * - * - * - * - * - * - * - * - *
Input           Result 
"A1", 1true
"a111", 1true
"A65536", 1true
"A65537", 1false
"iv1", 2true
"IW1", 2false
"AAA1", 3false
"a111", 1true
"Sheet1", 6false
+ * the maximum sheet size varies across versions. + * @see org.apache.poi.hssf.util.CellReference */ - /* package */ static boolean cellReferenceIsWithinRange(String rawSheetName, int numberOfLetters) { - - if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) { - // "Sheet1" case etc - return false; // that was easy - } - int nDigits = rawSheetName.length() - numberOfLetters; - if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) { - return false; - } - if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) { - String colStr = rawSheetName.substring(0, BIFF8_LAST_COLUMN_TEXT_LEN).toUpperCase(); - if(colStr.compareTo(BIFF8_LAST_COLUMN) > 0) { - return false; - } - } else { - // apparent column name has less chars than max - // no need to check range - } - - if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) { - String colStr = rawSheetName.substring(numberOfLetters); - // ASCII comparison is valid if digit count is same - if(colStr.compareTo(BIFF8_LAST_ROW) > 0) { - return false; - } - } else { - // apparent row has less chars than max - // no need to check range - } - - return true; + /* package */ static boolean cellReferenceIsWithinRange(String lettersPrefix, String numbersSuffix) { + return CellReference.cellReferenceIsWithinRange(lettersPrefix, numbersSuffix); } /** @@ -239,7 +185,7 @@ public final class SheetNameFormatter { // rawSheetName == "Sheet1" gets this far. String lettersPrefix = matcher.group(1); - return cellReferenceIsWithinRange(rawSheetName, lettersPrefix.length()); + String numbersSuffix = matcher.group(2); + return cellReferenceIsWithinRange(lettersPrefix, numbersSuffix); } - } diff --git a/src/java/org/apache/poi/hssf/util/CellReference.java b/src/java/org/apache/poi/hssf/util/CellReference.java index 47d579d94f..bde5407d63 100644 --- a/src/java/org/apache/poi/hssf/util/CellReference.java +++ b/src/java/org/apache/poi/hssf/util/CellReference.java @@ -17,6 +17,9 @@ package org.apache.poi.hssf.util; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.apache.poi.hssf.record.formula.SheetNameFormatter; /** @@ -25,6 +28,14 @@ import org.apache.poi.hssf.record.formula.SheetNameFormatter; * @author Dennis Doubleday (patch to seperateRowColumns()) */ public final class CellReference { + /** + * Used to classify identifiers found in formulas as cell references or not. + */ + public static final class NameType { + public static final int CELL = 1; + public static final int NAMED_RANGE = 2; + public static final int BAD_CELL_OR_NAMED_RANGE = -1; + } /** The character ($) that signifies a row or column value is absolute instead of relative */ private static final char ABSOLUTE_REFERENCE_MARKER = '$'; /** The character (!) that separates sheet names from cell references */ @@ -32,6 +43,20 @@ public final class CellReference { /** The character (') used to quote sheet names when they contain special characters */ private static final char SPECIAL_NAME_DELIMITER = '\''; + /** + * Matches a run of letters followed by a run of digits. The run of letters is group 1 and the + * run of digits is group 2. Each group may optionally be prefixed with a single '$'. + */ + private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)"); + /** + * Named range names must start with a letter or underscore. Subsequent characters may include + * digits or dot. (They can even end in dot). + */ + private static final Pattern NAMED_RANGE_NAME_PATTERN = Pattern.compile("[_A-Za-z][_.A-Za-z0-9]*"); + private static final String BIFF8_LAST_COLUMN = "IV"; + private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length(); + private static final String BIFF8_LAST_ROW = String.valueOf(0x10000); + private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length(); private final int _rowIndex; private final int _colIndex; @@ -119,8 +144,131 @@ public final class CellReference { return retval-1; } - /** + * Classifies an identifier as either a simple (2D) cell reference or a named range name + * @return one of the values from NameType + */ + public static int classifyCellReference(String str) { + int len = str.length(); + if (len < 1) { + throw new IllegalArgumentException("Empty string not allowed"); + } + char firstChar = str.charAt(0); + switch (firstChar) { + case ABSOLUTE_REFERENCE_MARKER: + case '.': + case '_': + break; + default: + if (!Character.isLetter(firstChar)) { + throw new IllegalArgumentException("Invalid first char (" + firstChar + + ") of cell reference or named range. Letter expected"); + } + } + if (!Character.isDigit(str.charAt(len-1))) { + // no digits at end of str + return validateNamedRangeName(str); + } + Matcher cellRefPatternMatcher = CELL_REF_PATTERN.matcher(str); + if (!cellRefPatternMatcher.matches()) { + return validateNamedRangeName(str); + } + String lettersGroup = cellRefPatternMatcher.group(1); + String digitsGroup = cellRefPatternMatcher.group(2); + if (cellReferenceIsWithinRange(lettersGroup, digitsGroup)) { + // valid cell reference + return NameType.CELL; + } + // If str looks like a cell reference, but is out of (row/col) range, it is a valid + // named range name + // This behaviour is a little weird. For example, "IW123" is a valid named range name + // because the column "IW" is beyond the maximum "IV". Note - this behaviour is version + // dependent. In Excel 2007, "IW123" is not a valid named range name. + if (str.indexOf(ABSOLUTE_REFERENCE_MARKER) >= 0) { + // Of course, named range names cannot have '$' + return NameType.BAD_CELL_OR_NAMED_RANGE; + } + return NameType.NAMED_RANGE; + } + + private static int validateNamedRangeName(String str) { + if (!NAMED_RANGE_NAME_PATTERN.matcher(str).matches()) { + return NameType.BAD_CELL_OR_NAMED_RANGE; + } + return NameType.NAMED_RANGE; + + } + + + /** + * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be + * interpreted as a cell reference. Names of that form can be also used for sheets and/or + * named ranges, and in those circumstances, the question of whether the potential cell + * reference is valid (in range) becomes important. + *

+ * Note - that the maximum sheet size varies across Excel versions: + *

+ *

+ * + * + * + * + *
Version  File Format  Last Column  Last Row
97-2003BIFF8"IV" (2^8)65536 (2^14)
2007BIFF12"XFD" (2^14)1048576 (2^20)
+ * POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for + * this method: + *
+ * + * + * + * + * + * + * + * + * + * + * + *
Input           Result 
"A", "1"true
"a", "111"true
"A", "65536"true
"A", "65537"false
"iv", "1"true
"IW", "1"false
"AAA", "1"false
"a", "111"true
"Sheet", "1"false
+ * + * @param colStr a string of only letter characters + * @param rowStr a string of only digit characters + * @return true if the row and col parameters are within range of a BIFF8 spreadsheet. + */ + public static boolean cellReferenceIsWithinRange(String colStr, String rowStr) { + int numberOfLetters = colStr.length(); + if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) { + // "Sheet1" case etc + return false; // that was easy + } + int nDigits = rowStr.length(); + if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) { + return false; + } + if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) { + if(colStr.toUpperCase().compareTo(BIFF8_LAST_COLUMN) > 0) { + return false; + } + } else { + // apparent column name has less chars than max + // no need to check range + } + + if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) { + // ASCII comparison is valid if digit count is same + if(rowStr.compareTo(BIFF8_LAST_ROW) > 0) { + return false; + } + } else { + // apparent row has less chars than max + // no need to check range + } + + return true; + } + + /** * Separates the row from the columns and returns an array of three Strings. The first element * is the sheet name. Only the first element may be null. The second element in is the column * name still in ALPHA-26 number format. The third element is the row. diff --git a/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java b/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java index 115a7e081b..a21850887c 100644 --- a/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java +++ b/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java @@ -46,6 +46,7 @@ import org.apache.poi.hssf.record.formula.SubtractPtg; import org.apache.poi.hssf.record.formula.UnaryMinusPtg; import org.apache.poi.hssf.record.formula.UnaryPlusPtg; import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFName; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; @@ -791,4 +792,37 @@ public final class TestFormulaParser extends TestCase { assertEquals("ERROR.TYPE", funcPtg.getName()); } + public void testNamedRangeThatLooksLikeCell() { + HSSFWorkbook wb = new HSSFWorkbook(); + HSSFSheet sheet = wb.createSheet("Sheet1"); + HSSFName name = wb.createName(); + name.setReference("Sheet1!B1"); + name.setNameName("pfy1"); + + Ptg[] ptgs; + try { + ptgs = FormulaParser.parse("count(pfy1)", wb); + } catch (IllegalArgumentException e) { + if (e.getMessage().equals("Specified colIx (1012) is out of range")) { + throw new AssertionFailedError("Identified bug 45354"); + } + throw e; + } + assertEquals(2, ptgs.length); + assertEquals(NamePtg.class, ptgs[0].getClass()); + + HSSFCell cell = sheet.createRow(0).createCell((short)0); + cell.setCellFormula("count(pfy1)"); + assertEquals("COUNT(pfy1)", cell.getCellFormula()); + try { + cell.setCellFormula("count(pf1)"); + throw new AssertionFailedError("Expected formula parse execption"); + } catch (FormulaParseException e) { + if (!e.getMessage().equals("Specified named range 'pf1' does not exist in the current workbook.")) { + throw e; + } + } + cell.setCellFormula("count(fp1)"); // plain cell ref, col is in range + + } } diff --git a/src/testcases/org/apache/poi/hssf/record/formula/TestSheetNameFormatter.java b/src/testcases/org/apache/poi/hssf/record/formula/TestSheetNameFormatter.java index 768c429580..b6dd1970e4 100644 --- a/src/testcases/org/apache/poi/hssf/record/formula/TestSheetNameFormatter.java +++ b/src/testcases/org/apache/poi/hssf/record/formula/TestSheetNameFormatter.java @@ -14,7 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - package org.apache.poi.hssf.record.formula; @@ -76,7 +75,9 @@ public final class TestSheetNameFormatter extends TestCase { } private static void confirmCellRange(String text, int numberOfPrefixLetters, boolean expected) { - assertEquals(expected, SheetNameFormatter.cellReferenceIsWithinRange(text, numberOfPrefixLetters)); + String prefix = text.substring(0, numberOfPrefixLetters); + String suffix = text.substring(numberOfPrefixLetters); + assertEquals(expected, SheetNameFormatter.cellReferenceIsWithinRange(prefix, suffix)); } /** @@ -93,7 +94,7 @@ public final class TestSheetNameFormatter extends TestCase { confirmCellRange("AAA1", 3, false); confirmCellRange("a111", 1, true); confirmCellRange("Sheet1", 6, false); - confirmCellRange("iV65536", 2, true); // max cell in Excel 97-2003 - confirmCellRange("IW65537", 2, false); + confirmCellRange("iV65536", 2, true); // max cell in Excel 97-2003 + confirmCellRange("IW65537", 2, false); } } diff --git a/src/testcases/org/apache/poi/hssf/util/TestCellReference.java b/src/testcases/org/apache/poi/hssf/util/TestCellReference.java index 648fb9a8e3..8ec8f99467 100644 --- a/src/testcases/org/apache/poi/hssf/util/TestCellReference.java +++ b/src/testcases/org/apache/poi/hssf/util/TestCellReference.java @@ -20,6 +20,8 @@ package org.apache.poi.hssf.util; import junit.framework.TestCase; +import org.apache.poi.hssf.util.CellReference.NameType; + public final class TestCellReference extends TestCase { @@ -75,7 +77,6 @@ public final class TestCellReference extends TestCase { confirmCell(cf, "Amazing!", 0, 0, false, false, "'Amazing!'!A1"); } - /* package */ static void confirmCell(CellReference cf, String expSheetName, int expRow, int expCol, boolean expIsRowAbs, boolean expIsColAbs, String expText) { @@ -87,8 +88,22 @@ public final class TestCellReference extends TestCase { assertEquals("text is wrong", expText, cf.formatAsString()); } - public static void main(String [] args) { - System.out.println("Testing org.apache.poi.hssf.util.TestCellReference"); - junit.textui.TestRunner.run(TestCellReference.class); + public void testClassifyCellReference() { + confirmNameType("a1", NameType.CELL); + confirmNameType("pfy1", NameType.NAMED_RANGE); + confirmNameType("pf1", NameType.NAMED_RANGE); // (col) out of cell range + confirmNameType("fp1", NameType.CELL); + confirmNameType("pf$1", NameType.BAD_CELL_OR_NAMED_RANGE); + confirmNameType("_A1", NameType.NAMED_RANGE); + confirmNameType("A_1", NameType.NAMED_RANGE); + confirmNameType("A1_", NameType.NAMED_RANGE); + confirmNameType(".A1", NameType.BAD_CELL_OR_NAMED_RANGE); + confirmNameType("A.1", NameType.NAMED_RANGE); + confirmNameType("A1.", NameType.NAMED_RANGE); + } + + private void confirmNameType(String ref, int expectedResult) { + int actualResult = CellReference.classifyCellReference(ref); + assertEquals(expectedResult, actualResult); } } -- 2.39.5