<!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">45354 - Fixed recognition of named ranges within formulas</action>
<action dev="POI-DEVELOPERS" type="fix">45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts</action>
<action dev="POI-DEVELOPERS" type="fix">45336 - Fix HSSFColor.getTripletHash()</action>
<action dev="POI-DEVELOPERS" type="fix">45334 - Fixed formula parser to handle dots in identifiers</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">45354 - Fixed recognition of named ranges within formulas</action>
<action dev="POI-DEVELOPERS" type="fix">45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts</action>
<action dev="POI-DEVELOPERS" type="fix">45336 - Fix HSSFColor.getTripletHash()</action>
<action dev="POI-DEVELOPERS" type="fix">45334 - Fixed formula parser to handle dots in identifiers</action>
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.AreaReference;
import org.apache.poi.hssf.util.CellReference;
+import org.apache.poi.hssf.util.CellReference.NameType;
/**
* This class parses a formula string into a List of tokens in RPN order.
// This can be either a cell ref or a named range
// Try to spot which it is
- if (isValidCellReference(name)) {
+ int nameType = CellReference.classifyCellReference(name);
+ if (nameType == NameType.CELL) {
return new RefPtg(name);
}
+ if (nameType != NameType.NAMED_RANGE) {
+ new FormulaParseException("Name '" + name
+ + "' does not look like a cell reference or named range");
+ }
for(int i = 0; i < book.getNumberOfNames(); i++) {
// named range name matching is case insensitive
return new NamePtg(name, book);
}
}
- throw new FormulaParseException("Found reference to named range \""
- + name + "\", but that named range wasn't defined!");
+ throw new FormulaParseException("Specified named range '"
+ + name + "' does not exist in the current workbook.");
}
/**
+ * @param name an 'identifier' like string (i.e. contains alphanums, and dots)
* @return <code>null</code> if name cannot be split at a dot
*/
private AreaReference parseArea(String name) {
return null;
}
}
+ // This expression is only valid as an area ref, if the LHS and RHS of the dot(s) are both
+ // cell refs. Otherwise, this expression must be a named range name
String partA = name.substring(0, dotPos);
if (!isValidCellReference(partA)) {
return null;
return new AreaReference(topLeft, bottomRight);
}
+ /**
+ * @return <code>true</code> if the specified name is a valid cell reference
+ */
private static boolean isValidCellReference(String str) {
- // TODO - exact rules for recognising cell references may be too complicated for regex
- return CELL_REFERENCE_PATTERN.matcher(str).matches();
+ return CellReference.classifyCellReference(str) == NameType.CELL;
}
-
-
+
+
/**
* Note - Excel function names are 'case aware but not case sensitive'. This method may end
* up creating a defined name record in the workbook if the specified name is not an internal
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
-
package org.apache.poi.hssf.record.formula;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.poi.hssf.util.CellReference;
+
/**
* Formats sheet names for use in formula expressions.
*
*/
public final class SheetNameFormatter {
- private static final String BIFF8_LAST_COLUMN = "IV";
- private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length();
- private static final String BIFF8_LAST_ROW = String.valueOf(0x10000);
- private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length();
-
private static final char DELIMITER = '\'';
- private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z])+[0-9]+");
+ /**
+ * Matches a single cell ref with no absolute ('$') markers
+ */
+ private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z]+)([0-9]+)");
private SheetNameFormatter() {
// no instances of this class
return false;
}
- /**
- * @return <code>true</code> if the presence of the specified character in a sheet name would
- * require the sheet name to be delimited in formulas. This includes every non-alphanumeric
- * character besides underscore '_'.
- */
- /* package */ static boolean isSpecialChar(char ch) {
- // note - Character.isJavaIdentifierPart() would allow dollars '$'
- if(Character.isLetterOrDigit(ch)) {
- return false;
- }
- switch(ch) {
- case '_': // underscore is ok
- return false;
- case '\n':
- case '\r':
- case '\t':
- throw new RuntimeException("Illegal character (0x"
- + Integer.toHexString(ch) + ") found in sheet name");
- }
- return true;
- }
+ /**
+ * @return <code>true</code> if the presence of the specified character in a sheet name would
+ * require the sheet name to be delimited in formulas. This includes every non-alphanumeric
+ * character besides underscore '_'.
+ */
+ /* package */ static boolean isSpecialChar(char ch) {
+ // note - Character.isJavaIdentifierPart() would allow dollars '$'
+ if(Character.isLetterOrDigit(ch)) {
+ return false;
+ }
+ switch(ch) {
+ case '_': // underscore is ok
+ return false;
+ case '\n':
+ case '\r':
+ case '\t':
+ throw new RuntimeException("Illegal character (0x"
+ + Integer.toHexString(ch) + ") found in sheet name");
+ }
+ return true;
+ }
/**
* <p/>
* For better or worse this implementation attempts to replicate Excel's formula renderer.
* Excel uses range checking on the apparent 'row' and 'column' components. Note however that
- * the maximum sheet size varies across versions:
- * <p/>
- * <blockquote><table border="0" cellpadding="1" cellspacing="0"
- * summary="Notable cases.">
- * <tr><th>Version </th><th>File Format </th>
- * <th>Last Column </th><th>Last Row</th></tr>
- * <tr><td>97-2003</td><td>BIFF8</td><td>"IV" (2^8)</td><td>65536 (2^14)</td></tr>
- * <tr><td>2007</td><td>BIFF12</td><td>"XFD" (2^14)</td><td>1048576 (2^20)</td></tr>
- * </table></blockquote>
- * POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for
- * this method:
- * <blockquote><table border="0" cellpadding="1" cellspacing="0"
- * summary="Notable cases.">
- * <tr><th>Input </th>
- * <th>Result </th></tr>
- * <tr><td>"A1", 1</td><td>true</td></tr>
- * <tr><td>"a111", 1</td><td>true</td></tr>
- * <tr><td>"A65536", 1</td><td>true</td></tr>
- * <tr><td>"A65537", 1</td><td>false</td></tr>
- * <tr><td>"iv1", 2</td><td>true</td></tr>
- * <tr><td>"IW1", 2</td><td>false</td></tr>
- * <tr><td>"AAA1", 3</td><td>false</td></tr>
- * <tr><td>"a111", 1</td><td>true</td></tr>
- * <tr><td>"Sheet1", 6</td><td>false</td></tr>
- * </table></blockquote>
+ * the maximum sheet size varies across versions.
+ * @see org.apache.poi.hssf.util.CellReference
*/
- /* package */ static boolean cellReferenceIsWithinRange(String rawSheetName, int numberOfLetters) {
-
- if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
- // "Sheet1" case etc
- return false; // that was easy
- }
- int nDigits = rawSheetName.length() - numberOfLetters;
- if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) {
- return false;
- }
- if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) {
- String colStr = rawSheetName.substring(0, BIFF8_LAST_COLUMN_TEXT_LEN).toUpperCase();
- if(colStr.compareTo(BIFF8_LAST_COLUMN) > 0) {
- return false;
- }
- } else {
- // apparent column name has less chars than max
- // no need to check range
- }
-
- if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) {
- String colStr = rawSheetName.substring(numberOfLetters);
- // ASCII comparison is valid if digit count is same
- if(colStr.compareTo(BIFF8_LAST_ROW) > 0) {
- return false;
- }
- } else {
- // apparent row has less chars than max
- // no need to check range
- }
-
- return true;
+ /* package */ static boolean cellReferenceIsWithinRange(String lettersPrefix, String numbersSuffix) {
+ return CellReference.cellReferenceIsWithinRange(lettersPrefix, numbersSuffix);
}
/**
// rawSheetName == "Sheet1" gets this far.
String lettersPrefix = matcher.group(1);
- return cellReferenceIsWithinRange(rawSheetName, lettersPrefix.length());
+ String numbersSuffix = matcher.group(2);
+ return cellReferenceIsWithinRange(lettersPrefix, numbersSuffix);
}
-
}
package org.apache.poi.hssf.util;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import org.apache.poi.hssf.record.formula.SheetNameFormatter;
/**
* @author Dennis Doubleday (patch to seperateRowColumns())
*/
public final class CellReference {
+ /**
+ * Used to classify identifiers found in formulas as cell references or not.
+ */
+ public static final class NameType {
+ public static final int CELL = 1;
+ public static final int NAMED_RANGE = 2;
+ public static final int BAD_CELL_OR_NAMED_RANGE = -1;
+ }
/** The character ($) that signifies a row or column value is absolute instead of relative */
private static final char ABSOLUTE_REFERENCE_MARKER = '$';
/** The character (!) that separates sheet names from cell references */
/** The character (') used to quote sheet names when they contain special characters */
private static final char SPECIAL_NAME_DELIMITER = '\'';
+ /**
+ * Matches a run of letters followed by a run of digits. The run of letters is group 1 and the
+ * run of digits is group 2. Each group may optionally be prefixed with a single '$'.
+ */
+ private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)");
+ /**
+ * Named range names must start with a letter or underscore. Subsequent characters may include
+ * digits or dot. (They can even end in dot).
+ */
+ private static final Pattern NAMED_RANGE_NAME_PATTERN = Pattern.compile("[_A-Za-z][_.A-Za-z0-9]*");
+ private static final String BIFF8_LAST_COLUMN = "IV";
+ private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length();
+ private static final String BIFF8_LAST_ROW = String.valueOf(0x10000);
+ private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length();
private final int _rowIndex;
private final int _colIndex;
return retval-1;
}
-
/**
+ * Classifies an identifier as either a simple (2D) cell reference or a named range name
+ * @return one of the values from <tt>NameType</tt>
+ */
+ public static int classifyCellReference(String str) {
+ int len = str.length();
+ if (len < 1) {
+ throw new IllegalArgumentException("Empty string not allowed");
+ }
+ char firstChar = str.charAt(0);
+ switch (firstChar) {
+ case ABSOLUTE_REFERENCE_MARKER:
+ case '.':
+ case '_':
+ break;
+ default:
+ if (!Character.isLetter(firstChar)) {
+ throw new IllegalArgumentException("Invalid first char (" + firstChar
+ + ") of cell reference or named range. Letter expected");
+ }
+ }
+ if (!Character.isDigit(str.charAt(len-1))) {
+ // no digits at end of str
+ return validateNamedRangeName(str);
+ }
+ Matcher cellRefPatternMatcher = CELL_REF_PATTERN.matcher(str);
+ if (!cellRefPatternMatcher.matches()) {
+ return validateNamedRangeName(str);
+ }
+ String lettersGroup = cellRefPatternMatcher.group(1);
+ String digitsGroup = cellRefPatternMatcher.group(2);
+ if (cellReferenceIsWithinRange(lettersGroup, digitsGroup)) {
+ // valid cell reference
+ return NameType.CELL;
+ }
+ // If str looks like a cell reference, but is out of (row/col) range, it is a valid
+ // named range name
+ // This behaviour is a little weird. For example, "IW123" is a valid named range name
+ // because the column "IW" is beyond the maximum "IV". Note - this behaviour is version
+ // dependent. In Excel 2007, "IW123" is not a valid named range name.
+ if (str.indexOf(ABSOLUTE_REFERENCE_MARKER) >= 0) {
+ // Of course, named range names cannot have '$'
+ return NameType.BAD_CELL_OR_NAMED_RANGE;
+ }
+ return NameType.NAMED_RANGE;
+ }
+
+ private static int validateNamedRangeName(String str) {
+ if (!NAMED_RANGE_NAME_PATTERN.matcher(str).matches()) {
+ return NameType.BAD_CELL_OR_NAMED_RANGE;
+ }
+ return NameType.NAMED_RANGE;
+
+ }
+
+
+ /**
+ * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be
+ * interpreted as a cell reference. Names of that form can be also used for sheets and/or
+ * named ranges, and in those circumstances, the question of whether the potential cell
+ * reference is valid (in range) becomes important.
+ * <p/>
+ * Note - that the maximum sheet size varies across Excel versions:
+ * <p/>
+ * <blockquote><table border="0" cellpadding="1" cellspacing="0"
+ * summary="Notable cases.">
+ * <tr><th>Version </th><th>File Format </th>
+ * <th>Last Column </th><th>Last Row</th></tr>
+ * <tr><td>97-2003</td><td>BIFF8</td><td>"IV" (2^8)</td><td>65536 (2^14)</td></tr>
+ * <tr><td>2007</td><td>BIFF12</td><td>"XFD" (2^14)</td><td>1048576 (2^20)</td></tr>
+ * </table></blockquote>
+ * POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for
+ * this method:
+ * <blockquote><table border="0" cellpadding="1" cellspacing="0"
+ * summary="Notable cases.">
+ * <tr><th>Input </th>
+ * <th>Result </th></tr>
+ * <tr><td>"A", "1"</td><td>true</td></tr>
+ * <tr><td>"a", "111"</td><td>true</td></tr>
+ * <tr><td>"A", "65536"</td><td>true</td></tr>
+ * <tr><td>"A", "65537"</td><td>false</td></tr>
+ * <tr><td>"iv", "1"</td><td>true</td></tr>
+ * <tr><td>"IW", "1"</td><td>false</td></tr>
+ * <tr><td>"AAA", "1"</td><td>false</td></tr>
+ * <tr><td>"a", "111"</td><td>true</td></tr>
+ * <tr><td>"Sheet", "1"</td><td>false</td></tr>
+ * </table></blockquote>
+ *
+ * @param colStr a string of only letter characters
+ * @param rowStr a string of only digit characters
+ * @return <code>true</code> if the row and col parameters are within range of a BIFF8 spreadsheet.
+ */
+ public static boolean cellReferenceIsWithinRange(String colStr, String rowStr) {
+ int numberOfLetters = colStr.length();
+ if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
+ // "Sheet1" case etc
+ return false; // that was easy
+ }
+ int nDigits = rowStr.length();
+ if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) {
+ return false;
+ }
+ if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) {
+ if(colStr.toUpperCase().compareTo(BIFF8_LAST_COLUMN) > 0) {
+ return false;
+ }
+ } else {
+ // apparent column name has less chars than max
+ // no need to check range
+ }
+
+ if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) {
+ // ASCII comparison is valid if digit count is same
+ if(rowStr.compareTo(BIFF8_LAST_ROW) > 0) {
+ return false;
+ }
+ } else {
+ // apparent row has less chars than max
+ // no need to check range
+ }
+
+ return true;
+ }
+
+ /**
* Separates the row from the columns and returns an array of three Strings. The first element
* is the sheet name. Only the first element may be null. The second element in is the column
* name still in ALPHA-26 number format. The third element is the row.
import org.apache.poi.hssf.record.formula.UnaryMinusPtg;
import org.apache.poi.hssf.record.formula.UnaryPlusPtg;
import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFName;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
assertEquals("ERROR.TYPE", funcPtg.getName());
}
+ public void testNamedRangeThatLooksLikeCell() {
+ HSSFWorkbook wb = new HSSFWorkbook();
+ HSSFSheet sheet = wb.createSheet("Sheet1");
+ HSSFName name = wb.createName();
+ name.setReference("Sheet1!B1");
+ name.setNameName("pfy1");
+
+ Ptg[] ptgs;
+ try {
+ ptgs = FormulaParser.parse("count(pfy1)", wb);
+ } catch (IllegalArgumentException e) {
+ if (e.getMessage().equals("Specified colIx (1012) is out of range")) {
+ throw new AssertionFailedError("Identified bug 45354");
+ }
+ throw e;
+ }
+ assertEquals(2, ptgs.length);
+ assertEquals(NamePtg.class, ptgs[0].getClass());
+
+ HSSFCell cell = sheet.createRow(0).createCell((short)0);
+ cell.setCellFormula("count(pfy1)");
+ assertEquals("COUNT(pfy1)", cell.getCellFormula());
+ try {
+ cell.setCellFormula("count(pf1)");
+ throw new AssertionFailedError("Expected formula parse execption");
+ } catch (FormulaParseException e) {
+ if (!e.getMessage().equals("Specified named range 'pf1' does not exist in the current workbook.")) {
+ throw e;
+ }
+ }
+ cell.setCellFormula("count(fp1)"); // plain cell ref, col is in range
+
+ }
}
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
-
package org.apache.poi.hssf.record.formula;
}
private static void confirmCellRange(String text, int numberOfPrefixLetters, boolean expected) {
- assertEquals(expected, SheetNameFormatter.cellReferenceIsWithinRange(text, numberOfPrefixLetters));
+ String prefix = text.substring(0, numberOfPrefixLetters);
+ String suffix = text.substring(numberOfPrefixLetters);
+ assertEquals(expected, SheetNameFormatter.cellReferenceIsWithinRange(prefix, suffix));
}
/**
confirmCellRange("AAA1", 3, false);
confirmCellRange("a111", 1, true);
confirmCellRange("Sheet1", 6, false);
- confirmCellRange("iV65536", 2, true); // max cell in Excel 97-2003
- confirmCellRange("IW65537", 2, false);
+ confirmCellRange("iV65536", 2, true); // max cell in Excel 97-2003
+ confirmCellRange("IW65537", 2, false);
}
}
import junit.framework.TestCase;
+import org.apache.poi.hssf.util.CellReference.NameType;
+
public final class TestCellReference extends TestCase {
confirmCell(cf, "Amazing!", 0, 0, false, false, "'Amazing!'!A1");
}
-
/* package */ static void confirmCell(CellReference cf, String expSheetName, int expRow,
int expCol, boolean expIsRowAbs, boolean expIsColAbs, String expText) {
assertEquals("text is wrong", expText, cf.formatAsString());
}
- public static void main(String [] args) {
- System.out.println("Testing org.apache.poi.hssf.util.TestCellReference");
- junit.textui.TestRunner.run(TestCellReference.class);
+ public void testClassifyCellReference() {
+ confirmNameType("a1", NameType.CELL);
+ confirmNameType("pfy1", NameType.NAMED_RANGE);
+ confirmNameType("pf1", NameType.NAMED_RANGE); // (col) out of cell range
+ confirmNameType("fp1", NameType.CELL);
+ confirmNameType("pf$1", NameType.BAD_CELL_OR_NAMED_RANGE);
+ confirmNameType("_A1", NameType.NAMED_RANGE);
+ confirmNameType("A_1", NameType.NAMED_RANGE);
+ confirmNameType("A1_", NameType.NAMED_RANGE);
+ confirmNameType(".A1", NameType.BAD_CELL_OR_NAMED_RANGE);
+ confirmNameType("A.1", NameType.NAMED_RANGE);
+ confirmNameType("A1.", NameType.NAMED_RANGE);
+ }
+
+ private void confirmNameType(String ref, int expectedResult) {
+ int actualResult = CellReference.classifyCellReference(ref);
+ assertEquals(expectedResult, actualResult);
}
}