diff options
author | Nick Burch <nick@apache.org> | 2008-07-10 20:37:09 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2008-07-10 20:37:09 +0000 |
commit | 99b7fbfaea7af348bd13c61cd16cb5a3c8dc509a (patch) | |
tree | 6d08f74c6e5c988f4a743ef6b26892d33cf66902 /src/java/org/apache/poi/ss/util/CellReference.java | |
parent | 1f301d019ba0c2bc66cf05a91f68c7b85176dae1 (diff) | |
download | poi-99b7fbfaea7af348bd13c61cd16cb5a3c8dc509a.tar.gz poi-99b7fbfaea7af348bd13c61cd16cb5a3c8dc509a.zip |
Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-660255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-675673 via svnmerge from
https://svn.apache.org:443/repos/asf/poi/trunk
........
r674891 | nick | 2008-07-08 18:31:36 +0100 (Tue, 08 Jul 2008) | 1 line
Test to check that formula stuff is mostly working with zip codes
........
r674911 | josh | 2008-07-08 19:56:21 +0100 (Tue, 08 Jul 2008) | 1 line
Fix bug #45338 - JDK 1.4/1.5 issue from r673997
........
r674937 | josh | 2008-07-08 21:19:40 +0100 (Tue, 08 Jul 2008) | 1 line
Hooked up new junit in suite (from bug 45338)
........
r674953 | josh | 2008-07-08 22:00:13 +0100 (Tue, 08 Jul 2008) | 1 line
Fix for bug 45354 - Proper distinguishing of cell references and named ranges within formulas
........
r675079 | josh | 2008-07-09 02:45:33 +0100 (Wed, 09 Jul 2008) | 1 line
Fix for bug 45348 - required tweaks to RVA formula logic
........
r675086 | josh | 2008-07-09 03:15:52 +0100 (Wed, 09 Jul 2008) | 2 lines
Minor junit fix after r674953 (bug 45354). Hooked up loose tests in suite.
........
r675218 | josh | 2008-07-09 15:58:06 +0100 (Wed, 09 Jul 2008) | 1 line
Fix for bug 45367 - fixed boundary case when row zero is the last row removed from the sheet
........
r675661 | nick | 2008-07-10 18:52:33 +0100 (Thu, 10 Jul 2008) | 1 line
Apply some changes inspired by bug # 45373, which improve the performance of HSSFSheet.shiftRows
........
r675671 | nick | 2008-07-10 19:41:25 +0100 (Thu, 10 Jul 2008) | 1 line
Fix bug #45365 - Handle more excel number formatting rules in FormatTrackingHSSFListener / XLS2CSVmra
........
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@675718 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/poi/ss/util/CellReference.java')
-rw-r--r-- | src/java/org/apache/poi/ss/util/CellReference.java | 262 |
1 files changed, 203 insertions, 59 deletions
diff --git a/src/java/org/apache/poi/ss/util/CellReference.java b/src/java/org/apache/poi/ss/util/CellReference.java index 4f90311f3b..b46cd88ccb 100644 --- a/src/java/org/apache/poi/ss/util/CellReference.java +++ b/src/java/org/apache/poi/ss/util/CellReference.java @@ -17,17 +17,26 @@ package org.apache.poi.ss.util; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.apache.poi.hssf.record.formula.SheetNameFormatter; /** - * Common convertion functions between Excel style A1, C27 style - * cell references, and POI usermodel style row=0, column=0 - * style references. - * Applys for both HSSF and XSSF. + * * @author Avik Sengupta * @author Dennis Doubleday (patch to seperateRowColumns()) */ public class CellReference { + /** + * Used to classify identifiers found in formulas as cell references or not. + */ + public static final class NameType { + public static final int CELL = 1; + public static final int NAMED_RANGE = 2; + public static final int BAD_CELL_OR_NAMED_RANGE = -1; + } + /** The character ($) that signifies a row or column value is absolute instead of relative */ private static final char ABSOLUTE_REFERENCE_MARKER = '$'; /** The character (!) that separates sheet names from cell references */ @@ -35,6 +44,20 @@ public class CellReference { /** The character (') used to quote sheet names when they contain special characters */ private static final char SPECIAL_NAME_DELIMITER = '\''; + /** + * Matches a run of letters followed by a run of digits. The run of letters is group 1 and the + * run of digits is group 2. Each group may optionally be prefixed with a single '$'. + */ + private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)"); + /** + * Named range names must start with a letter or underscore. Subsequent characters may include + * digits or dot. (They can even end in dot). + */ + private static final Pattern NAMED_RANGE_NAME_PATTERN = Pattern.compile("[_A-Za-z][_.A-Za-z0-9]*"); + private static final String BIFF8_LAST_COLUMN = "IV"; + private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length(); + private static final String BIFF8_LAST_ROW = String.valueOf(0x10000); + private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length(); private final int _rowIndex; private final int _colIndex; @@ -70,13 +93,13 @@ public class CellReference { _rowIndex = Integer.parseInt(rowRef)-1; // -1 to convert 1-based to zero-based } - /** - * Creates a cell reference for the given row and cell. - * Assumes these references are relative - */ - public CellReference(int row, int col) { - this(row, col, false, false); + public CellReference(int pRow, int pCol) { + this(pRow, pCol, false, false); } + public CellReference(int pRow, short pCol) { + this(pRow, (int)pCol, false, false); + } + public CellReference(int pRow, int pCol, boolean pAbsRow, boolean pAbsCol) { this(null, pRow, pCol, pAbsRow, pAbsCol); } @@ -97,7 +120,7 @@ public class CellReference { } public int getRow(){return _rowIndex;} - public int getCol(){return _colIndex;} + public short getCol(){return (short) _colIndex;} public boolean isRowAbsolute(){return _isRowAbs;} public boolean isColAbsolute(){return _isColAbs;} /** @@ -111,27 +134,148 @@ public class CellReference { /** * takes in a column reference portion of a CellRef and converts it from * ALPHA-26 number format to 0-based base 10. - * ALPHA-26 goes A to Z, then AA to AZ, BA to BZ, ..., ZA to ZZ, - * AAA to AAZ, ABA to ABZ, ..., AZA to AZZ, BAA to BAZ etc */ private int convertColStringToNum(String ref) { - int lastIx = ref.length()-1; - int retval=0; - int pos = 0; + int lastIx = ref.length()-1; + int retval=0; + int pos = 0; + + for (int k = lastIx; k > -1; k--) { + char thechar = ref.charAt(k); + // Character.getNumericValue() returns the values + // 10-35 for the letter A-Z + int shift = (int)Math.pow(26, pos); + retval += (Character.getNumericValue(thechar)-9) * shift; + pos++; + } + return retval-1; + } - for (int k = lastIx; k > -1; k--) { - char thechar = ref.charAt(k); - // Character.getNumericValue() returns the values - // 10-35 for the letter A-Z - int shift = (int)Math.pow(26, pos); - retval += (Character.getNumericValue(thechar)-9) * shift; - pos++; - } - return retval-1; + /** + * Classifies an identifier as either a simple (2D) cell reference or a named range name + * @return one of the values from <tt>NameType</tt> + */ + public static int classifyCellReference(String str) { + int len = str.length(); + if (len < 1) { + throw new IllegalArgumentException("Empty string not allowed"); + } + char firstChar = str.charAt(0); + switch (firstChar) { + case ABSOLUTE_REFERENCE_MARKER: + case '.': + case '_': + break; + default: + if (!Character.isLetter(firstChar)) { + throw new IllegalArgumentException("Invalid first char (" + firstChar + + ") of cell reference or named range. Letter expected"); + } + } + if (!Character.isDigit(str.charAt(len-1))) { + // no digits at end of str + return validateNamedRangeName(str); + } + Matcher cellRefPatternMatcher = CELL_REF_PATTERN.matcher(str); + if (!cellRefPatternMatcher.matches()) { + return validateNamedRangeName(str); + } + String lettersGroup = cellRefPatternMatcher.group(1); + String digitsGroup = cellRefPatternMatcher.group(2); + if (cellReferenceIsWithinRange(lettersGroup, digitsGroup)) { + // valid cell reference + return NameType.CELL; + } + // If str looks like a cell reference, but is out of (row/col) range, it is a valid + // named range name + // This behaviour is a little weird. For example, "IW123" is a valid named range name + // because the column "IW" is beyond the maximum "IV". Note - this behaviour is version + // dependent. In Excel 2007, "IW123" is not a valid named range name. + if (str.indexOf(ABSOLUTE_REFERENCE_MARKER) >= 0) { + // Of course, named range names cannot have '$' + return NameType.BAD_CELL_OR_NAMED_RANGE; + } + return NameType.NAMED_RANGE; } + private static int validateNamedRangeName(String str) { + if (!NAMED_RANGE_NAME_PATTERN.matcher(str).matches()) { + return NameType.BAD_CELL_OR_NAMED_RANGE; + } + return NameType.NAMED_RANGE; + + } + + + /** + * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be + * interpreted as a cell reference. Names of that form can be also used for sheets and/or + * named ranges, and in those circumstances, the question of whether the potential cell + * reference is valid (in range) becomes important. + * <p/> + * Note - that the maximum sheet size varies across Excel versions: + * <p/> + * <blockquote><table border="0" cellpadding="1" cellspacing="0" + * summary="Notable cases."> + * <tr><th>Version </th><th>File Format </th> + * <th>Last Column </th><th>Last Row</th></tr> + * <tr><td>97-2003</td><td>BIFF8</td><td>"IV" (2^8)</td><td>65536 (2^14)</td></tr> + * <tr><td>2007</td><td>BIFF12</td><td>"XFD" (2^14)</td><td>1048576 (2^20)</td></tr> + * </table></blockquote> + * POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for + * this method: + * <blockquote><table border="0" cellpadding="1" cellspacing="0" + * summary="Notable cases."> + * <tr><th>Input </th> + * <th>Result </th></tr> + * <tr><td>"A", "1"</td><td>true</td></tr> + * <tr><td>"a", "111"</td><td>true</td></tr> + * <tr><td>"A", "65536"</td><td>true</td></tr> + * <tr><td>"A", "65537"</td><td>false</td></tr> + * <tr><td>"iv", "1"</td><td>true</td></tr> + * <tr><td>"IW", "1"</td><td>false</td></tr> + * <tr><td>"AAA", "1"</td><td>false</td></tr> + * <tr><td>"a", "111"</td><td>true</td></tr> + * <tr><td>"Sheet", "1"</td><td>false</td></tr> + * </table></blockquote> + * + * @param colStr a string of only letter characters + * @param rowStr a string of only digit characters + * @return <code>true</code> if the row and col parameters are within range of a BIFF8 spreadsheet. + */ + public static boolean cellReferenceIsWithinRange(String colStr, String rowStr) { + int numberOfLetters = colStr.length(); + if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) { + // "Sheet1" case etc + return false; // that was easy + } + int nDigits = rowStr.length(); + if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) { + return false; + } + if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) { + if(colStr.toUpperCase().compareTo(BIFF8_LAST_COLUMN) > 0) { + return false; + } + } else { + // apparent column name has less chars than max + // no need to check range + } + + if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) { + // ASCII comparison is valid if digit count is same + if(rowStr.compareTo(BIFF8_LAST_ROW) > 0) { + return false; + } + } else { + // apparent row has less chars than max + // no need to check range + } + + return true; + } - /** + /** * Separates the row from the columns and returns an array of three Strings. The first element * is the sheet name. Only the first element may be null. The second element in is the column * name still in ALPHA-26 number format. The third element is the row. @@ -212,24 +356,24 @@ public class CellReference { * eg column #3 -> D */ protected static String convertNumToColString(int col) { - // Excel counts column A as the 1st column, we - // treat it as the 0th one - int excelColNum = col + 1; - - String colRef = ""; - int colRemain = excelColNum; - - while(colRemain > 0) { - int thisPart = colRemain % 26; - if(thisPart == 0) { thisPart = 26; } - colRemain = (colRemain - thisPart) / 26; - - // The letter A is at 65 - char colChar = (char)(thisPart+64); - colRef = colChar + colRef; - } - - return colRef; + // Excel counts column A as the 1st column, we + // treat it as the 0th one + int excelColNum = col + 1; + + String colRef = ""; + int colRemain = excelColNum; + + while(colRemain > 0) { + int thisPart = colRemain % 26; + if(thisPart == 0) { thisPart = 26; } + colRemain = (colRemain - thisPart) / 26; + + // The letter A is at 65 + char colChar = (char)(thisPart+64); + colRef = colChar + colRef; + } + + return colRef; } /** @@ -260,21 +404,21 @@ public class CellReference { return sb.toString(); } - /** - * Returns the three parts of the cell reference, the - * Sheet name (or null if none supplied), the 1 based - * row number, and the A based column letter. - * This will not include any markers for absolute - * references, so use {@link #formatAsString()} - * to properly turn references into strings. - */ - public String[] getCellRefParts() { - return new String[] { - _sheetName, - Integer.toString(_rowIndex+1), - convertNumToColString(_colIndex) - }; - } + /** + * Returns the three parts of the cell reference, the + * Sheet name (or null if none supplied), the 1 based + * row number, and the A based column letter. + * This will not include any markers for absolute + * references, so use {@link #formatAsString()} + * to properly turn references into strings. + */ + public String[] getCellRefParts() { + return new String[] { + _sheetName, + Integer.toString(_rowIndex+1), + convertNumToColString(_colIndex) + }; + } /** * Appends cell reference with '$' markers for absolute values as required. |