You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

CellReference.java 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.ss.util;
  16. import java.util.regex.Matcher;
  17. import java.util.regex.Pattern;
  18. import org.apache.poi.ss.formula.SheetNameFormatter;
  19. import org.apache.poi.ss.SpreadsheetVersion;
  20. import org.apache.poi.ss.usermodel.Cell;
  21. /**
  22. *
  23. * @author Avik Sengupta
  24. * @author Dennis Doubleday (patch to seperateRowColumns())
  25. */
  26. public class CellReference {
  27. /**
  28. * Used to classify identifiers found in formulas as cell references or not.
  29. */
  30. public enum NameType {
  31. CELL,
  32. NAMED_RANGE,
  33. COLUMN,
  34. ROW,
  35. BAD_CELL_OR_NAMED_RANGE;
  36. }
  37. /** The character ($) that signifies a row or column value is absolute instead of relative */
  38. private static final char ABSOLUTE_REFERENCE_MARKER = '$';
  39. /** The character (!) that separates sheet names from cell references */
  40. private static final char SHEET_NAME_DELIMITER = '!';
  41. /** The character (') used to quote sheet names when they contain special characters */
  42. private static final char SPECIAL_NAME_DELIMITER = '\'';
  43. /**
  44. * Matches a run of one or more letters followed by a run of one or more digits.
  45. * The run of letters is group 1 and the run of digits is group 2.
  46. * Each group may optionally be prefixed with a single '$'.
  47. */
  48. private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)");
  49. /**
  50. * Matches a run of one or more letters. The run of letters is group 1.
  51. * The text may optionally be prefixed with a single '$'.
  52. */
  53. private static final Pattern COLUMN_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)");
  54. /**
  55. * Matches a run of one or more digits. The run of digits is group 1.
  56. * The text may optionally be prefixed with a single '$'.
  57. */
  58. private static final Pattern ROW_REF_PATTERN = Pattern.compile("\\$?([0-9]+)");
  59. /**
  60. * Named range names must start with a letter or underscore. Subsequent characters may include
  61. * digits or dot. (They can even end in dot).
  62. */
  63. private static final Pattern NAMED_RANGE_NAME_PATTERN = Pattern.compile("[_A-Za-z][_.A-Za-z0-9]*");
  64. //private static final String BIFF8_LAST_COLUMN = SpreadsheetVersion.EXCEL97.getLastColumnName();
  65. //private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length();
  66. //private static final String BIFF8_LAST_ROW = String.valueOf(SpreadsheetVersion.EXCEL97.getMaxRows());
  67. //private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length();
  68. private final int _rowIndex;
  69. private final int _colIndex;
  70. private final String _sheetName;
  71. private final boolean _isRowAbs;
  72. private final boolean _isColAbs;
  73. /**
  74. * Create an cell ref from a string representation. Sheet names containing special characters should be
  75. * delimited and escaped as per normal syntax rules for formulas.
  76. */
  77. public CellReference(String cellRef) {
  78. if(cellRef.endsWith("#REF!")) {
  79. throw new IllegalArgumentException("Cell reference invalid: " + cellRef);
  80. }
  81. String[] parts = separateRefParts(cellRef);
  82. _sheetName = parts[0];
  83. String colRef = parts[1];
  84. _isColAbs = (colRef.length() > 0) && colRef.charAt(0) == '$';
  85. if (_isColAbs) {
  86. colRef = colRef.substring(1);
  87. }
  88. if (colRef.length() == 0) {
  89. _colIndex = -1;
  90. } else {
  91. _colIndex = convertColStringToIndex(colRef);
  92. }
  93. String rowRef=parts[2];
  94. _isRowAbs = (rowRef.length() > 0) && rowRef.charAt(0) == '$';
  95. if (_isRowAbs) {
  96. rowRef = rowRef.substring(1);
  97. }
  98. if (rowRef.length() == 0) {
  99. _rowIndex = -1;
  100. } else {
  101. _rowIndex = Integer.parseInt(rowRef)-1; // -1 to convert 1-based to zero-based
  102. }
  103. }
  104. public CellReference(int pRow, int pCol) {
  105. this(pRow, pCol, false, false);
  106. }
  107. public CellReference(int pRow, short pCol) {
  108. this(pRow, pCol & 0xFFFF, false, false);
  109. }
  110. public CellReference(Cell cell) {
  111. this(cell.getRowIndex(), cell.getColumnIndex(), false, false);
  112. }
  113. public CellReference(int pRow, int pCol, boolean pAbsRow, boolean pAbsCol) {
  114. this(null, pRow, pCol, pAbsRow, pAbsCol);
  115. }
  116. public CellReference(String pSheetName, int pRow, int pCol, boolean pAbsRow, boolean pAbsCol) {
  117. // TODO - "-1" is a special value being temporarily used for whole row and whole column area references.
  118. // so these checks are currently N.Q.R.
  119. if(pRow < -1) {
  120. throw new IllegalArgumentException("row index may not be negative");
  121. }
  122. if(pCol < -1) {
  123. throw new IllegalArgumentException("column index may not be negative");
  124. }
  125. _sheetName = pSheetName;
  126. _rowIndex=pRow;
  127. _colIndex=pCol;
  128. _isRowAbs = pAbsRow;
  129. _isColAbs=pAbsCol;
  130. }
  131. public int getRow(){return _rowIndex;}
  132. public short getCol(){return (short) _colIndex;}
  133. public boolean isRowAbsolute(){return _isRowAbs;}
  134. public boolean isColAbsolute(){return _isColAbs;}
  135. /**
  136. * @return possibly <code>null</code> if this is a 2D reference. Special characters are not
  137. * escaped or delimited
  138. */
  139. public String getSheetName(){
  140. return _sheetName;
  141. }
  142. public static boolean isPartAbsolute(String part) {
  143. return part.charAt(0) == ABSOLUTE_REFERENCE_MARKER;
  144. }
  145. /**
  146. * takes in a column reference portion of a CellRef and converts it from
  147. * ALPHA-26 number format to 0-based base 10.
  148. * 'A' -> 0
  149. * 'Z' -> 25
  150. * 'AA' -> 26
  151. * 'IV' -> 255
  152. * @return zero based column index
  153. */
  154. public static int convertColStringToIndex(String ref) {
  155. int pos = 0;
  156. int retval=0;
  157. for (int k = ref.length()-1; k >= 0; k--) {
  158. char thechar = ref.charAt(k);
  159. if (thechar == ABSOLUTE_REFERENCE_MARKER) {
  160. if (k != 0) {
  161. throw new IllegalArgumentException("Bad col ref format '" + ref + "'");
  162. }
  163. break;
  164. }
  165. // Character.getNumericValue() returns the values
  166. // 10-35 for the letter A-Z
  167. int shift = (int)Math.pow(26, pos);
  168. retval += (Character.getNumericValue(thechar)-9) * shift;
  169. pos++;
  170. }
  171. return retval-1;
  172. }
  173. /**
  174. * Classifies an identifier as either a simple (2D) cell reference or a named range name
  175. * @return one of the values from <tt>NameType</tt>
  176. */
  177. public static NameType classifyCellReference(String str, SpreadsheetVersion ssVersion) {
  178. int len = str.length();
  179. if (len < 1) {
  180. throw new IllegalArgumentException("Empty string not allowed");
  181. }
  182. char firstChar = str.charAt(0);
  183. switch (firstChar) {
  184. case ABSOLUTE_REFERENCE_MARKER:
  185. case '.':
  186. case '_':
  187. break;
  188. default:
  189. if (!Character.isLetter(firstChar) && !Character.isDigit(firstChar)) {
  190. throw new IllegalArgumentException("Invalid first char (" + firstChar
  191. + ") of cell reference or named range. Letter expected");
  192. }
  193. }
  194. if (!Character.isDigit(str.charAt(len-1))) {
  195. // no digits at end of str
  196. return validateNamedRangeName(str, ssVersion);
  197. }
  198. Matcher cellRefPatternMatcher = CELL_REF_PATTERN.matcher(str);
  199. if (!cellRefPatternMatcher.matches()) {
  200. return validateNamedRangeName(str, ssVersion);
  201. }
  202. String lettersGroup = cellRefPatternMatcher.group(1);
  203. String digitsGroup = cellRefPatternMatcher.group(2);
  204. if (cellReferenceIsWithinRange(lettersGroup, digitsGroup, ssVersion)) {
  205. // valid cell reference
  206. return NameType.CELL;
  207. }
  208. // If str looks like a cell reference, but is out of (row/col) range, it is a valid
  209. // named range name
  210. // This behaviour is a little weird. For example, "IW123" is a valid named range name
  211. // because the column "IW" is beyond the maximum "IV". Note - this behaviour is version
  212. // dependent. In BIFF12, "IW123" is not a valid named range name, but in BIFF8 it is.
  213. if (str.indexOf(ABSOLUTE_REFERENCE_MARKER) >= 0) {
  214. // Of course, named range names cannot have '$'
  215. return NameType.BAD_CELL_OR_NAMED_RANGE;
  216. }
  217. return NameType.NAMED_RANGE;
  218. }
  219. private static NameType validateNamedRangeName(String str, SpreadsheetVersion ssVersion) {
  220. Matcher colMatcher = COLUMN_REF_PATTERN.matcher(str);
  221. if (colMatcher.matches()) {
  222. String colStr = colMatcher.group(1);
  223. if (isColumnWithnRange(colStr, ssVersion)) {
  224. return NameType.COLUMN;
  225. }
  226. }
  227. Matcher rowMatcher = ROW_REF_PATTERN.matcher(str);
  228. if (rowMatcher.matches()) {
  229. String rowStr = rowMatcher.group(1);
  230. if (isRowWithnRange(rowStr, ssVersion)) {
  231. return NameType.ROW;
  232. }
  233. }
  234. if (!NAMED_RANGE_NAME_PATTERN.matcher(str).matches()) {
  235. return NameType.BAD_CELL_OR_NAMED_RANGE;
  236. }
  237. return NameType.NAMED_RANGE;
  238. }
  239. /**
  240. * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be
  241. * interpreted as a cell reference. Names of that form can be also used for sheets and/or
  242. * named ranges, and in those circumstances, the question of whether the potential cell
  243. * reference is valid (in range) becomes important.
  244. * <p/>
  245. * Note - that the maximum sheet size varies across Excel versions:
  246. * <p/>
  247. * <blockquote><table border="0" cellpadding="1" cellspacing="0"
  248. * summary="Notable cases.">
  249. * <tr><th>Version&nbsp;&nbsp;</th><th>File Format&nbsp;&nbsp;</th>
  250. * <th>Last Column&nbsp;&nbsp;</th><th>Last Row</th></tr>
  251. * <tr><td>97-2003</td><td>BIFF8</td><td>"IV" (2^8)</td><td>65536 (2^14)</td></tr>
  252. * <tr><td>2007</td><td>BIFF12</td><td>"XFD" (2^14)</td><td>1048576 (2^20)</td></tr>
  253. * </table></blockquote>
  254. * POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for
  255. * this method:
  256. * <blockquote><table border="0" cellpadding="1" cellspacing="0"
  257. * summary="Notable cases.">
  258. * <tr><th>Input&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</th>
  259. * <th>Result&nbsp;</th></tr>
  260. * <tr><td>"A", "1"</td><td>true</td></tr>
  261. * <tr><td>"a", "111"</td><td>true</td></tr>
  262. * <tr><td>"A", "65536"</td><td>true</td></tr>
  263. * <tr><td>"A", "65537"</td><td>false</td></tr>
  264. * <tr><td>"iv", "1"</td><td>true</td></tr>
  265. * <tr><td>"IW", "1"</td><td>false</td></tr>
  266. * <tr><td>"AAA", "1"</td><td>false</td></tr>
  267. * <tr><td>"a", "111"</td><td>true</td></tr>
  268. * <tr><td>"Sheet", "1"</td><td>false</td></tr>
  269. * </table></blockquote>
  270. *
  271. * @param colStr a string of only letter characters
  272. * @param rowStr a string of only digit characters
  273. * @return <code>true</code> if the row and col parameters are within range of a BIFF8 spreadsheet.
  274. */
  275. public static boolean cellReferenceIsWithinRange(String colStr, String rowStr, SpreadsheetVersion ssVersion) {
  276. if (!isColumnWithnRange(colStr, ssVersion)) {
  277. return false;
  278. }
  279. return isRowWithnRange(rowStr, ssVersion);
  280. }
  281. public static boolean isColumnWithnRange(String colStr, SpreadsheetVersion ssVersion) {
  282. String lastCol = ssVersion.getLastColumnName();
  283. int lastColLength = lastCol.length();
  284. int numberOfLetters = colStr.length();
  285. if(numberOfLetters > lastColLength) {
  286. // "Sheet1" case etc
  287. return false; // that was easy
  288. }
  289. if(numberOfLetters == lastColLength) {
  290. if(colStr.toUpperCase().compareTo(lastCol) > 0) {
  291. return false;
  292. }
  293. } else {
  294. // apparent column name has less chars than max
  295. // no need to check range
  296. }
  297. return true;
  298. }
  299. public static boolean isRowWithnRange(String rowStr, SpreadsheetVersion ssVersion) {
  300. int rowNum = Integer.parseInt(rowStr);
  301. if (rowNum < 0) {
  302. throw new IllegalStateException("Invalid rowStr '" + rowStr + "'.");
  303. }
  304. if (rowNum == 0) {
  305. // execution gets here because caller does first pass of discriminating
  306. // potential cell references using a simplistic regex pattern.
  307. return false;
  308. }
  309. return rowNum <= ssVersion.getMaxRows();
  310. }
  311. /**
  312. * Separates the row from the columns and returns an array of three Strings. The first element
  313. * is the sheet name. Only the first element may be null. The second element in is the column
  314. * name still in ALPHA-26 number format. The third element is the row.
  315. */
  316. private static String[] separateRefParts(String reference) {
  317. int plingPos = reference.lastIndexOf(SHEET_NAME_DELIMITER);
  318. String sheetName = parseSheetName(reference, plingPos);
  319. int start = plingPos+1;
  320. int length = reference.length();
  321. int loc = start;
  322. // skip initial dollars
  323. if (reference.charAt(loc)==ABSOLUTE_REFERENCE_MARKER) {
  324. loc++;
  325. }
  326. // step over column name chars until first digit (or dollars) for row number.
  327. for (; loc < length; loc++) {
  328. char ch = reference.charAt(loc);
  329. if (Character.isDigit(ch) || ch == ABSOLUTE_REFERENCE_MARKER) {
  330. break;
  331. }
  332. }
  333. return new String[] {
  334. sheetName,
  335. reference.substring(start,loc),
  336. reference.substring(loc),
  337. };
  338. }
  339. private static String parseSheetName(String reference, int indexOfSheetNameDelimiter) {
  340. if(indexOfSheetNameDelimiter < 0) {
  341. return null;
  342. }
  343. boolean isQuoted = reference.charAt(0) == SPECIAL_NAME_DELIMITER;
  344. if(!isQuoted) {
  345. return reference.substring(0, indexOfSheetNameDelimiter);
  346. }
  347. int lastQuotePos = indexOfSheetNameDelimiter-1;
  348. if(reference.charAt(lastQuotePos) != SPECIAL_NAME_DELIMITER) {
  349. throw new RuntimeException("Mismatched quotes: (" + reference + ")");
  350. }
  351. // TODO - refactor cell reference parsing logic to one place.
  352. // Current known incarnations:
  353. // FormulaParser.GetName()
  354. // CellReference.parseSheetName() (here)
  355. // AreaReference.separateAreaRefs()
  356. // SheetNameFormatter.format() (inverse)
  357. StringBuffer sb = new StringBuffer(indexOfSheetNameDelimiter);
  358. for(int i=1; i<lastQuotePos; i++) { // Note boundaries - skip outer quotes
  359. char ch = reference.charAt(i);
  360. if(ch != SPECIAL_NAME_DELIMITER) {
  361. sb.append(ch);
  362. continue;
  363. }
  364. if(i < lastQuotePos) {
  365. if(reference.charAt(i+1) == SPECIAL_NAME_DELIMITER) {
  366. // two consecutive quotes is the escape sequence for a single one
  367. i++; // skip this and keep parsing the special name
  368. sb.append(ch);
  369. continue;
  370. }
  371. }
  372. throw new RuntimeException("Bad sheet name quote escaping: (" + reference + ")");
  373. }
  374. return sb.toString();
  375. }
  376. /**
  377. * Takes in a 0-based base-10 column and returns a ALPHA-26
  378. * representation.
  379. * eg column #3 -> D
  380. */
  381. public static String convertNumToColString(int col) {
  382. // Excel counts column A as the 1st column, we
  383. // treat it as the 0th one
  384. int excelColNum = col + 1;
  385. String colRef = "";
  386. int colRemain = excelColNum;
  387. while(colRemain > 0) {
  388. int thisPart = colRemain % 26;
  389. if(thisPart == 0) { thisPart = 26; }
  390. colRemain = (colRemain - thisPart) / 26;
  391. // The letter A is at 65
  392. char colChar = (char)(thisPart+64);
  393. colRef = colChar + colRef;
  394. }
  395. return colRef;
  396. }
  397. /**
  398. * Example return values:
  399. * <table border="0" cellpadding="1" cellspacing="0" summary="Example return values">
  400. * <tr><th align='left'>Result</th><th align='left'>Comment</th></tr>
  401. * <tr><td>A1</td><td>Cell reference without sheet</td></tr>
  402. * <tr><td>Sheet1!A1</td><td>Standard sheet name</td></tr>
  403. * <tr><td>'O''Brien''s Sales'!A1'&nbsp;</td><td>Sheet name with special characters</td></tr>
  404. * </table>
  405. * @return the text representation of this cell reference as it would appear in a formula.
  406. */
  407. public String formatAsString() {
  408. StringBuffer sb = new StringBuffer(32);
  409. if(_sheetName != null) {
  410. SheetNameFormatter.appendFormat(sb, _sheetName);
  411. sb.append(SHEET_NAME_DELIMITER);
  412. }
  413. appendCellReference(sb);
  414. return sb.toString();
  415. }
  416. public String toString() {
  417. StringBuffer sb = new StringBuffer(64);
  418. sb.append(getClass().getName()).append(" [");
  419. sb.append(formatAsString());
  420. sb.append("]");
  421. return sb.toString();
  422. }
  423. /**
  424. * Returns the three parts of the cell reference, the
  425. * Sheet name (or null if none supplied), the 1 based
  426. * row number, and the A based column letter.
  427. * This will not include any markers for absolute
  428. * references, so use {@link #formatAsString()}
  429. * to properly turn references into strings.
  430. */
  431. public String[] getCellRefParts() {
  432. return new String[] {
  433. _sheetName,
  434. Integer.toString(_rowIndex+1),
  435. convertNumToColString(_colIndex)
  436. };
  437. }
  438. /**
  439. * Appends cell reference with '$' markers for absolute values as required.
  440. * Sheet name is not included.
  441. */
  442. /* package */ void appendCellReference(StringBuffer sb) {
  443. if (_colIndex != -1) {
  444. if(_isColAbs) {
  445. sb.append(ABSOLUTE_REFERENCE_MARKER);
  446. }
  447. sb.append( convertNumToColString(_colIndex));
  448. }
  449. if (_rowIndex != -1) {
  450. if(_isRowAbs) {
  451. sb.append(ABSOLUTE_REFERENCE_MARKER);
  452. }
  453. sb.append(_rowIndex+1);
  454. }
  455. }
  456. /**
  457. * Checks whether this cell reference is equal to another object.
  458. * <p>
  459. * Two cells references are assumed to be equal if their string representations
  460. * ({@link #formatAsString()} are equal.
  461. * </p>
  462. */
  463. @Override
  464. public boolean equals(Object o){
  465. if(!(o instanceof CellReference)) {
  466. return false;
  467. }
  468. CellReference cr = (CellReference) o;
  469. return _rowIndex == cr._rowIndex
  470. && _colIndex == cr._colIndex
  471. && _isRowAbs == cr._isColAbs
  472. && _isColAbs == cr._isColAbs;
  473. }
  474. }