You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DataValidationEvaluator.java 24KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.ss.formula;
  16. import java.util.ArrayList;
  17. import java.util.Collections;
  18. import java.util.HashMap;
  19. import java.util.List;
  20. import java.util.Map;
  21. import org.apache.poi.ss.formula.eval.BlankEval;
  22. import org.apache.poi.ss.formula.eval.BoolEval;
  23. import org.apache.poi.ss.formula.eval.ErrorEval;
  24. import org.apache.poi.ss.formula.eval.NumberEval;
  25. import org.apache.poi.ss.formula.eval.RefEval;
  26. import org.apache.poi.ss.formula.eval.StringEval;
  27. import org.apache.poi.ss.formula.eval.ValueEval;
  28. import org.apache.poi.ss.usermodel.Cell;
  29. import org.apache.poi.ss.usermodel.CellType;
  30. import org.apache.poi.ss.usermodel.DataValidation;
  31. import org.apache.poi.ss.usermodel.DataValidationConstraint;
  32. import org.apache.poi.ss.usermodel.DataValidationConstraint.OperatorType;
  33. import org.apache.poi.ss.usermodel.DataValidationConstraint.ValidationType;
  34. import org.apache.poi.ss.usermodel.Sheet;
  35. import org.apache.poi.ss.usermodel.Workbook;
  36. import org.apache.poi.ss.util.CellRangeAddressBase;
  37. import org.apache.poi.ss.util.CellRangeAddressList;
  38. import org.apache.poi.ss.util.CellReference;
  39. import org.apache.poi.ss.util.SheetUtil;
  /**
   * Evaluates Data Validation constraints.
   * <p>
   * For performance reasons, this class keeps a cache of all previously retrieved {@link DataValidation} instances.
   * Be sure to call {@link #clearAllCachedValues()} if any workbook validation definitions are
   * added, modified, or deleted.
   * <p>
   * Changing cell values should be fine, as long as the corresponding
   * {@link WorkbookEvaluator#clearAllCachedResultValues()} is called as well.
   */
  51. public class DataValidationEvaluator {
  /**
   * Cache of the data validations of each sheet, filled as sheets are
   * requested because the validations are expensive to compute.
   * <p>
   * The map is keyed by sheet name: sheets don't implement equals, and since
   * {@link Sheet} is an interface there is no guarantee that an implementation
   * won't recreate instances on the fly.
   */
  private final Map<String, List<? extends DataValidation>> validations =
          new HashMap<String, List<? extends DataValidation>>();

  /** Workbook whose sheets and cells are consulted during validation. */
  private final Workbook workbook;

  /** Formula evaluator obtained from the provider given to the constructor. */
  private final WorkbookEvaluator workbookEvaluator;

  /**
   * Creates an evaluator bound to a workbook.
   *
   * @param wb workbook whose data validations are evaluated
   * @param provider source of the {@link WorkbookEvaluator} used to evaluate
   *        validation formulas
   */
  public DataValidationEvaluator(Workbook wb, WorkbookEvaluatorProvider provider) {
      final WorkbookEvaluator evaluator = provider._getWorkbookEvaluator();
      this.workbook = wb;
      this.workbookEvaluator = evaluator;
  }
  66. protected WorkbookEvaluator getWorkbookEvaluator() {
  67. return workbookEvaluator;
  68. }
  69. public void clearAllCachedValues() {
  70. validations.clear();
  71. }
  72. /**
  73. * lazy load validations by sheet, since reading the CT* types is expensive
  74. * @param sheet
  75. * @return
  76. */
  77. private List<? extends DataValidation> getValidations(Sheet sheet) {
  78. List<? extends DataValidation> dvs = validations.get(sheet.getSheetName());
  79. if (dvs == null && !validations.containsKey(sheet.getSheetName())) {
  80. dvs = sheet.getDataValidations();
  81. validations.put(sheet.getSheetName(), dvs);
  82. }
  83. return dvs;
  84. }
  85. /**
  86. * Finds and returns the {@link DataValidation} for the cell, if there is
  87. * one. Lookup is based on the first match from
  88. * {@link DataValidation#getRegions()} for the cell's sheet. DataValidation
  89. * regions must be in the same sheet as the DataValidation. Allowed values
  90. * expressions may reference other sheets, however.
  91. *
  92. * @param cell reference to check - use this in case the cell does not actually exist yet
  93. * @return the DataValidation applicable to the given cell, or null if no
  94. * validation applies
  95. */
  96. public DataValidation getValidationForCell(CellReference cell) {
  97. return getValidationContextForCell(cell).getValidation();
  98. }
  99. public DataValidationContext getValidationContextForCell(CellReference cell) {
  100. // TODO
  101. final Sheet sheet = workbook.getSheet(cell.getSheetName());
  102. if (sheet == null) return null;
  103. final List<? extends DataValidation> dataValidations = getValidations(sheet);
  104. if (dataValidations == null) return null;
  105. for (DataValidation dv : dataValidations) {
  106. final CellRangeAddressList regions = dv.getRegions();
  107. if (regions == null) return null;
  108. // current implementation can't return null
  109. for (CellRangeAddressBase range : regions.getCellRangeAddresses()) {
  110. if (range.isInRange(cell)) {
  111. return new DataValidationContext(dv, this, range, cell);
  112. }
  113. }
  114. }
  115. return null;
  116. }
  117. /**
  118. * If {@link #getValidationForCell(Cell)} returns an instance, and the
  119. * {@link ValidationType} is {@link ValidationType#LIST}, return the valid
  120. * values, whether they are from a static list or cell range.
  121. * <p/>
  122. * For all other validation types, or no validation at all, this method
  123. * returns null.
  124. * <p/>
  125. * This method could throw an exception if the validation type is not LIST,
  126. * but since this method is mostly useful in UI contexts, null seems the
  127. * easier path.
  128. *
  129. * @param cell reference to check - use this in case the cell does not actually exist yet
  130. * @return returns an unmodifiable {@link List} of {@link ValueEval}s if applicable, or
  131. * null
  132. */
  133. public List<ValueEval> getValidationValuesForCell(CellReference cell) {
  134. DataValidationContext context = getValidationContextForCell(cell);
  135. if (context == null) return null;
  136. return getValidationValuesForConstraint(context);
  137. }
  138. /**
  139. * static so enums can reference it without creating a whole instance
  140. * @param cell
  141. * @param val
  142. * @return returns an unmodifiable {@link List} of {@link ValueEval}s, which may be empty
  143. */
  144. protected static List<ValueEval> getValidationValuesForConstraint(DataValidationContext context) {
  145. final DataValidationConstraint val = context.getValidation().getValidationConstraint();
  146. if (val.getValidationType() != ValidationType.LIST) return null;
  147. String formula = val.getFormula1();
  148. final List<ValueEval> values = new ArrayList<ValueEval>();
  149. if (val.getExplicitListValues() != null && val.getExplicitListValues().length > 0) {
  150. // assumes parsing interprets the overloaded property right for XSSF
  151. for (String s : val.getExplicitListValues()) {
  152. if (s != null) values.add(new StringEval(s)); // constructor throws exception on null
  153. }
  154. } else if (formula != null) {
  155. // evaluate formula for cell refs then get their values
  156. ValueEval eval = context.getEvaluator().getWorkbookEvaluator().evaluate(formula, context.getTarget(), context.getRegion());
  157. // formula is a StringEval if the validation is by a fixed list. Use the explicit list later.
  158. // there is no way from the model to tell if the list is fixed values or formula based.
  159. if (eval instanceof TwoDEval) {
  160. TwoDEval twod = (TwoDEval) eval;
  161. for (int i=0; i < twod.getHeight(); i++) {
  162. final ValueEval cellValue = twod.getValue(i, 0);
  163. values.add(cellValue);
  164. }
  165. }
  166. }
  167. return Collections.unmodifiableList(values);
  168. }
  169. /**
  170. * Use the validation returned by {@link #getValidationForCell(Cell)} if you
  171. * want the error display details. This is the validation checked by this
  172. * method, which attempts to replicate Excel's data validation rules.
  173. * <p/>
  174. * Note that to properly apply some validations, care must be taken to
  175. * offset the base validation formula by the relative position of the
  176. * current cell, or the wrong value is checked.
  177. *
  178. * @param cell
  179. * @return true if the cell has no validation or the cell value passes the
  180. * defined validation, false if it fails
  181. */
  182. public boolean isValidCell(CellReference cellRef) {
  183. final DataValidationContext context = getValidationContextForCell(cellRef);
  184. if (context == null) return true;
  185. final Cell cell = SheetUtil.getCell(workbook.getSheet(cellRef.getSheetName()), cellRef.getRow(), cellRef.getCol());
  186. // now we can validate the cell
  187. // if empty, return not allowed flag
  188. if ( cell == null
  189. || isType(cell, CellType.BLANK)
  190. || (isType(cell,CellType.STRING)
  191. && (cell.getStringCellValue() == null || cell.getStringCellValue().isEmpty())
  192. )
  193. ) {
  194. return context.getValidation().getEmptyCellAllowed();
  195. }
  196. // cell has a value
  197. return ValidationEnum.isValid(cell, context);
  198. }
  199. /**
  200. * Note that this assumes the cell cached value is up to date and in sync with data edits
  201. * @param cell
  202. * @param type
  203. * @return true if the cell or cached cell formula result type match the given type
  204. */
  205. public static boolean isType(Cell cell, CellType type) {
  206. final CellType cellType = cell.getCellTypeEnum();
  207. return cellType == type
  208. || (cellType == CellType.FORMULA
  209. && cell.getCachedFormulaResultTypeEnum() == type
  210. );
  211. }
  212. /**
  213. * Not calling it ValidationType to avoid confusion for now with DataValidationConstraint.ValidationType.
  214. * Definition order matches OOXML type ID indexes
  215. */
  216. public static enum ValidationEnum {
  217. ANY {
  218. public boolean isValidValue(Cell cell, DataValidationContext context) {
  219. return true;
  220. }
  221. },
  222. INTEGER {
  223. public boolean isValidValue(Cell cell, DataValidationContext context) {
  224. if (super.isValidValue(cell, context)) {
  225. // we know it is a number in the proper range, now check if it is an int
  226. final double value = cell.getNumericCellValue(); // can't get here without a valid numeric value
  227. return Double.valueOf(value).compareTo(Double.valueOf((int) value)) == 0;
  228. }
  229. return false;
  230. }
  231. },
  232. DECIMAL,
  233. LIST {
  234. public boolean isValidValue(Cell cell, DataValidationContext context) {
  235. final List<ValueEval> valueList = getValidationValuesForConstraint(context);
  236. if (valueList == null) return true; // special case
  237. // compare cell value to each item
  238. for (ValueEval listVal : valueList) {
  239. ValueEval comp = listVal instanceof RefEval ? ((RefEval) listVal).getInnerValueEval(context.getSheetIndex()) : listVal;
  240. // any value is valid if the list contains a blank value per Excel help
  241. if (comp instanceof BlankEval) return true;
  242. if (comp instanceof ErrorEval) continue; // nothing to check
  243. if (comp instanceof BoolEval) {
  244. if (isType(cell, CellType.BOOLEAN) && ((BoolEval) comp).getBooleanValue() == cell.getBooleanCellValue() ) {
  245. return true;
  246. } else {
  247. continue; // check the rest
  248. }
  249. }
  250. if (comp instanceof NumberEval) {
  251. // could this have trouble with double precision/rounding errors and date/time values?
  252. // do we need to allow a "close enough" double fractional range?
  253. // I see 17 digits after the decimal separator in XSSF files, and for time values,
  254. // there are sometimes discrepancies in the final decimal place.
  255. // I don't have a validation test case yet though. - GW
  256. if (isType(cell, CellType.NUMERIC) && ((NumberEval) comp).getNumberValue() == cell.getNumericCellValue()) {
  257. return true;
  258. } else {
  259. continue; // check the rest
  260. }
  261. }
  262. if (comp instanceof StringEval) {
  263. // interestingly, in testing, a validation value of the string "TRUE" or "true"
  264. // did not match a boolean cell value of TRUE - so apparently cell type matters
  265. // also, Excel validation is case insensitive - "true" is valid for the list value "TRUE"
  266. if (isType(cell, CellType.STRING) && ((StringEval) comp).getStringValue().equalsIgnoreCase(cell.getStringCellValue())) {
  267. return true;
  268. } else {
  269. continue; // check the rest;
  270. }
  271. }
  272. }
  273. return false; // no matches
  274. }
  275. },
  276. DATE,
  277. TIME,
  278. TEXT_LENGTH {
  279. public boolean isValidValue(Cell cell, DataValidationContext context) {
  280. if (! isType(cell, CellType.STRING)) return false;
  281. String v = cell.getStringCellValue();
  282. return isValidNumericValue(Double.valueOf(v.length()), context);
  283. }
  284. },
  285. FORMULA {
  286. /**
  287. * Note the formula result must either be a boolean result, or anything not in error.
  288. * If boolean, value must be true to pass, anything else valid is also passing, errors fail.
  289. * @see org.apache.poi.ss.formula.DataValidationEvaluator.ValidationEnum#isValidValue(org.apache.poi.ss.usermodel.Cell, org.apache.poi.ss.usermodel.DataValidationConstraint, org.apache.poi.ss.formula.WorkbookEvaluator)
  290. */
  291. public boolean isValidValue(Cell cell, DataValidationContext context) {
  292. ValueEval comp = context.getEvaluator().getWorkbookEvaluator().evaluate(context.getFormula1(), context.getTarget(), context.getRegion());
  293. if (comp instanceof RefEval) {
  294. comp = ((RefEval) comp).getInnerValueEval(((RefEval) comp).getFirstSheetIndex());
  295. }
  296. if (comp instanceof BlankEval) return true;
  297. if (comp instanceof ErrorEval) return false;
  298. if (comp instanceof BoolEval) {
  299. return ((BoolEval) comp).getBooleanValue();
  300. }
  301. // empirically tested in Excel - 0=false, any other number = true/valid
  302. // see test file DataValidationEvaluations.xlsx
  303. if (comp instanceof NumberEval) {
  304. return ((NumberEval) comp).getNumberValue() != 0;
  305. }
  306. return false; // anything else is false, such as text
  307. }
  308. },
  309. ;
  310. public boolean isValidValue(Cell cell, DataValidationContext context) {
  311. return isValidNumericCell(cell, context);
  312. }
  313. /**
  314. * Uses the cell value, which may be the cached formula result value.
  315. * We won't re-evaluate cells here. This validation would be after the cell value was updated externally.
  316. * Excel allows invalid values through methods like copy/paste, and only validates them when the user
  317. * interactively edits the cell.
  318. * @param cell
  319. * @param dvc
  320. * @param wbe
  321. * @return
  322. */
  323. protected boolean isValidNumericCell(Cell cell, DataValidationContext context) {
  324. if ( ! isType(cell, CellType.NUMERIC)) return false;
  325. Double value = Double.valueOf(cell.getNumericCellValue());
  326. return isValidNumericValue(value, context);
  327. }
  328. /**
  329. *
  330. * @param value
  331. * @param context
  332. * @return
  333. */
  334. protected boolean isValidNumericValue(Double value, final DataValidationContext context) {
  335. try {
  336. Double t1 = evalOrConstant(context.getFormula1(), context);
  337. // per Excel, a blank value for a numeric validation constraint formula validates true
  338. if (t1 == null) return true;
  339. Double t2 = null;
  340. if (context.getOperator() == OperatorType.BETWEEN || context.getOperator() == OperatorType.NOT_BETWEEN) {
  341. t2 = evalOrConstant(context.getFormula2(), context);
  342. // per Excel, a blank value for a numeric validation constraint formula validates true
  343. if (t2 == null) return true;
  344. }
  345. return OperatorEnum.values()[context.getOperator()].isValid(value, t1, t2);
  346. } catch (NumberFormatException e) {
  347. // one or both formulas are in error, not evaluating to a number, so the validation is false per Excel's behavior.
  348. return false;
  349. }
  350. }
  351. /**
  352. * Evaluate a numeric formula value as either a constant or numeric expression.
  353. * Note that Excel treats validations with constraint formulas that evaluate to null as valid,
  354. * but evaluations in error or non-numeric are marked invalid.
  355. * @param formula
  356. * @param context
  357. * @return numeric value or null if not defined or the formula evaluates to an empty/missing cell.
  358. * @throws NumberFormatException if the formula is non-numeric when it should be
  359. */
  360. private Double evalOrConstant(String formula, DataValidationContext context) throws NumberFormatException {
  361. if (formula == null || formula.trim().isEmpty()) return null; // shouldn't happen, but just in case
  362. try {
  363. return Double.valueOf(formula);
  364. } catch (NumberFormatException e) {
  365. // must be an expression, then. Overloading by Excel in the file formats.
  366. }
  367. ValueEval eval = context.getEvaluator().getWorkbookEvaluator().evaluate(formula, context.getTarget(), context.getRegion());
  368. if (eval instanceof RefEval) {
  369. eval = ((RefEval) eval).getInnerValueEval(((RefEval) eval).getFirstSheetIndex());
  370. }
  371. if (eval instanceof BlankEval) return null;
  372. if (eval instanceof NumberEval) return Double.valueOf(((NumberEval) eval).getNumberValue());
  373. if (eval instanceof StringEval) {
  374. final String value = ((StringEval) eval).getStringValue();
  375. if (value == null || value.trim().isEmpty()) return null;
  376. // try to parse the cell value as a double and return it
  377. return Double.valueOf(value);
  378. }
  379. throw new NumberFormatException("Formula '" + formula + "' evaluates to something other than a number");
  380. }
  381. /**
  382. * Validates against the type defined in dvc, as an index of the enum values array.
  383. * @param cell
  384. * @param dvc
  385. * @param wbe
  386. * @return true if validation passes
  387. * @throws ArrayIndexOutOfBoundsException if the constraint type is an invalid index
  388. */
  389. public static boolean isValid(Cell cell, DataValidationContext context) {
  390. return values()[context.getValidation().getValidationConstraint().getValidationType()].isValidValue(cell, context);
  391. }
  392. }
  /**
   * Comparison operators for numeric validations.
   * Not calling it OperatorType to avoid confusion for now with
   * DataValidationConstraint.OperatorType.
   * Definition order matches OOXML type ID indexes, so constants must not be
   * reordered.
   */
  public enum OperatorEnum {
      BETWEEN(true) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) >= 0 && cellValue.compareTo(v2) <= 0;
          }
      },
      NOT_BETWEEN(true) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) < 0 || cellValue.compareTo(v2) > 0;
          }
      },
      EQUAL(false) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) == 0;
          }
      },
      NOT_EQUAL(false) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) != 0;
          }
      },
      GREATER_THAN(false) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) > 0;
          }
      },
      LESS_THAN(false) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) < 0;
          }
      },
      GREATER_OR_EQUAL(false) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) >= 0;
          }
      },
      LESS_OR_EQUAL(false) {
          @Override
          boolean compare(Double cellValue, Double v1, Double v2) {
              return cellValue.compareTo(v1) <= 0;
          }
      },
      ;

      public static final OperatorEnum IGNORED = BETWEEN;

      /** True for the operators whose comparison reads the second operand. */
      private final boolean usesSecondOperand;

      OperatorEnum(boolean usesSecondOperand) {
          this.usesSecondOperand = usesSecondOperand;
      }

      /**
       * Evaluates comparison using operator instance rules. A missing operand
       * that the operator needs makes the comparison pass instead of failing
       * with a {@link NullPointerException}.
       *
       * @param cellValue won't be null, assumption is previous checks handled that
       * @param v1 if null, value assumed invalid, anything passes, per Excel behavior
       * @param v2 null if not needed. If null when needed, assume anything passes, per Excel behavior
       * @return true if the comparison is valid
       */
      public boolean isValid(Double cellValue, Double v1, Double v2) {
          if (v1 == null) {
              return true;
          }
          if (usesSecondOperand && v2 == null) {
              return true;
          }
          return compare(cellValue, v1, v2);
      }

      /**
       * Operator-specific comparison; only called with the operands it reads
       * being non-null.
       *
       * @param cellValue value being validated
       * @param v1 first operand
       * @param v2 second operand, only read by the range operators
       * @return true if the value satisfies the operator
       */
      abstract boolean compare(Double cellValue, Double v1, Double v2);
  }
  449. public static class DataValidationContext {
  450. private final DataValidation dv;
  451. private final DataValidationEvaluator dve;
  452. private final CellRangeAddressBase region;
  453. private final CellReference target;
  454. /**
  455. *
  456. * @param dv
  457. * @param dve
  458. * @param region
  459. * @param target
  460. */
  461. public DataValidationContext(DataValidation dv, DataValidationEvaluator dve, CellRangeAddressBase region, CellReference target) {
  462. this.dv = dv;
  463. this.dve = dve;
  464. this.region = region;
  465. this.target = target;
  466. }
  467. /**
  468. * @return the dv
  469. */
  470. public DataValidation getValidation() {
  471. return dv;
  472. }
  473. /**
  474. * @return the dve
  475. */
  476. public DataValidationEvaluator getEvaluator() {
  477. return dve;
  478. }
  479. /**
  480. * @return the region
  481. */
  482. public CellRangeAddressBase getRegion() {
  483. return region;
  484. }
  485. /**
  486. * @return the target
  487. */
  488. public CellReference getTarget() {
  489. return target;
  490. }
  491. public int getOffsetColumns() {
  492. return target.getCol() - region.getFirstColumn();
  493. }
  494. public int getOffsetRows() {
  495. return target.getRow() - region.getFirstRow();
  496. }
  497. public int getSheetIndex() {
  498. return dve.getWorkbookEvaluator().getSheetIndex(target.getSheetName());
  499. }
  500. public String getFormula1() {
  501. return dv.getValidationConstraint().getFormula1();
  502. }
  503. public String getFormula2() {
  504. return dv.getValidationConstraint().getFormula2();
  505. }
  506. public int getOperator() {
  507. return dv.getValidationConstraint().getOperator();
  508. }
  509. }
  510. }