You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DataFormatter.java 46KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. 2012 - Alfresco Software, Ltd.
  15. Alfresco Software has modified source of this file
  16. The details of changes as svn diff can be found in svn at location root/projects/3rd-party/src
  17. ==================================================================== */
  18. package org.apache.poi.ss.usermodel;
  19. import java.math.BigDecimal;
  20. import java.math.RoundingMode;
  21. import java.text.DateFormat;
  22. import java.text.DateFormatSymbols;
  23. import java.text.DecimalFormat;
  24. import java.text.DecimalFormatSymbols;
  25. import java.text.FieldPosition;
  26. import java.text.Format;
  27. import java.text.ParsePosition;
  28. import java.text.SimpleDateFormat;
  29. import java.util.ArrayList;
  30. import java.util.Date;
  31. import java.util.HashMap;
  32. import java.util.List;
  33. import java.util.Locale;
  34. import java.util.Map;
  35. import java.util.Observable;
  36. import java.util.Observer;
  37. import java.util.regex.Matcher;
  38. import java.util.regex.Pattern;
  39. import org.apache.poi.ss.format.CellFormat;
  40. import org.apache.poi.ss.format.CellFormatResult;
  41. import org.apache.poi.ss.util.DateFormatConverter;
  42. import org.apache.poi.ss.util.NumberToTextConverter;
  43. import org.apache.poi.util.LocaleUtil;
  44. import org.apache.poi.util.POILogFactory;
  45. import org.apache.poi.util.POILogger;
  46. /**
  47. * DataFormatter contains methods for formatting the value stored in an
  48. * Cell. This can be useful for reports and GUI presentations when you
  49. * need to display data exactly as it appears in Excel. Supported formats
  50. * include currency, SSN, percentages, decimals, dates, phone numbers, zip
  51. * codes, etc.
  52. * <p>
  53. * Internally, formats will be implemented using subclasses of {@link Format}
  54. * such as {@link DecimalFormat} and {@link java.text.SimpleDateFormat}. Therefore the
  55. * formats used by this class must obey the same pattern rules as these Format
  56. * subclasses. This means that only legal number pattern characters ("0", "#",
  57. * ".", "," etc.) may appear in number formats. Other characters can be
  58. * inserted <em>before</em> or <em> after</em> the number pattern to form a
  59. * prefix or suffix.
  60. * </p>
  61. * <p>
  62. * For example the Excel pattern <code>"$#,##0.00 "USD"_);($#,##0.00 "USD")"
  63. * </code> will be correctly formatted as "$1,000.00 USD" or "($1,000.00 USD)".
  64. * However the pattern <code>"00-00-00"</code> is incorrectly formatted by
  65. * DecimalFormat as "000000--". For Excel formats that are not compatible with
  66. * DecimalFormat, you can provide your own custom {@link Format} implementation
  67. * via <code>DataFormatter.addFormat(String,Format)</code>. The following
  68. * custom formats are already provided by this class:
  69. * </p>
  70. * <pre>
  71. * <ul><li>SSN "000-00-0000"</li>
  72. * <li>Phone Number "(###) ###-####"</li>
  73. * <li>Zip plus 4 "00000-0000"</li>
  74. * </ul>
  75. * </pre>
  76. * <p>
  77. * If the Excel format pattern cannot be parsed successfully, then a default
  78. * format will be used. The default number format will mimic the Excel General
  79. * format: "#" for whole numbers and "#.##########" for decimal numbers. You
  80. * can override the default format pattern with <code>
  81. * DataFormatter.setDefaultNumberFormat(Format)</code>. <b>Note:</b> the
  82. * default format will only be used when a Format cannot be created from the
  83. * cell's data format string.
  84. *
  85. * <p>
  86. * Note that by default formatted numeric values are trimmed.
  87. * Excel formats can contain spacers and padding and the default behavior is to strip them off.
  88. * </p>
  89. * <p>Example:</p>
  90. * <p>
  91. * Consider a numeric cell with a value <code>12.343</code> and format <code>"##.##_ "</code>.
  92. * The trailing underscore and space ("_ ") in the format adds a space to the end and Excel formats this cell as <code>"12.34 "</code>,
  93. * but <code>DataFormatter</code> trims the formatted value and returns <code>"12.34"</code>.
  94. * </p>
  95. * You can enable spaces by passing the <code>emulateCSV=true</code> flag in the <code>DataFormatter</code> constructor.
  96. * If set to true, then the output tries to conform to what you get when you take an xls or xlsx in Excel and Save As CSV file:
  97. * <ul>
  98. * <li>returned values are not trimmed</li>
  99. * <li>Invalid dates are formatted as 255 pound signs ("#")</li>
  100. * <li>simulate Excel's handling of a format string of all # when the value is 0.
  101. * Excel will output "", <code>DataFormatter</code> will output "0".
  102. * </ul>
  103. * <p>
  104. * Some formats are automatically "localized" by Excel, eg show as mm/dd/yyyy when
  105. * loaded in Excel in some Locales but as dd/mm/yyyy in others. These are always
  106. * returned in the "default" (US) format, as stored in the file.
  107. * Some format strings request an alternate locale, eg
  108. * <code>[$-809]d/m/yy h:mm AM/PM</code> which explicitly requests UK locale.
  109. * These locale directives are (currently) ignored.
  110. * You can use {@link DateFormatConverter} to do some of this localisation if
  111. * you need it.
  112. */
  113. public class DataFormatter implements Observer {
  114. private static final String defaultFractionWholePartFormat = "#";
  115. private static final String defaultFractionFractionPartFormat = "#/##";
  116. /** Pattern to find a number format: "0" or "#" */
  117. private static final Pattern numPattern = Pattern.compile("[0#]+");
  118. /** Pattern to find days of week as text "ddd...." */
  119. private static final Pattern daysAsText = Pattern.compile("([d]{3,})", Pattern.CASE_INSENSITIVE);
  120. /** Pattern to find "AM/PM" marker */
  121. private static final Pattern amPmPattern = Pattern.compile("((A|P)[M/P]*)", Pattern.CASE_INSENSITIVE);
  122. /**
  123. * A regex to find locale patterns like [$$-1009] and [$?-452].
  124. * Note that we don't currently process these into locales
  125. */
  126. private static final Pattern localePatternGroup = Pattern.compile("(\\[\\$[^-\\]]*-[0-9A-Z]+\\])");
  127. /**
  128. * A regex to match the colour formattings rules.
  129. * Allowed colours are: Black, Blue, Cyan, Green,
  130. * Magenta, Red, White, Yellow, "Color n" (1<=n<=56)
  131. */
  132. private static final Pattern colorPattern =
  133. Pattern.compile("(\\[BLACK\\])|(\\[BLUE\\])|(\\[CYAN\\])|(\\[GREEN\\])|" +
  134. "(\\[MAGENTA\\])|(\\[RED\\])|(\\[WHITE\\])|(\\[YELLOW\\])|" +
  135. "(\\[COLOR\\s*\\d\\])|(\\[COLOR\\s*[0-5]\\d\\])", Pattern.CASE_INSENSITIVE);
  136. /**
  137. * A regex to identify a fraction pattern.
  138. * This requires that replaceAll("\\?", "#") has already been called
  139. */
  140. private static final Pattern fractionPattern = Pattern.compile("(?:([#\\d]+)\\s+)?(#+)\\s*\\/\\s*([#\\d]+)");
  141. /**
  142. * A regex to strip junk out of fraction formats
  143. */
  144. private static final Pattern fractionStripper = Pattern.compile("(\"[^\"]*\")|([^ \\?#\\d\\/]+)");
  145. /**
  146. * A regex to detect if an alternate grouping character is used
  147. * in a numeric format
  148. */
  149. private static final Pattern alternateGrouping = Pattern.compile("([#0]([^.#0])[#0]{3})");
  /**
   * Text used for cells that carry a date or time format but hold an invalid
   * date or time value: a run of 255 pound signs ("#").
   */
  private static final String invalidDateTimeString;
  static {
    StringBuilder pounds = new StringBuilder(255);
    while (pounds.length() < 255) {
      pounds.append('#');
    }
    invalidDateTimeString = pounds.toString();
  }
  160. /**
  161. * The decimal symbols of the locale used for formatting values.
  162. */
  163. private DecimalFormatSymbols decimalSymbols;
  164. /**
  165. * The date symbols of the locale used for formatting values.
  166. */
  167. private DateFormatSymbols dateSymbols;
  168. /**
  169. * A default date format, if no date format was given
  170. */
  171. private DateFormat defaultDateformat;
  172. /** <em>General</em> format for numbers. */
  173. private Format generalNumberFormat;
  174. /** A default format to use when a number pattern cannot be parsed. */
  175. private Format defaultNumFormat;
  176. /**
  177. * A map to cache formats.
  178. * Map<String,Format> formats
  179. */
  180. private final Map<String,Format> formats = new HashMap<String,Format>();
  181. private final boolean emulateCSV;
  182. /** stores the locale valid in the last formatting call */
  183. private Locale locale;
  184. /** stores if the locale should change according to {@link LocaleUtil#getUserLocale()} */
  185. private boolean localeIsAdapting;
  186. private class LocaleChangeObservable extends Observable {
  187. void checkForLocaleChange() {
  188. checkForLocaleChange(LocaleUtil.getUserLocale());
  189. }
  190. void checkForLocaleChange(Locale newLocale) {
  191. if (!localeIsAdapting) return;
  192. if (newLocale.equals(locale)) return;
  193. super.setChanged();
  194. notifyObservers(newLocale);
  195. }
  196. }
  197. /** the Observable to notify, when the locale has been changed */
  198. private final LocaleChangeObservable localeChangedObservable = new LocaleChangeObservable();
  199. /** For logging any problems we find */
  200. private static POILogger logger = POILogFactory.getLogger(DataFormatter.class);
  /**
   * Creates a formatter with CSV emulation switched off; equivalent to
   * {@link #DataFormatter(boolean) DataFormatter(false)}.
   */
  public DataFormatter() {
    this(false);
  }
  /**
   * Creates a formatter that starts from {@link LocaleUtil#getUserLocale()}
   * and is flagged as adapting to later locale changes.
   *
   * @param emulateCSV whether to emulate CSV output.
   */
  public DataFormatter(boolean emulateCSV) {
    this(LocaleUtil.getUserLocale(), true, emulateCSV);
  }
  /**
   * Creates a formatter for the given locale with CSV emulation switched off.
   *
   * @param locale the locale to format with
   */
  public DataFormatter(Locale locale) {
    this(locale, false);
  }
  /**
   * Creates a formatter for the given, caller-specified locale. The formatter
   * is flagged as not adapting to locale changes.
   *
   * @param locale the locale to format with
   * @param emulateCSV whether to emulate CSV output.
   */
  public DataFormatter(Locale locale, boolean emulateCSV) {
    this(locale, false, emulateCSV);
  }
  /**
   * Shared constructor behind all public constructors.
   *
   * @param locale the initial locale
   * @param localeIsAdapting {@code true} only if the locale is not user-specified
   * @param emulateCSV whether to emulate CSV output.
   */
  private DataFormatter(Locale locale, boolean localeIsAdapting, boolean emulateCSV) {
    // The observable ignores checks while localeIsAdapting is false, so the
    // flag is forced on for the initial check that announces the first locale.
    this.localeIsAdapting = true;
    localeChangedObservable.addObserver(this);
    localeChangedObservable.checkForLocaleChange(locale);

    // From here on the flag carries the caller's choice: either stay on the
    // locale that was handed in, or follow the current user locale.
    this.localeIsAdapting = localeIsAdapting;
    this.emulateCSV = emulateCSV;
  }
  245. /**
  246. * Return a Format for the given cell if one exists, otherwise try to
  247. * create one. This method will return <code>null</code> if the any of the
  248. * following is true:
  249. * <ul>
  250. * <li>the cell's style is null</li>
  251. * <li>the style's data format string is null or empty</li>
  252. * <li>the format string cannot be recognized as either a number or date</li>
  253. * </ul>
  254. *
  255. * @param cell The cell to retrieve a Format for
  256. * @return A Format for the format String
  257. */
  258. private Format getFormat(Cell cell) {
  259. if ( cell.getCellStyle() == null) {
  260. return null;
  261. }
  262. int formatIndex = cell.getCellStyle().getDataFormat();
  263. String formatStr = cell.getCellStyle().getDataFormatString();
  264. if(formatStr == null || formatStr.trim().length() == 0) {
  265. return null;
  266. }
  267. return getFormat(cell.getNumericCellValue(), formatIndex, formatStr);
  268. }
  269. private Format getFormat(double cellValue, int formatIndex, String formatStrIn) {
  270. localeChangedObservable.checkForLocaleChange();
  271. // // Might be better to separate out the n p and z formats, falling back to p when n and z are not set.
  272. // // That however would require other code to be re factored.
  273. // String[] formatBits = formatStrIn.split(";");
  274. // int i = cellValue > 0.0 ? 0 : cellValue < 0.0 ? 1 : 2;
  275. // String formatStr = (i < formatBits.length) ? formatBits[i] : formatBits[0];
  276. String formatStr = formatStrIn;
  277. // Excel supports 3+ part conditional data formats, eg positive/negative/zero,
  278. // or (>1000),(>0),(0),(negative). As Java doesn't handle these kinds
  279. // of different formats for different ranges, just +ve/-ve, we need to
  280. // handle these ourselves in a special way.
  281. // For now, if we detect 3+ parts, we call out to CellFormat to handle it
  282. // TODO Going forward, we should really merge the logic between the two classes
  283. if (formatStr.contains(";") &&
  284. formatStr.indexOf(';') != formatStr.lastIndexOf(';')) {
  285. try {
  286. // Ask CellFormat to get a formatter for it
  287. CellFormat cfmt = CellFormat.getInstance(formatStr);
  288. // CellFormat requires callers to identify date vs not, so do so
  289. Object cellValueO = Double.valueOf(cellValue);
  290. if (DateUtil.isADateFormat(formatIndex, formatStr) &&
  291. // don't try to handle Date value 0, let a 3 or 4-part format take care of it
  292. ((Double)cellValueO).doubleValue() != 0.0) {
  293. cellValueO = DateUtil.getJavaDate(cellValue);
  294. }
  295. // Wrap and return (non-cachable - CellFormat does that)
  296. return new CellFormatResultWrapper( cfmt.apply(cellValueO) );
  297. } catch (Exception e) {
  298. logger.log(POILogger.WARN, "Formatting failed for format " + formatStr + ", falling back", e);
  299. }
  300. }
  301. // Excel's # with value 0 will output empty where Java will output 0. This hack removes the # from the format.
  302. if (emulateCSV && cellValue == 0.0 && formatStr.contains("#") && !formatStr.contains("0")) {
  303. formatStr = formatStr.replaceAll("#", "");
  304. }
  305. // See if we already have it cached
  306. Format format = formats.get(formatStr);
  307. if (format != null) {
  308. return format;
  309. }
  310. // Is it one of the special built in types, General or @?
  311. if ("General".equalsIgnoreCase(formatStr) || "@".equals(formatStr)) {
  312. return generalNumberFormat;
  313. }
  314. // Build a formatter, and cache it
  315. format = createFormat(cellValue, formatIndex, formatStr);
  316. formats.put(formatStr, format);
  317. return format;
  318. }
  319. /**
  320. * Create and return a Format based on the format string from a cell's
  321. * style. If the pattern cannot be parsed, return a default pattern.
  322. *
  323. * @param cell The Excel cell
  324. * @return A Format representing the excel format. May return null.
  325. */
  326. public Format createFormat(Cell cell) {
  327. int formatIndex = cell.getCellStyle().getDataFormat();
  328. String formatStr = cell.getCellStyle().getDataFormatString();
  329. return createFormat(cell.getNumericCellValue(), formatIndex, formatStr);
  330. }
  331. private Format createFormat(double cellValue, int formatIndex, String sFormat) {
  332. localeChangedObservable.checkForLocaleChange();
  333. String formatStr = sFormat;
  334. // Remove colour formatting if present
  335. Matcher colourM = colorPattern.matcher(formatStr);
  336. while(colourM.find()) {
  337. String colour = colourM.group();
  338. // Paranoid replacement...
  339. int at = formatStr.indexOf(colour);
  340. if(at == -1) break;
  341. String nFormatStr = formatStr.substring(0,at) +
  342. formatStr.substring(at+colour.length());
  343. if(nFormatStr.equals(formatStr)) break;
  344. // Try again in case there's multiple
  345. formatStr = nFormatStr;
  346. colourM = colorPattern.matcher(formatStr);
  347. }
  348. // Strip off the locale information, we use an instance-wide locale for everything
  349. Matcher m = localePatternGroup.matcher(formatStr);
  350. while(m.find()) {
  351. String match = m.group();
  352. String symbol = match.substring(match.indexOf('$') + 1, match.indexOf('-'));
  353. if (symbol.indexOf('$') > -1) {
  354. symbol = symbol.substring(0, symbol.indexOf('$')) +
  355. '\\' +
  356. symbol.substring(symbol.indexOf('$'), symbol.length());
  357. }
  358. formatStr = m.replaceAll(symbol);
  359. m = localePatternGroup.matcher(formatStr);
  360. }
  361. // Check for special cases
  362. if(formatStr == null || formatStr.trim().length() == 0) {
  363. return getDefaultFormat(cellValue);
  364. }
  365. if ("General".equalsIgnoreCase(formatStr) || "@".equals(formatStr)) {
  366. return generalNumberFormat;
  367. }
  368. if(DateUtil.isADateFormat(formatIndex,formatStr) &&
  369. DateUtil.isValidExcelDate(cellValue)) {
  370. return createDateFormat(formatStr, cellValue);
  371. }
  372. // Excel supports fractions in format strings, which Java doesn't
  373. if (formatStr.contains("#/") || formatStr.contains("?/")) {
  374. String[] chunks = formatStr.split(";");
  375. for (String chunk1 : chunks) {
  376. String chunk = chunk1.replaceAll("\\?", "#");
  377. Matcher matcher = fractionStripper.matcher(chunk);
  378. chunk = matcher.replaceAll(" ");
  379. chunk = chunk.replaceAll(" +", " ");
  380. Matcher fractionMatcher = fractionPattern.matcher(chunk);
  381. //take the first match
  382. if (fractionMatcher.find()) {
  383. String wholePart = (fractionMatcher.group(1) == null) ? "" : defaultFractionWholePartFormat;
  384. return new FractionFormat(wholePart, fractionMatcher.group(3));
  385. }
  386. }
  387. // Strip custom text in quotes and escaped characters for now as it can cause performance problems in fractions.
  388. //String strippedFormatStr = formatStr.replaceAll("\\\\ ", " ").replaceAll("\\\\.", "").replaceAll("\"[^\"]*\"", " ").replaceAll("\\?", "#");
  389. //System.out.println("formatStr: "+strippedFormatStr);
  390. return new FractionFormat(defaultFractionWholePartFormat, defaultFractionFractionPartFormat);
  391. }
  392. if (numPattern.matcher(formatStr).find()) {
  393. return createNumberFormat(formatStr, cellValue);
  394. }
  395. if (emulateCSV) {
  396. return new ConstantStringFormat(cleanFormatForNumber(formatStr));
  397. }
  398. // TODO - when does this occur?
  399. return null;
  400. }
  401. private Format createDateFormat(String pFormatStr, double cellValue) {
  402. String formatStr = pFormatStr;
  403. formatStr = formatStr.replaceAll("\\\\-","-");
  404. formatStr = formatStr.replaceAll("\\\\,",",");
  405. formatStr = formatStr.replaceAll("\\\\\\.","."); // . is a special regexp char
  406. formatStr = formatStr.replaceAll("\\\\ "," ");
  407. formatStr = formatStr.replaceAll("\\\\/","/"); // weird: m\\/d\\/yyyy
  408. formatStr = formatStr.replaceAll(";@", "");
  409. formatStr = formatStr.replaceAll("\"/\"", "/"); // "/" is escaped for no reason in: mm"/"dd"/"yyyy
  410. formatStr = formatStr.replace("\"\"", "'"); // replace Excel quoting with Java style quoting
  411. formatStr = formatStr.replaceAll("\\\\T","'T'"); // Quote the T is iso8601 style dates
  412. boolean hasAmPm = false;
  413. Matcher amPmMatcher = amPmPattern.matcher(formatStr);
  414. while (amPmMatcher.find()) {
  415. formatStr = amPmMatcher.replaceAll("@");
  416. hasAmPm = true;
  417. amPmMatcher = amPmPattern.matcher(formatStr);
  418. }
  419. formatStr = formatStr.replaceAll("@", "a");
  420. Matcher dateMatcher = daysAsText.matcher(formatStr);
  421. if (dateMatcher.find()) {
  422. String match = dateMatcher.group(0).toUpperCase(Locale.ROOT).replaceAll("D", "E");
  423. formatStr = dateMatcher.replaceAll(match);
  424. }
  425. // Convert excel date format to SimpleDateFormat.
  426. // Excel uses lower and upper case 'm' for both minutes and months.
  427. // From Excel help:
  428. /*
  429. The "m" or "mm" code must appear immediately after the "h" or"hh"
  430. code or immediately before the "ss" code; otherwise, Microsoft
  431. Excel displays the month instead of minutes."
  432. */
  433. StringBuilder sb = new StringBuilder();
  434. char[] chars = formatStr.toCharArray();
  435. boolean mIsMonth = true;
  436. List<Integer> ms = new ArrayList<Integer>();
  437. boolean isElapsed = false;
  438. for(int j=0; j<chars.length; j++) {
  439. char c = chars[j];
  440. if (c == '\'') {
  441. sb.append(c);
  442. j++;
  443. // skip until the next quote
  444. while(j<chars.length) {
  445. c = chars[j];
  446. sb.append(c);
  447. if(c == '\'') {
  448. break;
  449. }
  450. j++;
  451. }
  452. }
  453. else if (c == '[' && !isElapsed) {
  454. isElapsed = true;
  455. mIsMonth = false;
  456. sb.append(c);
  457. }
  458. else if (c == ']' && isElapsed) {
  459. isElapsed = false;
  460. sb.append(c);
  461. }
  462. else if (isElapsed) {
  463. if (c == 'h' || c == 'H') {
  464. sb.append('H');
  465. }
  466. else if (c == 'm' || c == 'M') {
  467. sb.append('m');
  468. }
  469. else if (c == 's' || c == 'S') {
  470. sb.append('s');
  471. }
  472. else {
  473. sb.append(c);
  474. }
  475. }
  476. else if (c == 'h' || c == 'H') {
  477. mIsMonth = false;
  478. if (hasAmPm) {
  479. sb.append('h');
  480. } else {
  481. sb.append('H');
  482. }
  483. }
  484. else if (c == 'm' || c == 'M') {
  485. if(mIsMonth) {
  486. sb.append('M');
  487. ms.add(
  488. Integer.valueOf(sb.length() -1)
  489. );
  490. } else {
  491. sb.append('m');
  492. }
  493. }
  494. else if (c == 's' || c == 'S') {
  495. sb.append('s');
  496. // if 'M' precedes 's' it should be minutes ('m')
  497. for (int index : ms) {
  498. if (sb.charAt(index) == 'M') {
  499. sb.replace(index, index + 1, "m");
  500. }
  501. }
  502. mIsMonth = true;
  503. ms.clear();
  504. }
  505. else if (Character.isLetter(c)) {
  506. mIsMonth = true;
  507. ms.clear();
  508. if (c == 'y' || c == 'Y') {
  509. sb.append('y');
  510. }
  511. else if (c == 'd' || c == 'D') {
  512. sb.append('d');
  513. }
  514. else {
  515. sb.append(c);
  516. }
  517. }
  518. else {
  519. sb.append(c);
  520. }
  521. }
  522. formatStr = sb.toString();
  523. try {
  524. return new ExcelStyleDateFormatter(formatStr, dateSymbols);
  525. } catch(IllegalArgumentException iae) {
  526. // the pattern could not be parsed correctly,
  527. // so fall back to the default number format
  528. return getDefaultFormat(cellValue);
  529. }
  530. }
  531. private String cleanFormatForNumber(String formatStr) {
  532. StringBuilder sb = new StringBuilder(formatStr);
  533. if (emulateCSV) {
  534. // Requested spacers with "_" are replaced by a single space.
  535. // Full-column-width padding "*" are removed.
  536. // Not processing fractions at this time. Replace ? with space.
  537. // This matches CSV output.
  538. for (int i = 0; i < sb.length(); i++) {
  539. char c = sb.charAt(i);
  540. if (c == '_' || c == '*' || c == '?') {
  541. if (i > 0 && sb.charAt((i - 1)) == '\\') {
  542. // It's escaped, don't worry
  543. continue;
  544. }
  545. if (c == '?') {
  546. sb.setCharAt(i, ' ');
  547. } else if (i < sb.length() - 1) {
  548. // Remove the character we're supposed
  549. // to match the space of / pad to the
  550. // column width with
  551. if (c == '_') {
  552. sb.setCharAt(i + 1, ' ');
  553. } else {
  554. sb.deleteCharAt(i + 1);
  555. }
  556. // Remove the character too
  557. sb.deleteCharAt(i);
  558. i--;
  559. }
  560. }
  561. }
  562. } else {
  563. // If they requested spacers, with "_",
  564. // remove those as we don't do spacing
  565. // If they requested full-column-width
  566. // padding, with "*", remove those too
  567. for (int i = 0; i < sb.length(); i++) {
  568. char c = sb.charAt(i);
  569. if (c == '_' || c == '*') {
  570. if (i > 0 && sb.charAt((i - 1)) == '\\') {
  571. // It's escaped, don't worry
  572. continue;
  573. }
  574. if (i < sb.length() - 1) {
  575. // Remove the character we're supposed
  576. // to match the space of / pad to the
  577. // column width with
  578. sb.deleteCharAt(i + 1);
  579. }
  580. // Remove the _ too
  581. sb.deleteCharAt(i);
  582. i--;
  583. }
  584. }
  585. }
  586. // Now, handle the other aspects like
  587. // quoting and scientific notation
  588. for(int i = 0; i < sb.length(); i++) {
  589. char c = sb.charAt(i);
  590. // remove quotes and back slashes
  591. if (c == '\\' || c == '"') {
  592. sb.deleteCharAt(i);
  593. i--;
  594. // for scientific/engineering notation
  595. } else if (c == '+' && i > 0 && sb.charAt(i - 1) == 'E') {
  596. sb.deleteCharAt(i);
  597. i--;
  598. }
  599. }
  600. return sb.toString();
  601. }
  602. private Format createNumberFormat(String formatStr, double cellValue) {
  603. String format = cleanFormatForNumber(formatStr);
  604. DecimalFormatSymbols symbols = decimalSymbols;
  605. // Do we need to change the grouping character?
  606. // eg for a format like #'##0 which wants 12'345 not 12,345
  607. Matcher agm = alternateGrouping.matcher(format);
  608. if (agm.find()) {
  609. char grouping = agm.group(2).charAt(0);
  610. // Only replace the grouping character if it is not the default
  611. // grouping character for the US locale (',') in order to enable
  612. // correct grouping for non-US locales.
  613. if (grouping!=',') {
  614. symbols = DecimalFormatSymbols.getInstance(locale);
  615. symbols.setGroupingSeparator(grouping);
  616. String oldPart = agm.group(1);
  617. String newPart = oldPart.replace(grouping, ',');
  618. format = format.replace(oldPart, newPart);
  619. }
  620. }
  621. try {
  622. DecimalFormat df = new DecimalFormat(format, symbols);
  623. setExcelStyleRoundingMode(df);
  624. return df;
  625. } catch(IllegalArgumentException iae) {
  626. // the pattern could not be parsed correctly,
  627. // so fall back to the default number format
  628. return getDefaultFormat(cellValue);
  629. }
  630. }
  631. /**
  632. * Returns a default format for a cell.
  633. * @param cell The cell
  634. * @return a default format
  635. */
  636. public Format getDefaultFormat(Cell cell) {
  637. return getDefaultFormat(cell.getNumericCellValue());
  638. }
  639. private Format getDefaultFormat(double cellValue) {
  640. localeChangedObservable.checkForLocaleChange();
  641. // for numeric cells try user supplied default
  642. if (defaultNumFormat != null) {
  643. return defaultNumFormat;
  644. // otherwise use general format
  645. }
  646. return generalNumberFormat;
  647. }
  648. /**
  649. * Performs Excel-style date formatting, using the
  650. * supplied Date and format
  651. */
  652. private String performDateFormatting(Date d, Format dateFormat) {
  653. return (dateFormat != null ? dateFormat : defaultDateformat).format(d);
  654. }
  655. /**
  656. * Returns the formatted value of an Excel date as a <tt>String</tt> based
  657. * on the cell's <code>DataFormat</code>. i.e. "Thursday, January 02, 2003"
  658. * , "01/02/2003" , "02-Jan" , etc.
  659. *
  660. * @param cell The cell
  661. * @return a formatted date string
  662. */
  663. private String getFormattedDateString(Cell cell) {
  664. Format dateFormat = getFormat(cell);
  665. if(dateFormat instanceof ExcelStyleDateFormatter) {
  666. // Hint about the raw excel value
  667. ((ExcelStyleDateFormatter)dateFormat).setDateToBeFormatted(
  668. cell.getNumericCellValue()
  669. );
  670. }
  671. Date d = cell.getDateCellValue();
  672. return performDateFormatting(d, dateFormat);
  673. }
  674. /**
  675. * Returns the formatted value of an Excel number as a <tt>String</tt>
  676. * based on the cell's <code>DataFormat</code>. Supported formats include
  677. * currency, percents, decimals, phone number, SSN, etc.:
  678. * "61.54%", "$100.00", "(800) 555-1234".
  679. *
  680. * @param cell The cell
  681. * @return a formatted number string
  682. */
  683. private String getFormattedNumberString(Cell cell) {
  684. Format numberFormat = getFormat(cell);
  685. double d = cell.getNumericCellValue();
  686. if (numberFormat == null) {
  687. return String.valueOf(d);
  688. }
  689. String formatted = numberFormat.format(new Double(d));
  690. return formatted.replaceFirst("E(\\d)", "E+$1"); // to match Excel's E-notation
  691. }
  692. /**
  693. * Formats the given raw cell value, based on the supplied
  694. * format index and string, according to excel style rules.
  695. * @see #formatCellValue(Cell)
  696. */
  697. public String formatRawCellContents(double value, int formatIndex, String formatString) {
  698. return formatRawCellContents(value, formatIndex, formatString, false);
  699. }
  700. /**
  701. * Formats the given raw cell value, based on the supplied
  702. * format index and string, according to excel style rules.
  703. * @see #formatCellValue(Cell)
  704. */
  705. public String formatRawCellContents(double value, int formatIndex, String formatString, boolean use1904Windowing) {
  706. localeChangedObservable.checkForLocaleChange();
  707. // Is it a date?
  708. if(DateUtil.isADateFormat(formatIndex,formatString)) {
  709. if(DateUtil.isValidExcelDate(value)) {
  710. Format dateFormat = getFormat(value, formatIndex, formatString);
  711. if(dateFormat instanceof ExcelStyleDateFormatter) {
  712. // Hint about the raw excel value
  713. ((ExcelStyleDateFormatter)dateFormat).setDateToBeFormatted(value);
  714. }
  715. Date d = DateUtil.getJavaDate(value, use1904Windowing);
  716. return performDateFormatting(d, dateFormat);
  717. }
  718. // RK: Invalid dates are 255 #s.
  719. if (emulateCSV) {
  720. return invalidDateTimeString;
  721. }
  722. }
  723. // else Number
  724. Format numberFormat = getFormat(value, formatIndex, formatString);
  725. if (numberFormat == null) {
  726. return String.valueOf(value);
  727. }
  728. // When formatting 'value', double to text to BigDecimal produces more
  729. // accurate results than double to Double in JDK8 (as compared to
  730. // previous versions). However, if the value contains E notation, this
  731. // would expand the values, which we do not want, so revert to
  732. // original method.
  733. String result;
  734. final String textValue = NumberToTextConverter.toText(value);
  735. if (textValue.indexOf('E') > -1) {
  736. result = numberFormat.format(new Double(value));
  737. }
  738. else {
  739. result = numberFormat.format(new BigDecimal(textValue));
  740. }
  741. // Complete scientific notation by adding the missing +.
  742. if (result.indexOf('E') > -1 && !result.contains("E-")) {
  743. result = result.replaceFirst("E", "E+");
  744. }
  745. return result;
  746. }
  747. /**
  748. * <p>
  749. * Returns the formatted value of a cell as a <tt>String</tt> regardless
  750. * of the cell type. If the Excel format pattern cannot be parsed then the
  751. * cell value will be formatted using a default format.
  752. * </p>
  753. * <p>When passed a null or blank cell, this method will return an empty
  754. * String (""). Formulas in formula type cells will not be evaluated.
  755. * </p>
  756. *
  757. * @param cell The cell
  758. * @return the formatted cell value as a String
  759. */
  760. public String formatCellValue(Cell cell) {
  761. return formatCellValue(cell, null);
  762. }
  763. /**
  764. * <p>
  765. * Returns the formatted value of a cell as a <tt>String</tt> regardless
  766. * of the cell type. If the Excel format pattern cannot be parsed then the
  767. * cell value will be formatted using a default format.
  768. * </p>
  769. * <p>When passed a null or blank cell, this method will return an empty
  770. * String (""). Formula cells will be evaluated using the given
  771. * {@link FormulaEvaluator} if the evaluator is non-null. If the
  772. * evaluator is null, then the formula String will be returned. The caller
  773. * is responsible for setting the currentRow on the evaluator
  774. *</p>
  775. *
  776. * @param cell The cell (can be null)
  777. * @param evaluator The FormulaEvaluator (can be null)
  778. * @return a string value of the cell
  779. */
  780. public String formatCellValue(Cell cell, FormulaEvaluator evaluator) {
  781. localeChangedObservable.checkForLocaleChange();
  782. if (cell == null) {
  783. return "";
  784. }
  785. CellType cellType = cell.getCellTypeEnum();
  786. if (cellType == CellType.FORMULA) {
  787. if (evaluator == null) {
  788. return cell.getCellFormula();
  789. }
  790. cellType = evaluator.evaluateFormulaCellEnum(cell);
  791. }
  792. switch (cellType) {
  793. case NUMERIC :
  794. if (DateUtil.isCellDateFormatted(cell)) {
  795. return getFormattedDateString(cell);
  796. }
  797. return getFormattedNumberString(cell);
  798. case STRING :
  799. return cell.getRichStringCellValue().getString();
  800. case BOOLEAN :
  801. return cell.getBooleanCellValue() ? "TRUE" : "FALSE";
  802. case BLANK :
  803. return "";
  804. case ERROR:
  805. return FormulaError.forInt(cell.getErrorCellValue()).getString();
  806. default:
  807. throw new RuntimeException("Unexpected celltype (" + cellType + ")");
  808. }
  809. }
  810. /**
  811. * <p>
  812. * Sets a default number format to be used when the Excel format cannot be
  813. * parsed successfully. <b>Note:</b> This is a fall back for when an error
  814. * occurs while parsing an Excel number format pattern. This will not
  815. * affect cells with the <em>General</em> format.
  816. * </p>
  817. * <p>
  818. * The value that will be passed to the Format's format method (specified
  819. * by <code>java.text.Format#format</code>) will be a double value from a
  820. * numeric cell. Therefore the code in the format method should expect a
  821. * <code>Number</code> value.
  822. * </p>
  823. *
  824. * @param format A Format instance to be used as a default
  825. * @see java.text.Format#format
  826. */
  827. public void setDefaultNumberFormat(Format format) {
  828. for (Map.Entry<String, Format> entry : formats.entrySet()) {
  829. if (entry.getValue() == generalNumberFormat) {
  830. entry.setValue(format);
  831. }
  832. }
  833. defaultNumFormat = format;
  834. }
  835. /**
  836. * Adds a new format to the available formats.
  837. * <p>
  838. * The value that will be passed to the Format's format method (specified
  839. * by <code>java.text.Format#format</code>) will be a double value from a
  840. * numeric cell. Therefore the code in the format method should expect a
  841. * <code>Number</code> value.
  842. * </p>
  843. * @param excelFormatStr The data format string
  844. * @param format A Format instance
  845. */
  846. public void addFormat(String excelFormatStr, Format format) {
  847. formats.put(excelFormatStr, format);
  848. }
  849. // Some custom formats
  850. /**
  851. * @return a <tt>DecimalFormat</tt> with parseIntegerOnly set <code>true</code>
  852. */
  853. private static DecimalFormat createIntegerOnlyFormat(String fmt) {
  854. DecimalFormatSymbols dsf = DecimalFormatSymbols.getInstance(Locale.ROOT);
  855. DecimalFormat result = new DecimalFormat(fmt, dsf);
  856. result.setParseIntegerOnly(true);
  857. return result;
  858. }
  859. /**
  860. * Enables excel style rounding mode (round half up) on the
  861. * Decimal Format given.
  862. */
  863. public static void setExcelStyleRoundingMode(DecimalFormat format) {
  864. setExcelStyleRoundingMode(format, RoundingMode.HALF_UP);
  865. }
  866. /**
  867. * Enables custom rounding mode on the given Decimal Format.
  868. * @param format DecimalFormat
  869. * @param roundingMode RoundingMode
  870. */
  871. public static void setExcelStyleRoundingMode(DecimalFormat format, RoundingMode roundingMode) {
  872. format.setRoundingMode(roundingMode);
  873. }
  874. /**
  875. * If the Locale has been changed via {@link LocaleUtil#setUserLocale(Locale)} the stored
  876. * formats need to be refreshed. All formats which aren't originated from DataFormatter
  877. * itself, i.e. all Formats added via {@link DataFormatter#addFormat(String, Format)} and
  878. * {@link DataFormatter#setDefaultNumberFormat(Format)}, need to be added again.
  879. * To notify callers, the returned {@link Observable} should be used.
  880. * The Object in {@link Observer#update(Observable, Object)} is the new Locale.
  881. *
  882. * @return the listener object, where callers can register themselves
  883. */
  884. public Observable getLocaleChangedObservable() {
  885. return localeChangedObservable;
  886. }
  887. /**
  888. * Update formats when locale has been changed
  889. *
  890. * @param observable usually this is our own Observable instance
  891. * @param localeObj only reacts on Locale objects
  892. */
  893. public void update(Observable observable, Object localeObj) {
  894. if (!(localeObj instanceof Locale)) return;
  895. Locale newLocale = (Locale)localeObj;
  896. if (!localeIsAdapting || newLocale.equals(locale)) return;
  897. locale = newLocale;
  898. dateSymbols = DateFormatSymbols.getInstance(locale);
  899. decimalSymbols = DecimalFormatSymbols.getInstance(locale);
  900. generalNumberFormat = new ExcelGeneralNumberFormat(locale);
  901. // taken from Date.toString()
  902. defaultDateformat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dateSymbols);
  903. defaultDateformat.setTimeZone(LocaleUtil.getUserTimeZone());
  904. // init built-in formats
  905. formats.clear();
  906. Format zipFormat = ZipPlusFourFormat.instance;
  907. addFormat("00000\\-0000", zipFormat);
  908. addFormat("00000-0000", zipFormat);
  909. Format phoneFormat = PhoneFormat.instance;
  910. // allow for format string variations
  911. addFormat("[<=9999999]###\\-####;\\(###\\)\\ ###\\-####", phoneFormat);
  912. addFormat("[<=9999999]###-####;(###) ###-####", phoneFormat);
  913. addFormat("###\\-####;\\(###\\)\\ ###\\-####", phoneFormat);
  914. addFormat("###-####;(###) ###-####", phoneFormat);
  915. Format ssnFormat = SSNFormat.instance;
  916. addFormat("000\\-00\\-0000", ssnFormat);
  917. addFormat("000-00-0000", ssnFormat);
  918. }
  919. /**
  920. * Format class for Excel's SSN format. This class mimics Excel's built-in
  921. * SSN formatting.
  922. *
  923. * @author James May
  924. */
  925. @SuppressWarnings("serial")
  926. private static final class SSNFormat extends Format {
  927. public static final Format instance = new SSNFormat();
  928. private static final DecimalFormat df = createIntegerOnlyFormat("000000000");
  929. private SSNFormat() {
  930. // enforce singleton
  931. }
  932. /** Format a number as an SSN */
  933. public static String format(Number num) {
  934. String result = df.format(num);
  935. return result.substring(0, 3) + '-' +
  936. result.substring(3, 5) + '-' +
  937. result.substring(5, 9);
  938. }
  939. @Override
  940. public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
  941. return toAppendTo.append(format((Number)obj));
  942. }
  943. @Override
  944. public Object parseObject(String source, ParsePosition pos) {
  945. return df.parseObject(source, pos);
  946. }
  947. }
  948. /**
  949. * Format class for Excel Zip + 4 format. This class mimics Excel's
  950. * built-in formatting for Zip + 4.
  951. * @author James May
  952. */
  953. @SuppressWarnings("serial")
  954. private static final class ZipPlusFourFormat extends Format {
  955. public static final Format instance = new ZipPlusFourFormat();
  956. private static final DecimalFormat df = createIntegerOnlyFormat("000000000");
  957. private ZipPlusFourFormat() {
  958. // enforce singleton
  959. }
  960. /** Format a number as Zip + 4 */
  961. public static String format(Number num) {
  962. String result = df.format(num);
  963. return result.substring(0, 5) + '-' +
  964. result.substring(5, 9);
  965. }
  966. @Override
  967. public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
  968. return toAppendTo.append(format((Number)obj));
  969. }
  970. @Override
  971. public Object parseObject(String source, ParsePosition pos) {
  972. return df.parseObject(source, pos);
  973. }
  974. }
  975. /**
  976. * Format class for Excel phone number format. This class mimics Excel's
  977. * built-in phone number formatting.
  978. * @author James May
  979. */
  980. @SuppressWarnings("serial")
  981. private static final class PhoneFormat extends Format {
  982. public static final Format instance = new PhoneFormat();
  983. private static final DecimalFormat df = createIntegerOnlyFormat("##########");
  984. private PhoneFormat() {
  985. // enforce singleton
  986. }
  987. /** Format a number as a phone number */
  988. public static String format(Number num) {
  989. String result = df.format(num);
  990. StringBuilder sb = new StringBuilder();
  991. String seg1, seg2, seg3;
  992. int len = result.length();
  993. if (len <= 4) {
  994. return result;
  995. }
  996. seg3 = result.substring(len - 4, len);
  997. seg2 = result.substring(Math.max(0, len - 7), len - 4);
  998. seg1 = result.substring(Math.max(0, len - 10), Math.max(0, len - 7));
  999. if(seg1.trim().length() > 0) {
  1000. sb.append('(').append(seg1).append(") ");
  1001. }
  1002. if(seg2.trim().length() > 0) {
  1003. sb.append(seg2).append('-');
  1004. }
  1005. sb.append(seg3);
  1006. return sb.toString();
  1007. }
  1008. @Override
  1009. public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
  1010. return toAppendTo.append(format((Number)obj));
  1011. }
  1012. @Override
  1013. public Object parseObject(String source, ParsePosition pos) {
  1014. return df.parseObject(source, pos);
  1015. }
  1016. }
  1017. /**
  1018. * Format class that does nothing and always returns a constant string.
  1019. *
  1020. * This format is used to simulate Excel's handling of a format string
  1021. * of all # when the value is 0. Excel will output "", Java will output "0".
  1022. *
  1023. * @see DataFormatter#createFormat(double, int, String)
  1024. */
  1025. @SuppressWarnings("serial")
  1026. private static final class ConstantStringFormat extends Format {
  1027. private static final DecimalFormat df = createIntegerOnlyFormat("##########");
  1028. private final String str;
  1029. public ConstantStringFormat(String s) {
  1030. str = s;
  1031. }
  1032. @Override
  1033. public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
  1034. return toAppendTo.append(str);
  1035. }
  1036. @Override
  1037. public Object parseObject(String source, ParsePosition pos) {
  1038. return df.parseObject(source, pos);
  1039. }
  1040. }
  1041. /**
  1042. * Workaround until we merge {@link DataFormatter} with {@link CellFormat}.
  1043. * Constant, non-cachable wrapper around a {@link CellFormatResult}
  1044. */
  1045. @SuppressWarnings("serial")
  1046. private final class CellFormatResultWrapper extends Format {
  1047. private final CellFormatResult result;
  1048. private CellFormatResultWrapper(CellFormatResult result) {
  1049. this.result = result;
  1050. }
  1051. public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
  1052. if (emulateCSV) {
  1053. return toAppendTo.append(result.text);
  1054. } else {
  1055. return toAppendTo.append(result.text.trim());
  1056. }
  1057. }
  1058. public Object parseObject(String source, ParsePosition pos) {
  1059. return null; // Not supported
  1060. }
  1061. }
  1062. }