From: Yegor Kozlov Date: Tue, 22 Dec 2009 07:52:55 +0000 (+0000) Subject: improved performance of DateUtil.isCellDateFormatted(), see Bugzilla 48425 X-Git-Tag: REL_3_7_BETA1~192 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=f186843733438065c429b9ac855aac76a8c59584;p=poi.git improved performance of DateUtil.isCellDateFormatted(), see Bugzilla 48425 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@893105 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 5ef12fb3ab..b7720e9e86 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 48425 - improved performance of DateUtil.isCellDateFormatted() 47215 - fixed InterfaceEndRecord to tolerate unexpected record contents 48415 - improved javadoc on HSSPicture.resize() added Ant target to install artifacts in local repository diff --git a/src/java/org/apache/poi/ss/usermodel/DateUtil.java b/src/java/org/apache/poi/ss/usermodel/DateUtil.java index 90a7a5b509..e69225a854 100644 --- a/src/java/org/apache/poi/ss/usermodel/DateUtil.java +++ b/src/java/org/apache/poi/ss/usermodel/DateUtil.java @@ -47,6 +47,13 @@ public class DateUtil { private static final Pattern TIME_SEPARATOR_PATTERN = Pattern.compile(":"); + /** + * The following patterns are used in {@link #isADateFormat(int, String)} + */ + private static final Pattern date_ptrn1 = Pattern.compile("^\\[\\$\\-.*?\\]"); + private static final Pattern date_ptrn2 = Pattern.compile("^\\[[a-zA-Z]+\\]"); + private static final Pattern date_ptrn3 = Pattern.compile("^[yYmMdDhHsS\\-/,. :\\\\]+[ampAMP/]*$"); + /** * Given a Date, converts it into a double representing its internal Excel representation, * which is the number of days since 1/1/1900. Fractional days represent hours, minutes, and seconds. @@ -181,6 +188,7 @@ public class DateUtil { calendar.set(GregorianCalendar.MILLISECOND, millisecondsInDay); } + /** * Given a format ID and its format String, will check to see if the * format represents a date format or not. @@ -206,36 +214,62 @@ public class DateUtil { } String fs = formatString; - - // Translate \- into just -, before matching - fs = fs.replaceAll("\\\\-","-"); - // And \, into , - fs = fs.replaceAll("\\\\,",","); - // And \. into . - fs = fs.replaceAll("\\\\.","."); - // And '\ ' into ' ' - fs = fs.replaceAll("\\\\ "," "); - - // If it end in ;@, that's some crazy dd/mm vs mm/dd - // switching stuff, which we can ignore - fs = fs.replaceAll(";@", ""); + /* + Normalize the format string. The code below is equivalent + to the following consecutive regexp replacements: + + // Translate \- into just -, before matching + fs = fs.replaceAll("\\\\-","-"); + // And \, into , + fs = fs.replaceAll("\\\\,",","); + // And \. into . + fs = fs.replaceAll("\\\\\\.","."); + // And '\ ' into ' ' + fs = fs.replaceAll("\\\\ "," "); + + // If it end in ;@, that's some crazy dd/mm vs mm/dd + // switching stuff, which we can ignore + fs = fs.replaceAll(";@", ""); + + The code above was reworked as suggested in bug 48425: + simple loop is more efficient than consecutive regexp replacements. + */ + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < fs.length(); i++){ + char c = fs.charAt(i); + if(i < fs.length() - 1){ + char nc = fs.charAt(i + 1); + if(c == '\\'){ + switch (nc){ + case '-': + case ',': + case '.': + case ' ': + case '\\': + //skip current '\' and continue to the next char + continue; + } + } else if (c == ';' && nc == '@'){ + i++; + //skip ";@" duplets + continue; + } + } + sb.append(c); + } + fs = sb.toString(); // If it starts with [$-...], then could be a date, but // who knows what that starting bit is all about - fs = fs.replaceAll("^\\[\\$\\-.*?\\]", ""); - + fs = date_ptrn1.matcher(fs).replaceAll(""); // If it starts with something like [Black] or [Yellow], // then it could be a date - fs = fs.replaceAll("^\\[[a-zA-Z]+\\]", ""); + fs = date_ptrn2.matcher(fs).replaceAll(""); // Otherwise, check it's only made up, in any case, of: - // y m d h s - / , . : + // y m d h s - \ / , . : // optionally followed by AM/PM - if(fs.matches("^[yYmMdDhHsS\\-/,. :]+[ampAMP/]*$")) { - return true; - } - - return false; + return date_ptrn3.matcher(fs).matches(); } /** diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDateUtil.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDateUtil.java index 02113fd6c2..1becfcb36d 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDateUtil.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDateUtil.java @@ -26,6 +26,7 @@ import junit.framework.TestCase; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.model.InternalWorkbook; +import org.apache.poi.ss.usermodel.DateUtil; /** * Class TestHSSFDateUtil @@ -239,6 +240,10 @@ public final class TestHSSFDateUtil extends TestCase { "DD-MM-YY", "DD-mm-YYYY", "dd\\-mm\\-yy", // Sometimes escaped "dd.mm.yyyy", "dd\\.mm\\.yyyy", + "dd\\ mm\\.yyyy AM", "dd\\ mm\\.yyyy pm", + "dd\\ mm\\.yyyy\\-dd", "[h]:mm:ss", + + //YK: TODO "mm:ss.0" is a built-in date format which is not recognized by DateUtil.isInternalDateFormat // These crazy ones are valid "yyyy-mm-dd;@", "yyyy/mm/dd;@",