aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2010-06-02 16:51:05 +0000
committerNick Burch <nick@apache.org>2010-06-02 16:51:05 +0000
commit8952570e0bcf435c612b1faffa07f09b51ff562f (patch)
tree647522bafde1995ea3313a55443cbe58e06757b9 /src
parent0d1692dbf270fe2aaa2b9c296385b3293d8739b9 (diff)
downloadpoi-8952570e0bcf435c612b1faffa07f09b51ff562f.tar.gz
poi-8952570e0bcf435c612b1faffa07f09b51ff562f.zip
Another fix inspired by bug #48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@950657 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java22
-rw-r--r--src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java83
-rw-r--r--src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java57
4 files changed, 141 insertions, 22 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 016fb1eceb..b5c0164728 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action>
<action dev="POI-DEVELOPERS" type="fix">48494 - have EventBasedExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action>
<action dev="POI-DEVELOPERS" type="fix">49096 - add clone support to Chart begin and end records, to allow cloning of more Chart containing sheets</action>
<action dev="POI-DEVELOPERS" type="add">List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal)</action>
diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
index dc70fef2fb..c2e234ce34 100644
--- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
+++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
@@ -26,7 +26,9 @@ import java.io.PrintStream;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFComment;
+import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
@@ -51,6 +53,7 @@ import org.apache.poi.ss.usermodel.HeaderFooter;
*/
public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
private HSSFWorkbook _wb;
+ private HSSFDataFormatter _formatter;
private boolean _includeSheetNames = true;
private boolean _shouldEvaluateFormulas = true;
private boolean _includeCellComments = false;
@@ -60,6 +63,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
public ExcelExtractor(HSSFWorkbook wb) {
super(wb);
_wb = wb;
+ _formatter = new HSSFDataFormatter();
}
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
this(fs.getRoot(), fs);
@@ -323,8 +327,9 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
text.append(cell.getRichStringCellValue().getString());
break;
case HSSFCell.CELL_TYPE_NUMERIC:
- // Note - we don't apply any formatting!
- text.append(cell.getNumericCellValue());
+ text.append(
+ _formatter.formatCellValue(cell)
+ );
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
text.append(cell.getBooleanCellValue());
@@ -344,7 +349,18 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
}
break;
case HSSFCell.CELL_TYPE_NUMERIC:
- text.append(cell.getNumericCellValue());
+ HSSFCellStyle style = cell.getCellStyle();
+ if(style == null) {
+ text.append( cell.getNumericCellValue() );
+ } else {
+ text.append(
+ _formatter.formatRawCellContents(
+ cell.getNumericCellValue(),
+ style.getDataFormat(),
+ style.getDataFormatString()
+ )
+ );
+ }
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
text.append(cell.getBooleanCellValue());
diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
index bc264d097e..ed2da48bd4 100644
--- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
+++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
@@ -63,11 +63,11 @@ public final class TestExcelExtractor extends TestCase {
assertEquals(
"Sheet1\n" +
- "1000.0\t1.0\t5.0\n" +
- "2000.0\t2.0\n" +
- "3000.0\t3.0\n" +
- "4000.0\t4.0\n" +
- "5000.0\t5.0\n" +
+ "1000\t1\t5\n" +
+ "2000\t2\n" +
+ "3000\t3\n" +
+ "4000\t4\n" +
+ "5000\t5\n" +
"Sheet2\nSheet3\n",
extractor.getText()
);
@@ -76,11 +76,11 @@ public final class TestExcelExtractor extends TestCase {
assertEquals(
"Sheet1\n" +
- "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
- "2000.0\t2.0\n" +
- "3000.0\t3.0\n" +
- "4000.0\t4.0\n" +
- "5000.0\t5.0\n" +
+ "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
+ "2000\t2\n" +
+ "3000\t3\n" +
+ "4000\t4\n" +
+ "5000\t5\n" +
"Sheet2\nSheet3\n",
extractor.getText()
);
@@ -173,18 +173,18 @@ public final class TestExcelExtractor extends TestCase {
// Check without comments
assertEquals(
- "1.0\tone\n" +
- "2.0\ttwo\n" +
- "3.0\tthree\n",
+ "1\tone\n" +
+ "2\ttwo\n" +
+ "3\tthree\n",
extractor.getText()
);
// Now with
extractor.setIncludeCellComments(true);
assertEquals(
- "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
- "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
- "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
+ "1\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
+ "2\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
+ "3\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
extractor.getText()
);
}
@@ -199,20 +199,65 @@ public final class TestExcelExtractor extends TestCase {
"Sheet1\n" +
"&[TAB]\t\n" +
"Hello\n" +
- "11.0\t23.0\n"
+ "11\t23\n"
));
assertTrue(padded.startsWith(
"Sheet1\n" +
"&[TAB]\t\n" +
"Hello\n" +
- "11.0\t\t\t23.0\n"
+ "11\t\t\t23\n"
));
}
+ public void testFormatting() throws Exception {
+ ExcelExtractor extractor = createExtractor("Formatting.xls");
+ extractor.setIncludeBlankCells(false);
+ extractor.setIncludeSheetNames(false);
+ String text = extractor.getText();
+
+ // Note - not all the formats in the file
+ // actually quite match what they claim to
+ // be, as some are auto-local builtins...
+
+ assertTrue(text.startsWith(
+ "Dates, all 24th November 2006\n"
+ ));
+ assertTrue(
+ text.indexOf(
+ "yyyy/mm/dd\t2006/11/24\n"
+ ) > -1
+ );
+ assertTrue(
+ text.indexOf(
+ "yyyy-mm-dd\t2006-11-24\n"
+ ) > -1
+ );
+ assertTrue(
+ text.indexOf(
+ "dd-mm-yy\t24-11-06\n"
+ ) > -1
+ );
+
+ assertTrue(
+ text.indexOf(
+ "nn.nn\t10.52\n"
+ ) > -1
+ );
+ assertTrue(
+ text.indexOf(
+ "nn.nnn\t10.520\n"
+ ) > -1
+ );
+ assertTrue(
+ text.indexOf(
+ "£nn.nn\t£10.52\n"
+ ) > -1
+ );
+ }
/**
- * Embded in a non-excel file
+ * Embeded in a non-excel file
*/
public void testWithEmbeded() throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(
diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java
index d0e5c4183f..c0e4150b4b 100644
--- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java
@@ -309,6 +309,63 @@ public final class TestHSSFDataFormatter extends TestCase {
assertEquals("2345", f.formatCellValue(cellA1));
}
+
+ /**
+ * Tests various formattings of dates and numbers
+ */
+ public void testFromFile() {
+ HSSFWorkbook workbook = HSSFTestDataSamples.openSampleWorkbook("Formatting.xls");
+ HSSFSheet sheet = workbook.getSheetAt(0);
+
+ HSSFDataFormatter f = new HSSFDataFormatter();
+
+ // This one is one of the nasty auto-locale changing ones...
+ assertEquals("dd/mm/yyyy", sheet.getRow(1).getCell(0).getStringCellValue());
+ assertEquals("m/d/yy", sheet.getRow(1).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("11/24/06", f.formatCellValue(sheet.getRow(1).getCell(1)));
+
+ assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(0).getStringCellValue());
+ assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("2006/11/24", f.formatCellValue(sheet.getRow(2).getCell(1)));
+
+ assertEquals("yyyy-mm-dd", sheet.getRow(3).getCell(0).getStringCellValue());
+ assertEquals("yyyy\\-mm\\-dd", sheet.getRow(3).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("2006-11-24", f.formatCellValue(sheet.getRow(3).getCell(1)));
+
+ assertEquals("yy/mm/dd", sheet.getRow(4).getCell(0).getStringCellValue());
+ assertEquals("yy/mm/dd", sheet.getRow(4).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("06/11/24", f.formatCellValue(sheet.getRow(4).getCell(1)));
+
+ // Another builtin fun one
+ assertEquals("dd/mm/yy", sheet.getRow(5).getCell(0).getStringCellValue());
+ assertEquals("d/m/yy;@", sheet.getRow(5).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("24/11/06", f.formatCellValue(sheet.getRow(5).getCell(1)));
+
+ assertEquals("dd-mm-yy", sheet.getRow(6).getCell(0).getStringCellValue());
+ assertEquals("dd\\-mm\\-yy", sheet.getRow(6).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("24-11-06", f.formatCellValue(sheet.getRow(6).getCell(1)));
+
+
+ // Another builtin fun one
+ assertEquals("nn.nn", sheet.getRow(9).getCell(0).getStringCellValue());
+ assertEquals("General", sheet.getRow(9).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("10.52", f.formatCellValue(sheet.getRow(9).getCell(1)));
+
+ // text isn't quite the format rule...
+ assertEquals("nn.nnn", sheet.getRow(10).getCell(0).getStringCellValue());
+ assertEquals("0.000", sheet.getRow(10).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("10.520", f.formatCellValue(sheet.getRow(10).getCell(1)));
+
+ // text isn't quite the format rule...
+ assertEquals("nn.n", sheet.getRow(11).getCell(0).getStringCellValue());
+ assertEquals("0.0", sheet.getRow(11).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("10.5", f.formatCellValue(sheet.getRow(11).getCell(1)));
+
+ // text isn't quite the format rule...
+ assertEquals("£nn.nn", sheet.getRow(12).getCell(0).getStringCellValue());
+ assertEquals("\"£\"#,##0.00", sheet.getRow(12).getCell(1).getCellStyle().getDataFormatString());
+ assertEquals("£10.52", f.formatCellValue(sheet.getRow(12).getCell(1)));
+ }
private static void log(String msg) {
if (false) { // successful tests should be silent