diff options
author | Nick Burch <nick@apache.org> | 2010-06-02 16:51:05 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2010-06-02 16:51:05 +0000 |
commit | 8952570e0bcf435c612b1faffa07f09b51ff562f (patch) | |
tree | 647522bafde1995ea3313a55443cbe58e06757b9 /src | |
parent | 0d1692dbf270fe2aaa2b9c296385b3293d8739b9 (diff) | |
download | poi-8952570e0bcf435c612b1faffa07f09b51ff562f.tar.gz poi-8952570e0bcf435c612b1faffa07f09b51ff562f.zip |
Another fix inspired by bug #48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@950657 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
4 files changed, 141 insertions, 22 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 016fb1eceb..b5c0164728 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ <changes> <release version="3.7-SNAPSHOT" date="2010-??-??"> + <action dev="POI-DEVELOPERS" type="fix">48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action> <action dev="POI-DEVELOPERS" type="fix">48494 - have EventBasedExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action> <action dev="POI-DEVELOPERS" type="fix">49096 - add clone support to Chart begin and end records, to allow cloning of more Chart containing sheets</action> <action dev="POI-DEVELOPERS" type="add">List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal)</action> diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index dc70fef2fb..c2e234ce34 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -26,7 +26,9 @@ import java.io.PrintStream; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.record.formula.eval.ErrorEval; import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFComment; +import org.apache.poi.hssf.usermodel.HSSFDataFormatter; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; @@ -51,6 +53,7 @@ import org.apache.poi.ss.usermodel.HeaderFooter; */ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor { private HSSFWorkbook _wb; + private HSSFDataFormatter _formatter; private boolean _includeSheetNames = true; private boolean _shouldEvaluateFormulas = true; private boolean _includeCellComments = false; @@ -60,6 +63,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p public ExcelExtractor(HSSFWorkbook wb) { super(wb); _wb = wb; + _formatter = new HSSFDataFormatter(); } public ExcelExtractor(POIFSFileSystem fs) throws IOException { this(fs.getRoot(), fs); @@ -323,8 +327,9 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p text.append(cell.getRichStringCellValue().getString()); break; case HSSFCell.CELL_TYPE_NUMERIC: - // Note - we don't apply any formatting! - text.append(cell.getNumericCellValue()); + text.append( + _formatter.formatCellValue(cell) + ); break; case HSSFCell.CELL_TYPE_BOOLEAN: text.append(cell.getBooleanCellValue()); @@ -344,7 +349,18 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p } break; case HSSFCell.CELL_TYPE_NUMERIC: - text.append(cell.getNumericCellValue()); + HSSFCellStyle style = cell.getCellStyle(); + if(style == null) { + text.append( cell.getNumericCellValue() ); + } else { + text.append( + _formatter.formatRawCellContents( + cell.getNumericCellValue(), + style.getDataFormat(), + style.getDataFormatString() + ) + ); + } break; case HSSFCell.CELL_TYPE_BOOLEAN: text.append(cell.getBooleanCellValue()); diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index bc264d097e..ed2da48bd4 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -63,11 +63,11 @@ public final class TestExcelExtractor extends TestCase { assertEquals( "Sheet1\n" + - "1000.0\t1.0\t5.0\n" + - "2000.0\t2.0\n" + - "3000.0\t3.0\n" + - "4000.0\t4.0\n" + - "5000.0\t5.0\n" + + "1000\t1\t5\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n" + "Sheet2\nSheet3\n", extractor.getText() ); @@ -76,11 +76,11 @@ public final class TestExcelExtractor extends TestCase { assertEquals( "Sheet1\n" + - "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + - "2000.0\t2.0\n" + - "3000.0\t3.0\n" + - "4000.0\t4.0\n" + - "5000.0\t5.0\n" + + "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n" + "Sheet2\nSheet3\n", extractor.getText() ); @@ -173,18 +173,18 @@ public final class TestExcelExtractor extends TestCase { // Check without comments assertEquals( - "1.0\tone\n" + - "2.0\ttwo\n" + - "3.0\tthree\n", + "1\tone\n" + + "2\ttwo\n" + + "3\tthree\n", extractor.getText() ); // Now with extractor.setIncludeCellComments(true); assertEquals( - "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" + - "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + - "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", + "1\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" + + "2\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + + "3\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", extractor.getText() ); } @@ -199,20 +199,65 @@ public final class TestExcelExtractor extends TestCase { "Sheet1\n" + "&[TAB]\t\n" + "Hello\n" + - "11.0\t23.0\n" + "11\t23\n" )); assertTrue(padded.startsWith( "Sheet1\n" + "&[TAB]\t\n" + "Hello\n" + - "11.0\t\t\t23.0\n" + "11\t\t\t23\n" )); } + public void testFormatting() throws Exception { + ExcelExtractor extractor = createExtractor("Formatting.xls"); + extractor.setIncludeBlankCells(false); + extractor.setIncludeSheetNames(false); + String text = extractor.getText(); + + // Note - not all the formats in the file + // actually quite match what they claim to + // be, as some are auto-local builtins... + + assertTrue(text.startsWith( + "Dates, all 24th November 2006\n" + )); + assertTrue( + text.indexOf( + "yyyy/mm/dd\t2006/11/24\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "yyyy-mm-dd\t2006-11-24\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "dd-mm-yy\t24-11-06\n" + ) > -1 + ); + + assertTrue( + text.indexOf( + "nn.nn\t10.52\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "nn.nnn\t10.520\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "£nn.nn\t£10.52\n" + ) > -1 + ); + } /** - * Embded in a non-excel file + * Embeded in a non-excel file */ public void testWithEmbeded() throws Exception { POIFSFileSystem fs = new POIFSFileSystem( diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java index d0e5c4183f..c0e4150b4b 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java @@ -309,6 +309,63 @@ public final class TestHSSFDataFormatter extends TestCase { assertEquals("2345", f.formatCellValue(cellA1)); } + + /** + * Tests various formattings of dates and numbers + */ + public void testFromFile() { + HSSFWorkbook workbook = HSSFTestDataSamples.openSampleWorkbook("Formatting.xls"); + HSSFSheet sheet = workbook.getSheetAt(0); + + HSSFDataFormatter f = new HSSFDataFormatter(); + + // This one is one of the nasty auto-locale changing ones... + assertEquals("dd/mm/yyyy", sheet.getRow(1).getCell(0).getStringCellValue()); + assertEquals("m/d/yy", sheet.getRow(1).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("11/24/06", f.formatCellValue(sheet.getRow(1).getCell(1))); + + assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(0).getStringCellValue()); + assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("2006/11/24", f.formatCellValue(sheet.getRow(2).getCell(1))); + + assertEquals("yyyy-mm-dd", sheet.getRow(3).getCell(0).getStringCellValue()); + assertEquals("yyyy\\-mm\\-dd", sheet.getRow(3).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("2006-11-24", f.formatCellValue(sheet.getRow(3).getCell(1))); + + assertEquals("yy/mm/dd", sheet.getRow(4).getCell(0).getStringCellValue()); + assertEquals("yy/mm/dd", sheet.getRow(4).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("06/11/24", f.formatCellValue(sheet.getRow(4).getCell(1))); + + // Another builtin fun one + assertEquals("dd/mm/yy", sheet.getRow(5).getCell(0).getStringCellValue()); + assertEquals("d/m/yy;@", sheet.getRow(5).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("24/11/06", f.formatCellValue(sheet.getRow(5).getCell(1))); + + assertEquals("dd-mm-yy", sheet.getRow(6).getCell(0).getStringCellValue()); + assertEquals("dd\\-mm\\-yy", sheet.getRow(6).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("24-11-06", f.formatCellValue(sheet.getRow(6).getCell(1))); + + + // Another builtin fun one + assertEquals("nn.nn", sheet.getRow(9).getCell(0).getStringCellValue()); + assertEquals("General", sheet.getRow(9).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("10.52", f.formatCellValue(sheet.getRow(9).getCell(1))); + + // text isn't quite the format rule... + assertEquals("nn.nnn", sheet.getRow(10).getCell(0).getStringCellValue()); + assertEquals("0.000", sheet.getRow(10).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("10.520", f.formatCellValue(sheet.getRow(10).getCell(1))); + + // text isn't quite the format rule... + assertEquals("nn.n", sheet.getRow(11).getCell(0).getStringCellValue()); + assertEquals("0.0", sheet.getRow(11).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("10.5", f.formatCellValue(sheet.getRow(11).getCell(1))); + + // text isn't quite the format rule... + assertEquals("£nn.nn", sheet.getRow(12).getCell(0).getStringCellValue()); + assertEquals("\"£\"#,##0.00", sheet.getRow(12).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("£10.52", f.formatCellValue(sheet.getRow(12).getCell(1))); + } private static void log(String msg) { if (false) { // successful tests should be silent |