From 289d9722e6b686559f03e0970abb7d2316bf6d35 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Mon, 29 Aug 2011 10:45:01 +0000 Subject: [PATCH] add initial implementation for Excel to XSL FO converter git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1162724 13f79535-47bb-0310-9956-ffa450edef68 --- .../converter/AbstractExcelConverter.java | 191 ++++++ .../hssf/converter/AbstractExcelUtils.java | 81 +++ .../hssf/converter/ExcelToFoConverter.java | 644 ++++++++++++++++++ .../poi/hssf/converter/ExcelToFoUtils.java | 25 + .../hssf/converter/ExcelToHtmlConverter.java | 159 +---- .../poi/hssf/converter/ExcelToHtmlUtils.java | 54 +- .../poi/hwpf/converter/FoDocumentFacade.java | 19 +- ...uite.java => TestExcelConverterSuite.java} | 48 +- 8 files changed, 1001 insertions(+), 220 deletions(-) create mode 100644 src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelConverter.java create mode 100644 src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelUtils.java create mode 100644 src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoConverter.java create mode 100644 src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java rename src/scratchpad/testcases/org/apache/poi/hssf/converter/{TestExcelToHtmlConverterSuite.java => TestExcelConverterSuite.java} (68%) diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelConverter.java b/src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelConverter.java new file mode 100644 index 0000000000..ca832c7710 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelConverter.java @@ -0,0 +1,191 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.converter; + +import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFCellStyle; +import org.apache.poi.hssf.usermodel.HSSFDataFormatter; +import org.apache.poi.hssf.usermodel.HSSFRichTextString; +import org.apache.poi.hssf.usermodel.HSSFRow; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hwpf.converter.AbstractWordConverter; +import org.apache.poi.ss.formula.eval.ErrorEval; +import org.apache.poi.util.Beta; +import org.w3c.dom.Document; + +/** + * Common class for {@link ExcelToFoConverter} and {@link ExcelToHtmlConverter} + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + * @see AbstractWordConverter + */ +@Beta +public abstract class AbstractExcelConverter +{ + protected static int getColumnWidth( HSSFSheet sheet, int columnIndex ) + { + return ExcelToHtmlUtils.getColumnWidthInPx( sheet + .getColumnWidth( columnIndex ) ); + } + + protected final HSSFDataFormatter _formatter = new HSSFDataFormatter(); + + private boolean outputColumnHeaders = true; + + private boolean outputHiddenColumns = false; + + private boolean outputHiddenRows = false; + + private boolean outputLeadingSpacesAsNonBreaking = true; + + private boolean outputRowNumbers = true; + + /** + * Generates name for output as column header in case + * {@link #isOutputColumnHeaders()} == true + * + * @param columnIndex + * 0-based column index + */ + protected String getColumnName( int columnIndex ) + { + return String.valueOf( columnIndex + 1 ); + } + + protected abstract Document getDocument(); + + /** + * Generates name for output as row number in case + * {@link #isOutputRowNumbers()} == true + */ + protected String getRowName( HSSFRow row ) + { + return String.valueOf( row.getRowNum() + 1 ); + } + + public boolean isOutputColumnHeaders() + { + return outputColumnHeaders; + } + + public boolean isOutputHiddenColumns() + { + return outputHiddenColumns; + } + + public boolean isOutputHiddenRows() + { + return outputHiddenRows; + } + + public boolean isOutputLeadingSpacesAsNonBreaking() + { + return outputLeadingSpacesAsNonBreaking; + } + + public boolean isOutputRowNumbers() + { + return outputRowNumbers; + } + + protected boolean isTextEmpty( HSSFCell cell ) + { + final String value; + switch ( cell.getCellType() ) + { + case HSSFCell.CELL_TYPE_STRING: + // XXX: enrich + value = cell.getRichStringCellValue().getString(); + break; + case HSSFCell.CELL_TYPE_FORMULA: + switch ( cell.getCachedFormulaResultType() ) + { + case HSSFCell.CELL_TYPE_STRING: + HSSFRichTextString str = cell.getRichStringCellValue(); + if ( str == null || str.length() <= 0 ) + return false; + + value = str.toString(); + break; + case HSSFCell.CELL_TYPE_NUMERIC: + HSSFCellStyle style = cell.getCellStyle(); + if ( style == null ) + { + return false; + } + + value = ( _formatter.formatRawCellContents( + cell.getNumericCellValue(), style.getDataFormat(), + style.getDataFormatString() ) ); + break; + case HSSFCell.CELL_TYPE_BOOLEAN: + value = String.valueOf( cell.getBooleanCellValue() ); + break; + case HSSFCell.CELL_TYPE_ERROR: + value = ErrorEval.getText( cell.getErrorCellValue() ); + break; + default: + value = ExcelToHtmlUtils.EMPTY; + break; + } + break; + case HSSFCell.CELL_TYPE_BLANK: + value = ExcelToHtmlUtils.EMPTY; + break; + case HSSFCell.CELL_TYPE_NUMERIC: + value = _formatter.formatCellValue( cell ); + break; + case HSSFCell.CELL_TYPE_BOOLEAN: + value = String.valueOf( cell.getBooleanCellValue() ); + break; + case HSSFCell.CELL_TYPE_ERROR: + value = ErrorEval.getText( cell.getErrorCellValue() ); + break; + default: + return true; + } + + return ExcelToHtmlUtils.isEmpty( value ); + } + + public void setOutputColumnHeaders( boolean outputColumnHeaders ) + { + this.outputColumnHeaders = outputColumnHeaders; + } + + public void setOutputHiddenColumns( boolean outputZeroWidthColumns ) + { + this.outputHiddenColumns = outputZeroWidthColumns; + } + + public void setOutputHiddenRows( boolean outputZeroHeightRows ) + { + this.outputHiddenRows = outputZeroHeightRows; + } + + public void setOutputLeadingSpacesAsNonBreaking( + boolean outputPrePostSpacesAsNonBreaking ) + { + this.outputLeadingSpacesAsNonBreaking = outputPrePostSpacesAsNonBreaking; + } + + public void setOutputRowNumbers( boolean outputRowNumbers ) + { + this.outputRowNumbers = outputRowNumbers; + } + +} diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelUtils.java b/src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelUtils.java new file mode 100644 index 0000000000..fe95df1d96 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hssf/converter/AbstractExcelUtils.java @@ -0,0 +1,81 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.converter; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hwpf.converter.AbstractWordUtils; +import org.apache.poi.util.Beta; +import org.apache.poi.util.IOUtils; + +/** + * Common class for {@link ExcelToFoUtils} and {@link ExcelToHtmlUtils} + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + * @see AbstractWordUtils + */ +@Beta +public class AbstractExcelUtils +{ + static final String EMPTY = ""; + private static final short EXCEL_COLUMN_WIDTH_FACTOR = 256; + private static final int UNIT_OFFSET_LENGTH = 7; + + /** + * See here for Xio explanation and details + */ + public static int getColumnWidthInPx( int widthUnits ) + { + int pixels = ( widthUnits / EXCEL_COLUMN_WIDTH_FACTOR ) + * UNIT_OFFSET_LENGTH; + + int offsetWidthUnits = widthUnits % EXCEL_COLUMN_WIDTH_FACTOR; + pixels += Math.round( offsetWidthUnits + / ( (float) EXCEL_COLUMN_WIDTH_FACTOR / UNIT_OFFSET_LENGTH ) ); + + return pixels; + } + + static boolean isEmpty( String str ) + { + return str == null || str.length() == 0; + } + + static boolean isNotEmpty( String str ) + { + return !isEmpty( str ); + } + + public static HSSFWorkbook loadXls( File xlsFile ) throws IOException + { + final FileInputStream inputStream = new FileInputStream( xlsFile ); + try + { + return new HSSFWorkbook( inputStream ); + } + finally + { + IOUtils.closeQuietly( inputStream ); + } + } + +} diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoConverter.java b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoConverter.java new file mode 100644 index 0000000000..8c1e30416a --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoConverter.java @@ -0,0 +1,644 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.converter; + +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.List; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +import org.apache.poi.hpsf.SummaryInformation; +import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFCellStyle; +import org.apache.poi.hssf.usermodel.HSSFRichTextString; +import org.apache.poi.hssf.usermodel.HSSFRow; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hwpf.converter.FoDocumentFacade; +import org.apache.poi.ss.formula.eval.ErrorEval; +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.util.Beta; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Text; + +/** + * Converts xls files (97-2007) to XSL FO. + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +@Beta +public class ExcelToFoConverter extends AbstractExcelConverter +{ + private static final float CM_PER_INCH = 2.54f; + + private static final float DPI = 72; + + private static final POILogger logger = POILogFactory + .getLogger( ExcelToFoConverter.class ); + + private static final float PAPER_A4_HEIGHT_INCHES = 29.4f / CM_PER_INCH; + + private static final float PAPER_A4_WIDTH_INCHES = 21.0f / CM_PER_INCH; + + /** + * Java main() interface to interact with {@link ExcelToFoConverter} + * + *

+ * Usage: ExcelToHtmlConverter infile outfile + *

+ * Where infile is an input .xls file ( Word 97-2007) which will be rendered + * as XSL FO into outfile + */ + public static void main( String[] args ) + { + if ( args.length < 2 ) + { + System.err + .println( "Usage: ExcelToFoConverter " ); + return; + } + + System.out.println( "Converting " + args[0] ); + System.out.println( "Saving output to " + args[1] ); + try + { + Document doc = ExcelToHtmlConverter.process( new File( args[0] ) ); + + FileWriter out = new FileWriter( args[1] ); + DOMSource domSource = new DOMSource( doc ); + StreamResult streamResult = new StreamResult( out ); + + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer serializer = tf.newTransformer(); + // TODO set encoding from a command argument + serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" ); + serializer.setOutputProperty( OutputKeys.INDENT, "no" ); + serializer.setOutputProperty( OutputKeys.METHOD, "xml" ); + serializer.transform( domSource, streamResult ); + out.close(); + } + catch ( Exception e ) + { + e.printStackTrace(); + } + } + + /** + * Converts Excel file (97-2007) into XSL FO file. + * + * @param xlsFile + * file to process + * @return DOM representation of result XSL FO + */ + public static Document process( File xlsFile ) throws Exception + { + final HSSFWorkbook workbook = ExcelToFoUtils.loadXls( xlsFile ); + ExcelToFoConverter excelToHtmlConverter = new ExcelToFoConverter( + DocumentBuilderFactory.newInstance().newDocumentBuilder() + .newDocument() ); + excelToHtmlConverter.processWorkbook( workbook ); + return excelToHtmlConverter.getDocument(); + } + + private final FoDocumentFacade foDocumentFacade; + + public ExcelToFoConverter( Document document ) + { + this.foDocumentFacade = new FoDocumentFacade( document ); + } + + protected String createPageMaster( HSSFSheet sheet, int maxSheetColumns, + String pageMasterName ) + { + final float paperHeightIn; + final float paperWidthIn; + { + float requiredWidthIn = ExcelToFoUtils + .getColumnWidthInPx( getSheetWidth( sheet, maxSheetColumns ) ) + / DPI + 2; + + if ( requiredWidthIn < PAPER_A4_WIDTH_INCHES ) + { + // portrait orientation + paperWidthIn = PAPER_A4_WIDTH_INCHES; + paperHeightIn = PAPER_A4_HEIGHT_INCHES; + } + else + { + // landscape orientation + paperWidthIn = requiredWidthIn; + paperHeightIn = paperWidthIn + * ( PAPER_A4_WIDTH_INCHES / PAPER_A4_HEIGHT_INCHES ); + } + } + + final float leftMargin = 1; + final float rightMargin = 1; + final float topMargin = 1; + final float bottomMargin = 1; + + Element pageMaster = foDocumentFacade + .addSimplePageMaster( pageMasterName ); + pageMaster.setAttribute( "page-height", paperHeightIn + "in" ); + pageMaster.setAttribute( "page-width", paperWidthIn + "in" ); + + Element regionBody = foDocumentFacade.addRegionBody( pageMaster ); + regionBody.setAttribute( "margin", topMargin + "in " + rightMargin + + "in " + bottomMargin + "in " + leftMargin + "in" ); + + return pageMasterName; + } + + @Override + protected Document getDocument() + { + return foDocumentFacade.getDocument(); + } + + protected int getSheetWidth( HSSFSheet sheet, int maxSheetColumns ) + { + int width = 0; + if ( isOutputRowNumbers() ) + { + width += sheet.getDefaultColumnWidth(); + } + + for ( int columnIndex = 0; columnIndex < maxSheetColumns; columnIndex++ ) + { + if ( !isOutputHiddenColumns() && sheet.isColumnHidden( columnIndex ) ) + continue; + width += sheet.getColumnWidth( columnIndex ); + } + return width; + } + + protected boolean processCell( HSSFCell cell, Element tableCellElement, + int normalWidthPx, int maxSpannedWidthPx, float normalHeightPt ) + { + final HSSFCellStyle cellStyle = cell.getCellStyle(); + + String value; + switch ( cell.getCellType() ) + { + case HSSFCell.CELL_TYPE_STRING: + // XXX: enrich + value = cell.getRichStringCellValue().getString(); + break; + case HSSFCell.CELL_TYPE_FORMULA: + switch ( cell.getCachedFormulaResultType() ) + { + case HSSFCell.CELL_TYPE_STRING: + HSSFRichTextString str = cell.getRichStringCellValue(); + if ( str != null && str.length() > 0 ) + { + value = ( str.toString() ); + } + else + { + value = ExcelToHtmlUtils.EMPTY; + } + break; + case HSSFCell.CELL_TYPE_NUMERIC: + HSSFCellStyle style = cellStyle; + if ( style == null ) + { + value = String.valueOf( cell.getNumericCellValue() ); + } + else + { + value = ( _formatter.formatRawCellContents( + cell.getNumericCellValue(), style.getDataFormat(), + style.getDataFormatString() ) ); + } + break; + case HSSFCell.CELL_TYPE_BOOLEAN: + value = String.valueOf( cell.getBooleanCellValue() ); + break; + case HSSFCell.CELL_TYPE_ERROR: + value = ErrorEval.getText( cell.getErrorCellValue() ); + break; + default: + logger.log( + POILogger.WARN, + "Unexpected cell cachedFormulaResultType (" + + cell.getCachedFormulaResultType() + ")" ); + value = ExcelToHtmlUtils.EMPTY; + break; + } + break; + case HSSFCell.CELL_TYPE_BLANK: + value = ExcelToHtmlUtils.EMPTY; + break; + case HSSFCell.CELL_TYPE_NUMERIC: + value = _formatter.formatCellValue( cell ); + break; + case HSSFCell.CELL_TYPE_BOOLEAN: + value = String.valueOf( cell.getBooleanCellValue() ); + break; + case HSSFCell.CELL_TYPE_ERROR: + value = ErrorEval.getText( cell.getErrorCellValue() ); + break; + default: + logger.log( POILogger.WARN, + "Unexpected cell type (" + cell.getCellType() + ")" ); + return true; + } + + final boolean noText = ExcelToHtmlUtils.isEmpty( value ); + // final boolean wrapInDivs = !noText && isUseDivsToSpan() + // && !cellStyle.getWrapText(); + + final short cellStyleIndex = cellStyle.getIndex(); + if ( cellStyleIndex != 0 ) + { + // HSSFWorkbook workbook = cell.getRow().getSheet().getWorkbook(); + // String mainCssClass = getStyleClassName( workbook, cellStyle ); + // if ( wrapInDivs ) + // { + // tableCellElement.setAttribute( "class", mainCssClass + " " + // + cssClassContainerCell ); + // } + // else + // { + // tableCellElement.setAttribute( "class", mainCssClass ); + // } + + if ( noText ) + { + /* + * if cell style is defined (like borders, etc.) but cell text + * is empty, add " " to output, so browser won't collapse + * and ignore cell + */ + value = "\u00A0"; + } + } + + if ( isOutputLeadingSpacesAsNonBreaking() && value.startsWith( " " ) ) + { + StringBuilder builder = new StringBuilder(); + for ( int c = 0; c < value.length(); c++ ) + { + if ( value.charAt( c ) != ' ' ) + break; + builder.append( '\u00a0' ); + } + + if ( value.length() != builder.length() ) + builder.append( value.substring( builder.length() ) ); + + value = builder.toString(); + } + + Text text = foDocumentFacade.createText( value ); + + // if ( wrapInDivs ) + // { + // Element outerDiv = htmlDocumentFacade.createBlock(); + // outerDiv.setAttribute( "class", this.cssClassContainerDiv ); + // + // Element innerDiv = htmlDocumentFacade.createBlock(); + // StringBuilder innerDivStyle = new StringBuilder(); + // innerDivStyle.append( "position:absolute;min-width:" ); + // innerDivStyle.append( normalWidthPx ); + // innerDivStyle.append( "px;" ); + // if ( maxSpannedWidthPx != Integer.MAX_VALUE ) + // { + // innerDivStyle.append( "max-width:" ); + // innerDivStyle.append( maxSpannedWidthPx ); + // innerDivStyle.append( "px;" ); + // } + // innerDivStyle.append( "overflow:hidden;max-height:" ); + // innerDivStyle.append( normalHeightPt ); + // innerDivStyle.append( "pt;white-space:nowrap;" ); + // ExcelToHtmlUtils.appendAlign( innerDivStyle, + // cellStyle.getAlignment() ); + // htmlDocumentFacade.addStyleClass( outerDiv, "d", + // innerDivStyle.toString() ); + // + // innerDiv.appendChild( text ); + // outerDiv.appendChild( innerDiv ); + // tableCellElement.appendChild( outerDiv ); + // } + // else + { + tableCellElement.appendChild( text ); + } + + return ExcelToHtmlUtils.isEmpty( value ) && cellStyleIndex == 0; + } + + protected void processColumnHeaders( HSSFSheet sheet, int maxSheetColumns, + Element table ) + { + Element tableHeader = foDocumentFacade.createTableHeader(); + Element row = foDocumentFacade.createTableRow(); + + if ( isOutputRowNumbers() ) + { + // empty cell at left-top corner + row.appendChild( foDocumentFacade.createTableCell() ); + } + + for ( int c = 0; c < maxSheetColumns; c++ ) + { + if ( !isOutputHiddenColumns() && sheet.isColumnHidden( c ) ) + continue; + + Element cell = foDocumentFacade.createTableCell(); + String text = getColumnName( c ); + cell.appendChild( foDocumentFacade.createText( text ) ); + row.appendChild( cell ); + } + + tableHeader.appendChild( row ); + table.appendChild( tableHeader ); + } + + /** + * Creates COLGROUP element with width specified for all columns. (Except + * first if {@link #isOutputRowNumbers()}==true) + */ + protected void processColumnWidths( HSSFSheet sheet, int maxSheetColumns, + Element table ) + { + if ( isOutputRowNumbers() ) + { + table.appendChild( foDocumentFacade.createTableColumn() ); + } + + for ( int c = 0; c < maxSheetColumns; c++ ) + { + if ( !isOutputHiddenColumns() && sheet.isColumnHidden( c ) ) + continue; + + Element col = foDocumentFacade.createTableColumn(); + col.setAttribute( "column-width", + String.valueOf( getColumnWidth( sheet, c ) / DPI ) + "in" ); + table.appendChild( col ); + } + } + + protected void processDocumentInformation( + SummaryInformation summaryInformation ) + { + if ( ExcelToFoUtils.isNotEmpty( summaryInformation.getTitle() ) ) + foDocumentFacade.setTitle( summaryInformation.getTitle() ); + + if ( ExcelToFoUtils.isNotEmpty( summaryInformation.getAuthor() ) ) + foDocumentFacade.setCreator( summaryInformation.getAuthor() ); + + if ( ExcelToFoUtils.isNotEmpty( summaryInformation.getKeywords() ) ) + foDocumentFacade.setKeywords( summaryInformation.getKeywords() ); + + if ( ExcelToFoUtils.isNotEmpty( summaryInformation.getComments() ) ) + foDocumentFacade.setDescription( summaryInformation.getComments() ); + } + + /** + * @return maximum 1-base index of column that were rendered, zero if none + */ + protected int processRow( CellRangeAddress[][] mergedRanges, HSSFRow row, + Element tableRowElement ) + { + final HSSFSheet sheet = row.getSheet(); + final short maxColIx = row.getLastCellNum(); + if ( maxColIx <= 0 ) + return 0; + + final List emptyCells = new ArrayList( maxColIx ); + + if ( isOutputRowNumbers() ) + { + Element tableRowNumberCellElement = foDocumentFacade + .createTableCell(); + processRowNumber( row, tableRowNumberCellElement ); + emptyCells.add( tableRowNumberCellElement ); + } + + int maxRenderedColumn = 0; + for ( int colIx = 0; colIx < maxColIx; colIx++ ) + { + if ( !isOutputHiddenColumns() && sheet.isColumnHidden( colIx ) ) + continue; + + CellRangeAddress range = ExcelToHtmlUtils.getMergedRange( + mergedRanges, row.getRowNum(), colIx ); + + if ( range != null + && ( range.getFirstColumn() != colIx || range.getFirstRow() != row + .getRowNum() ) ) + continue; + + HSSFCell cell = row.getCell( colIx ); + + int divWidthPx = 0; + // if ( isUseDivsToSpan() ) + // { + // divWidthPx = getColumnWidth( sheet, colIx ); + // + // boolean hasBreaks = false; + // for ( int nextColumnIndex = colIx + 1; nextColumnIndex < + // maxColIx; nextColumnIndex++ ) + // { + // if ( !isOutputHiddenColumns() + // && sheet.isColumnHidden( nextColumnIndex ) ) + // continue; + // + // if ( row.getCell( nextColumnIndex ) != null + // && !isTextEmpty( row.getCell( nextColumnIndex ) ) ) + // { + // hasBreaks = true; + // break; + // } + // + // divWidthPx += getColumnWidth( sheet, nextColumnIndex ); + // } + // + // if ( !hasBreaks ) + // divWidthPx = Integer.MAX_VALUE; + // } + + Element tableCellElement = foDocumentFacade.createTableCell(); + + if ( range != null ) + { + if ( range.getFirstColumn() != range.getLastColumn() ) + tableCellElement.setAttribute( + "number-columns-spanned", + String.valueOf( range.getLastColumn() + - range.getFirstColumn() + 1 ) ); + if ( range.getFirstRow() != range.getLastRow() ) + tableCellElement.setAttribute( + "number-rows-spanned", + String.valueOf( range.getLastRow() + - range.getFirstRow() + 1 ) ); + } + + boolean emptyCell; + if ( cell != null ) + { + emptyCell = processCell( cell, tableCellElement, + getColumnWidth( sheet, colIx ), divWidthPx, + row.getHeight() / 20f ); + } + else + { + emptyCell = true; + } + + if ( emptyCell ) + { + emptyCells.add( tableCellElement ); + } + else + { + for ( Element emptyCellElement : emptyCells ) + { + tableRowElement.appendChild( emptyCellElement ); + } + emptyCells.clear(); + + tableRowElement.appendChild( tableCellElement ); + maxRenderedColumn = colIx; + } + } + + return maxRenderedColumn + 1; + } + + protected void processRowNumber( HSSFRow row, + Element tableRowNumberCellElement ) + { + Text text = foDocumentFacade.createText( getRowName( row ) ); + tableRowNumberCellElement.appendChild( text ); + } + + protected int processSheet( HSSFSheet sheet, Element flow ) + { + final int physicalNumberOfRows = sheet.getPhysicalNumberOfRows(); + if ( physicalNumberOfRows <= 0 ) + return 0; + + processSheetName( sheet, flow ); + + Element table = foDocumentFacade.createTable(); + Element tableBody = foDocumentFacade.createTableBody(); + + final CellRangeAddress[][] mergedRanges = ExcelToHtmlUtils + .buildMergedRangesMap( sheet ); + + final List emptyRowElements = new ArrayList( + physicalNumberOfRows ); + int maxSheetColumns = 1; + for ( int r = 0; r < physicalNumberOfRows; r++ ) + { + HSSFRow row = sheet.getRow( r ); + + if ( row == null ) + continue; + + if ( !isOutputHiddenRows() && row.getZeroHeight() ) + continue; + + Element tableRowElement = foDocumentFacade.createTableRow(); + tableRowElement.setAttribute( "height", row.getHeight() / 20f + + "pt" ); + + int maxRowColumnNumber = processRow( mergedRanges, row, + tableRowElement ); + + if ( maxRowColumnNumber == 0 ) + { + emptyRowElements.add( tableRowElement ); + } + else + { + if ( !emptyRowElements.isEmpty() ) + { + for ( Element emptyRowElement : emptyRowElements ) + { + tableBody.appendChild( emptyRowElement ); + } + emptyRowElements.clear(); + } + + tableBody.appendChild( tableRowElement ); + } + maxSheetColumns = Math.max( maxSheetColumns, maxRowColumnNumber ); + } + + processColumnWidths( sheet, maxSheetColumns, table ); + + if ( isOutputColumnHeaders() ) + { + processColumnHeaders( sheet, maxSheetColumns, table ); + } + + table.appendChild( tableBody ); + flow.appendChild( table ); + + return maxSheetColumns; + } + + protected void processSheetName( HSSFSheet sheet, Element flow ) + { + Element titleBlock = foDocumentFacade.createBlock(); + Element titleInline = foDocumentFacade.createInline(); + titleInline.appendChild( foDocumentFacade.createText( sheet + .getSheetName() ) ); + titleBlock.appendChild( titleInline ); + flow.appendChild( titleBlock ); + } + + public void processWorkbook( HSSFWorkbook workbook ) + { + final SummaryInformation summaryInformation = workbook + .getSummaryInformation(); + if ( summaryInformation != null ) + { + processDocumentInformation( summaryInformation ); + } + + for ( int s = 0; s < workbook.getNumberOfSheets(); s++ ) + { + String pageMasterName = "sheet-" + s; + + Element pageSequence = foDocumentFacade + .addPageSequence( pageMasterName ); + Element flow = foDocumentFacade.addFlowToPageSequence( + pageSequence, "xsl-region-body" ); + + HSSFSheet sheet = workbook.getSheetAt( s ); + int maxSheetColumns = processSheet( sheet, flow ); + + if ( maxSheetColumns != 0 ) + createPageMaster( sheet, maxSheetColumns, pageMasterName ); + } + } + +} diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java new file mode 100644 index 0000000000..2682550b2a --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java @@ -0,0 +1,25 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.converter; + +import org.apache.poi.util.Beta; + +@Beta +public class ExcelToFoUtils extends AbstractExcelUtils +{ + +} diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlConverter.java index b25d221a86..946770fd00 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlConverter.java @@ -33,7 +33,6 @@ import javax.xml.transform.stream.StreamResult; import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCellStyle; -import org.apache.poi.hssf.usermodel.HSSFDataFormatter; import org.apache.poi.hssf.usermodel.HSSFFont; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; @@ -43,6 +42,7 @@ import org.apache.poi.hssf.util.HSSFColor; import org.apache.poi.hwpf.converter.HtmlDocumentFacade; import org.apache.poi.ss.formula.eval.ErrorEval; import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.util.Beta; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.w3c.dom.Document; @@ -54,18 +54,13 @@ import org.w3c.dom.Text; * * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) */ -public class ExcelToHtmlConverter +@Beta +public class ExcelToHtmlConverter extends AbstractExcelConverter { private static final POILogger logger = POILogFactory .getLogger( ExcelToHtmlConverter.class ); - protected static int getColumnWidth( HSSFSheet sheet, int columnIndex ) - { - return ExcelToHtmlUtils.getColumnWidthInPx( sheet - .getColumnWidth( columnIndex ) ); - } - /** * Java main() interface to interact with {@link ExcelToHtmlConverter} * @@ -80,7 +75,7 @@ public class ExcelToHtmlConverter if ( args.length < 2 ) { System.err - .println( "Usage: ExcelToHtmlConverter " ); + .println( "Usage: ExcelToHtmlConverter " ); return; } @@ -98,7 +93,7 @@ public class ExcelToHtmlConverter Transformer serializer = tf.newTransformer(); // TODO set encoding from a command argument serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" ); - serializer.setOutputProperty( OutputKeys.INDENT, "yes" ); + serializer.setOutputProperty( OutputKeys.INDENT, "no" ); serializer.setOutputProperty( OutputKeys.METHOD, "html" ); serializer.transform( domSource, streamResult ); out.close(); @@ -126,8 +121,6 @@ public class ExcelToHtmlConverter return excelToHtmlConverter.getDocument(); } - private final HSSFDataFormatter _formatter = new HSSFDataFormatter(); - private String cssClassContainerCell = null; private String cssClassContainerDiv = null; @@ -138,16 +131,6 @@ public class ExcelToHtmlConverter private final HtmlDocumentFacade htmlDocumentFacade; - private boolean outputColumnHeaders = true; - - private boolean outputHiddenColumns = false; - - private boolean outputHiddenRows = false; - - private boolean outputLeadingSpacesAsNonBreaking = true; - - private boolean outputRowNumbers = true; - private boolean useDivsToSpan = false; public ExcelToHtmlConverter( Document doc ) @@ -251,32 +234,11 @@ public class ExcelToHtmlConverter } } - /** - * Generates name for output as column header in case - * {@link #isOutputColumnHeaders()} == true - * - * @param columnIndex - * 0-based column index - */ - protected String getColumnName( int columnIndex ) - { - return String.valueOf( columnIndex + 1 ); - } - public Document getDocument() { return htmlDocumentFacade.getDocument(); } - /** - * Generates name for output as row number in case - * {@link #isOutputRowNumbers()} == true - */ - private String getRowName( HSSFRow row ) - { - return String.valueOf( row.getRowNum() + 1 ); - } - protected String getStyleClassName( HSSFWorkbook workbook, HSSFCellStyle cellStyle ) { @@ -293,91 +255,6 @@ public class ExcelToHtmlConverter return cssClass; } - public boolean isOutputColumnHeaders() - { - return outputColumnHeaders; - } - - public boolean isOutputHiddenColumns() - { - return outputHiddenColumns; - } - - public boolean isOutputHiddenRows() - { - return outputHiddenRows; - } - - public boolean isOutputLeadingSpacesAsNonBreaking() - { - return outputLeadingSpacesAsNonBreaking; - } - - public boolean isOutputRowNumbers() - { - return outputRowNumbers; - } - - protected boolean isTextEmpty( HSSFCell cell ) - { - final String value; - switch ( cell.getCellType() ) - { - case HSSFCell.CELL_TYPE_STRING: - // XXX: enrich - value = cell.getRichStringCellValue().getString(); - break; - case HSSFCell.CELL_TYPE_FORMULA: - switch ( cell.getCachedFormulaResultType() ) - { - case HSSFCell.CELL_TYPE_STRING: - HSSFRichTextString str = cell.getRichStringCellValue(); - if ( str == null || str.length() <= 0 ) - return false; - - value = str.toString(); - break; - case HSSFCell.CELL_TYPE_NUMERIC: - HSSFCellStyle style = cell.getCellStyle(); - if ( style == null ) - { - return false; - } - - value = ( _formatter.formatRawCellContents( - cell.getNumericCellValue(), style.getDataFormat(), - style.getDataFormatString() ) ); - break; - case HSSFCell.CELL_TYPE_BOOLEAN: - value = String.valueOf( cell.getBooleanCellValue() ); - break; - case HSSFCell.CELL_TYPE_ERROR: - value = ErrorEval.getText( cell.getErrorCellValue() ); - break; - default: - value = ExcelToHtmlUtils.EMPTY; - break; - } - break; - case HSSFCell.CELL_TYPE_BLANK: - value = ExcelToHtmlUtils.EMPTY; - break; - case HSSFCell.CELL_TYPE_NUMERIC: - value = _formatter.formatCellValue( cell ); - break; - case HSSFCell.CELL_TYPE_BOOLEAN: - value = String.valueOf( cell.getBooleanCellValue() ); - break; - case HSSFCell.CELL_TYPE_ERROR: - value = ErrorEval.getText( cell.getErrorCellValue() ); - break; - default: - return true; - } - - return ExcelToHtmlUtils.isEmpty( value ); - } - public boolean isUseDivsToSpan() { return useDivsToSpan; @@ -831,32 +708,6 @@ public class ExcelToHtmlConverter htmlDocumentFacade.updateStylesheet(); } - public void setOutputColumnHeaders( boolean outputColumnHeaders ) - { - this.outputColumnHeaders = outputColumnHeaders; - } - - public void setOutputHiddenColumns( boolean outputZeroWidthColumns ) - { - this.outputHiddenColumns = outputZeroWidthColumns; - } - - public void setOutputHiddenRows( boolean outputZeroHeightRows ) - { - this.outputHiddenRows = outputZeroHeightRows; - } - - public void setOutputLeadingSpacesAsNonBreaking( - boolean outputPrePostSpacesAsNonBreaking ) - { - this.outputLeadingSpacesAsNonBreaking = outputPrePostSpacesAsNonBreaking; - } - - public void setOutputRowNumbers( boolean outputRowNumbers ) - { - this.outputRowNumbers = outputRowNumbers; - } - /** * Allows converter to wrap content into two additional DIVs with tricky * styles, so it will wrap across empty cells (like in Excel). diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlUtils.java index 987b2bf040..3cb1744bfc 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlUtils.java +++ b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToHtmlUtils.java @@ -16,25 +16,17 @@ ==================================================================== */ package org.apache.poi.hssf.converter; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.util.Arrays; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFSheet; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.util.HSSFColor; import org.apache.poi.ss.util.CellRangeAddress; -import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Beta; -public class ExcelToHtmlUtils +@Beta +public class ExcelToHtmlUtils extends AbstractExcelUtils { - static final String EMPTY = ""; - - private static final short EXCEL_COLUMN_WIDTH_FACTOR = 256; - private static final int UNIT_OFFSET_LENGTH = 7; - public static void appendAlign( StringBuilder style, short alignment ) { switch ( alignment ) @@ -196,23 +188,6 @@ public class ExcelToHtmlUtils return result; } - /** - * See here for Xio explanation and details - */ - public static int getColumnWidthInPx( int widthUnits ) - { - int pixels = ( widthUnits / EXCEL_COLUMN_WIDTH_FACTOR ) - * UNIT_OFFSET_LENGTH; - - int offsetWidthUnits = widthUnits % EXCEL_COLUMN_WIDTH_FACTOR; - pixels += Math.round( offsetWidthUnits - / ( (float) EXCEL_COLUMN_WIDTH_FACTOR / UNIT_OFFSET_LENGTH ) ); - - return pixels; - } - /** * @param mergedRanges * map of sheet merged ranges built with @@ -232,27 +207,4 @@ public class ExcelToHtmlUtils return cellRangeAddress; } - static boolean isEmpty( String str ) - { - return str == null || str.length() == 0; - } - - static boolean isNotEmpty( String str ) - { - return !isEmpty( str ); - } - - public static HSSFWorkbook loadXls( File xlsFile ) throws IOException - { - final FileInputStream inputStream = new FileInputStream( xlsFile ); - try - { - return new HSSFWorkbook( inputStream ); - } - finally - { - IOUtils.closeQuietly( inputStream ); - } - } - } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java index 42fa15044f..fd49c47d5e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java @@ -110,7 +110,7 @@ public class FoDocumentFacade return simplePageMaster; } - protected Element createBasicLinkExternal( String externalDestination ) + public Element createBasicLinkExternal( String externalDestination ) { final Element basicLink = document.createElementNS( NS_XSLFO, "fo:basic-link" ); @@ -184,32 +184,37 @@ public class FoDocumentFacade return result; } - protected Element createTable() + public Element createTable() { return document.createElementNS( NS_XSLFO, "fo:table" ); } - protected Element createTableBody() + public Element createTableBody() { return document.createElementNS( NS_XSLFO, "fo:table-body" ); } - protected Element createTableCell() + public Element createTableCell() { return document.createElementNS( NS_XSLFO, "fo:table-cell" ); } - protected Element createTableHeader() + public Element createTableColumn() + { + return document.createElementNS( NS_XSLFO, "fo:table-column" ); + } + + public Element createTableHeader() { return document.createElementNS( NS_XSLFO, "fo:table-header" ); } - protected Element createTableRow() + public Element createTableRow() { return document.createElementNS( NS_XSLFO, "fo:table-row" ); } - protected Text createText( String data ) + public Text createText( String data ) { return document.createTextNode( data ); } diff --git a/src/scratchpad/testcases/org/apache/poi/hssf/converter/TestExcelToHtmlConverterSuite.java b/src/scratchpad/testcases/org/apache/poi/hssf/converter/TestExcelConverterSuite.java similarity index 68% rename from src/scratchpad/testcases/org/apache/poi/hssf/converter/TestExcelToHtmlConverterSuite.java rename to src/scratchpad/testcases/org/apache/poi/hssf/converter/TestExcelConverterSuite.java index 5454703b37..736f7577ac 100644 --- a/src/scratchpad/testcases/org/apache/poi/hssf/converter/TestExcelToHtmlConverterSuite.java +++ b/src/scratchpad/testcases/org/apache/poi/hssf/converter/TestExcelConverterSuite.java @@ -36,7 +36,7 @@ import junit.framework.TestSuite; import org.apache.poi.POIDataSamples; import org.apache.poi.hssf.usermodel.HSSFWorkbook; -public class TestExcelToHtmlConverterSuite +public class TestExcelConverterSuite { /** * YK: a quick hack to exclude failing documents from the suite. @@ -48,7 +48,7 @@ public class TestExcelToHtmlConverterSuite public static Test suite() { TestSuite suite = new TestSuite( - TestExcelToHtmlConverterSuite.class.getName() ); + TestExcelConverterSuite.class.getName() ); File directory = POIDataSamples.getSpreadSheetInstance().getFile( "../spreadsheet" ); @@ -61,20 +61,26 @@ public class TestExcelToHtmlConverterSuite } ) ) { final String name = child.getName(); + suite.addTest( new TestCase( name + " [FO]" ) + { + public void runTest() throws Exception + { + testFo( child ); + } + } ); suite.addTest( new TestCase( name + " [HTML]" ) { public void runTest() throws Exception { - test( child, true ); + testHtml( child ); } } ); - } return suite; } - protected static void test( File child, boolean html ) throws Exception + protected static void testFo( File child ) throws Exception { HSSFWorkbook workbook; try @@ -98,13 +104,39 @@ public class TestExcelToHtmlConverterSuite .newTransformer(); transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); + transformer.setOutputProperty( OutputKeys.METHOD, "xml" ); transformer.transform( new DOMSource( excelToHtmlConverter.getDocument() ), new StreamResult( stringWriter ) ); + } - if ( html ) - transformer.setOutputProperty( OutputKeys.METHOD, "html" ); + protected static void testHtml( File child ) throws Exception + { + HSSFWorkbook workbook; + try + { + workbook = ExcelToHtmlUtils.loadXls( child ); + } + catch ( Exception exc ) + { + // unable to parse file -- not WordToFoConverter fault + return; + } + + ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter( + DocumentBuilderFactory.newInstance().newDocumentBuilder() + .newDocument() ); + excelToHtmlConverter.processWorkbook( workbook ); - // no exceptions + StringWriter stringWriter = new StringWriter(); + + Transformer transformer = TransformerFactory.newInstance() + .newTransformer(); + transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); + transformer.setOutputProperty( OutputKeys.INDENT, "no" ); + transformer.setOutputProperty( OutputKeys.METHOD, "html" ); + transformer.transform( + new DOMSource( excelToHtmlConverter.getDocument() ), + new StreamResult( stringWriter ) ); } } -- 2.39.5