--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel.converter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFCellStyle;
+import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
+import org.apache.poi.hssf.usermodel.HSSFFont;
+import org.apache.poi.hssf.usermodel.HSSFRichTextString;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hssf.util.HSSFColor;
+import org.apache.poi.hwpf.converter.HtmlDocumentFacade;
+import org.apache.poi.ss.formula.eval.ErrorEval;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+/**
+ * Converts xls files (97-2007) to HTML file.
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class ExcelToHtmlConverter
+{
+
+ /** Logger used to warn about unexpected cell types met during conversion. */
+ private static final POILogger logger = POILogFactory
+ .getLogger( ExcelToHtmlConverter.class );
+
+ /**
+ * Java main() interface to interact with {@link ExcelToHtmlConverter}
+ *
+ * <p>
+ * Usage: ExcelToHtmlConverter infile outfile
+ * </p>
+ * Where infile is an input .xls file (Excel 97-2007) which will be rendered
+ * as HTML into outfile. The output is written as UTF-8 bytes, matching the
+ * encoding declared to the serializer.
+ *
+ * @param args
+ *            input .xls path followed by output .html path
+ */
+ public static void main( String[] args )
+ {
+     if ( args.length < 2 )
+     {
+         System.err
+                 .println( "Usage: ExcelToHtmlConverter <inputFile.xls> <saveTo.html>" );
+         return;
+     }
+
+     System.out.println( "Converting " + args[0] );
+     System.out.println( "Saving output to " + args[1] );
+     try
+     {
+         Document doc = ExcelToHtmlConverter.process( new File( args[0] ) );
+
+         TransformerFactory tf = TransformerFactory.newInstance();
+         Transformer serializer = tf.newTransformer();
+         // TODO set encoding from a command argument
+         serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
+         serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+         serializer.setOutputProperty( OutputKeys.METHOD, "html" );
+
+         /*
+          * Write through a byte stream so the serializer itself applies the
+          * declared encoding; a FileWriter would re-encode the text with
+          * the platform default charset and contradict the UTF-8 header.
+          */
+         FileOutputStream out = new FileOutputStream( args[1] );
+         try
+         {
+             serializer.transform( new DOMSource( doc ), new StreamResult(
+                     out ) );
+         }
+         finally
+         {
+             // release the file handle even if serialization fails
+             out.close();
+         }
+     }
+     catch ( Exception e )
+     {
+         e.printStackTrace();
+     }
+ }
+
+ /**
+ * Loads the given Excel file (97-2007) and renders it as an HTML DOM tree.
+ *
+ * @param xlsFile
+ *            file to process
+ * @return DOM representation of result HTML
+ * @throws Exception
+ *             if the workbook cannot be loaded or the empty target
+ *             document cannot be created
+ */
+ public static Document process( File xlsFile ) throws Exception
+ {
+     // load first, so an unreadable file fails before any DOM work is done
+     final HSSFWorkbook source = ExcelToHtmlUtils.loadXls( xlsFile );
+
+     final Document blank = DocumentBuilderFactory.newInstance()
+             .newDocumentBuilder().newDocument();
+     final ExcelToHtmlConverter converter = new ExcelToHtmlConverter( blank );
+     converter.processWorkbook( source );
+     return converter.getDocument();
+ }
+
+ /** Formats numeric cell values using the cell's Excel data format. */
+ private final HSSFDataFormatter _formatter = new HSSFDataFormatter();
+
+ /** Facade over the output DOM document, used to create and attach HTML nodes. */
+ private final HtmlDocumentFacade htmlDocumentFacade;
+
+ /** The {@code <style type="text/css">} element appended to the document head. */
+ private final Element styles;
+
+ /** Indexes of the cell styles referenced by processed cells, in first-use order. */
+ private final Set<Short> usedStyles = new LinkedHashSet<Short>();
+
+ public ExcelToHtmlConverter( Document doc )
+ {
+ htmlDocumentFacade = new HtmlDocumentFacade( doc );
+
+ styles = doc.createElement( "style" );
+ styles.setAttribute( "type", "text/css" );
+ htmlDocumentFacade.getHead().appendChild( styles );
+ }
+
+ /**
+ * Builds the CSS declaration list (without selector or braces) for one
+ * cell style: white-space handling, horizontal alignment, background
+ * color, the four borders and the font.
+ *
+ * @param workbook
+ *            workbook owning the style, used to resolve colors and fonts
+ * @param cellStyle
+ *            style to translate
+ * @return the declarations, each terminated by "; "
+ */
+ private String buildStyle( HSSFWorkbook workbook, HSSFCellStyle cellStyle )
+ {
+     final StringBuilder css = new StringBuilder( "white-space: pre-wrap; " );
+
+     final String textAlign;
+     switch ( cellStyle.getAlignment() )
+     {
+     case HSSFCellStyle.ALIGN_CENTER:
+     case HSSFCellStyle.ALIGN_CENTER_SELECTION:
+         textAlign = "center";
+         break;
+     case HSSFCellStyle.ALIGN_JUSTIFY:
+         textAlign = "justify";
+         break;
+     case HSSFCellStyle.ALIGN_LEFT:
+         textAlign = "left";
+         break;
+     case HSSFCellStyle.ALIGN_RIGHT:
+         textAlign = "right";
+         break;
+     default:
+         // general alignment, fill (XXX: shall we support fill?) and
+         // unknown values produce no declaration
+         textAlign = null;
+         break;
+     }
+     if ( textAlign != null )
+         css.append( "text-align: " ).append( textAlign ).append( "; " );
+
+     final HSSFColor fill;
+     switch ( cellStyle.getFillPattern() )
+     {
+     case 0:
+         // no fill
+         fill = null;
+         break;
+     case 1:
+         fill = cellStyle.getFillForegroundColorColor();
+         break;
+     default:
+         fill = cellStyle.getFillBackgroundColorColor();
+         break;
+     }
+     if ( fill != null )
+         css.append( "background-color: " )
+                 .append( ExcelToHtmlUtils.getColor( fill ) ).append( "; " );
+
+     buildStyle_border( workbook, css, "top", cellStyle.getBorderTop(),
+             cellStyle.getTopBorderColor() );
+     buildStyle_border( workbook, css, "right", cellStyle.getBorderRight(),
+             cellStyle.getRightBorderColor() );
+     buildStyle_border( workbook, css, "bottom",
+             cellStyle.getBorderBottom(), cellStyle.getBottomBorderColor() );
+     buildStyle_border( workbook, css, "left", cellStyle.getBorderLeft(),
+             cellStyle.getLeftBorderColor() );
+
+     buildStyle_font( workbook, css, cellStyle.getFont( workbook ) );
+
+     return css.toString();
+ }
+
+ /**
+ * Appends the CSS declarations for one side of a cell border.
+ * <p>
+ * The style declaration is always written; width and color follow only
+ * when the border is not {@link HSSFCellStyle#BORDER_NONE}. Properties are
+ * named <code>border-&lt;side&gt;-style</code>,
+ * <code>border-&lt;side&gt;-width</code> and
+ * <code>border-&lt;side&gt;-color</code>, which is the order CSS requires.
+ *
+ * @param workbook
+ *            workbook whose custom palette resolves the color index
+ * @param style
+ *            buffer the declarations are appended to
+ * @param type
+ *            border side: "top", "right", "bottom" or "left"
+ * @param xlsBorder
+ *            Excel border type, one of the HSSFCellStyle.BORDER_* values
+ * @param borderColor
+ *            palette index of the border color
+ */
+ private void buildStyle_border( HSSFWorkbook workbook, StringBuilder style,
+         String type, short xlsBorder, short borderColor )
+ {
+     // CSS puts the side between "border" and the aspect: border-top-style
+     final String property = "border-" + type;
+
+     style.append( property + "-style: "
+             + ExcelToHtmlUtils.getBorderStyle( xlsBorder ) + "; " );
+
+     if ( xlsBorder == HSSFCellStyle.BORDER_NONE )
+         return;
+
+     style.append( property + "-width: "
+             + ExcelToHtmlUtils.getBorderWidth( xlsBorder ) + "; " );
+
+     final HSSFColor color = workbook.getCustomPalette().getColor(
+             borderColor );
+     if ( color != null )
+         style.append( property + "-color: "
+                 + ExcelToHtmlUtils.getColor( color ) + "; " );
+ }
+
+ /**
+ * Appends the CSS font declarations (weight, color, size, italic) for the
+ * given font.
+ *
+ * @param workbook
+ *            workbook whose custom palette resolves the font color index
+ * @param style
+ *            buffer the declarations are appended to
+ * @param font
+ *            font to translate
+ */
+ void buildStyle_font( HSSFWorkbook workbook, StringBuilder style,
+         HSSFFont font )
+ {
+     // other weights deliberately produce no font-weight declaration
+     final short weight = font.getBoldweight();
+     if ( weight == HSSFFont.BOLDWEIGHT_BOLD )
+     {
+         style.append( "font-weight: bold; " );
+     }
+     else if ( weight == HSSFFont.BOLDWEIGHT_NORMAL )
+     {
+         style.append( "font-weight: normal; " );
+     }
+
+     final HSSFColor paletteColor = workbook.getCustomPalette().getColor(
+             font.getColor() );
+     if ( paletteColor != null )
+     {
+         style.append( "color: " )
+                 .append( ExcelToHtmlUtils.getColor( paletteColor ) )
+                 .append( "; " );
+     }
+
+     final short points = font.getFontHeightInPoints();
+     if ( points != 0 )
+     {
+         style.append( "font-size: " ).append( points ).append( "pt; " );
+     }
+
+     if ( font.getItalic() )
+     {
+         style.append( "font-style: italic; " );
+     }
+ }
+
+ /**
+ * Returns the DOM document this converter renders into.
+ *
+ * @return the document held by the underlying HTML facade
+ */
+ public Document getDocument()
+ {
+     final Document rendered = htmlDocumentFacade.getDocument();
+     return rendered;
+ }
+
+ /**
+ * Renders one cell into the given table cell element.
+ * <p>
+ * The cell value is converted to text according to the cell type. When
+ * the cell uses a non-default style (index other than 0) the element gets
+ * a <code>cellstyle_N</code> class and the style index is remembered.
+ *
+ * @param cell
+ *            cell to render
+ * @param tableCellElement
+ *            target table cell element
+ * @return <tt>true</tt> if the cell is considered empty (no text and the
+ *         default style, or an unexpected cell type), <tt>false</tt>
+ *         otherwise
+ */
+ protected boolean processCell( HSSFCell cell, Element tableCellElement )
+ {
+     final HSSFCellStyle cellStyle = cell.getCellStyle();
+
+     String content;
+     switch ( cell.getCellType() )
+     {
+     case HSSFCell.CELL_TYPE_BLANK:
+         content = ExcelToHtmlUtils.EMPTY;
+         break;
+     case HSSFCell.CELL_TYPE_BOOLEAN:
+         content = String.valueOf( cell.getBooleanCellValue() );
+         break;
+     case HSSFCell.CELL_TYPE_ERROR:
+         content = ErrorEval.getText( cell.getErrorCellValue() );
+         break;
+     case HSSFCell.CELL_TYPE_NUMERIC:
+         content = _formatter.formatCellValue( cell );
+         break;
+     case HSSFCell.CELL_TYPE_STRING:
+         // XXX: enrich
+         content = cell.getRichStringCellValue().getString();
+         break;
+     case HSSFCell.CELL_TYPE_FORMULA:
+         content = formatCachedFormulaResult( cell, cellStyle );
+         break;
+     default:
+         logger.log( POILogger.WARN,
+                 "Unexpected cell type (" + cell.getCellType() + ")" );
+         return true;
+     }
+
+     final short styleIndex = cellStyle.getIndex();
+     final boolean styled = styleIndex != 0;
+     if ( styled )
+     {
+         tableCellElement.setAttribute( "class", "cellstyle_" + styleIndex );
+         usedStyles.add( Short.valueOf( styleIndex ) );
+
+         /*
+          * a styled cell (borders, etc.) without text still needs some
+          * content, so output a non-breaking space; otherwise the browser
+          * would collapse and ignore the cell
+          */
+         if ( ExcelToHtmlUtils.isEmpty( content ) )
+         {
+             content = "\u00A0";
+         }
+     }
+
+     tableCellElement.appendChild( htmlDocumentFacade.createText( content ) );
+
+     return !styled && ExcelToHtmlUtils.isEmpty( content );
+ }
+
+ /**
+ * Converts the cached result of a formula cell to text.
+ *
+ * @param cell
+ *            formula cell
+ * @param cellStyle
+ *            style of the cell, consulted for the numeric data format
+ * @return textual form of the cached result; empty string for an empty
+ *         string result or an unexpected result type
+ */
+ private String formatCachedFormulaResult( HSSFCell cell,
+         HSSFCellStyle cellStyle )
+ {
+     final int resultType = cell.getCachedFormulaResultType();
+     switch ( resultType )
+     {
+     case HSSFCell.CELL_TYPE_STRING:
+         final HSSFRichTextString rich = cell.getRichStringCellValue();
+         if ( rich == null || rich.length() <= 0 )
+             return ExcelToHtmlUtils.EMPTY;
+         return rich.toString();
+     case HSSFCell.CELL_TYPE_NUMERIC:
+         if ( cellStyle == null )
+             return String.valueOf( cell.getNumericCellValue() );
+         return _formatter.formatRawCellContents(
+                 cell.getNumericCellValue(), cellStyle.getDataFormat(),
+                 cellStyle.getDataFormatString() );
+     case HSSFCell.CELL_TYPE_BOOLEAN:
+         return String.valueOf( cell.getBooleanCellValue() );
+     case HSSFCell.CELL_TYPE_ERROR:
+         return ErrorEval.getText( cell.getErrorCellValue() );
+     default:
+         logger.log( POILogger.WARN,
+                 "Unexpected cell cachedFormulaResultType (" + resultType
+                         + ")" );
+         return ExcelToHtmlUtils.EMPTY;
+     }
+ }
+
+ /**
+ * Renders the cells of one row into the given table row element.
+ * <p>
+ * Empty cells are held back and only attached once a later non-empty cell
+ * is found, so empty cells at the end of the row are not output.
+ *
+ * @param row
+ *            row to render
+ * @param tableRowElement
+ *            target table row element
+ * @return <tt>true</tt> if no non-empty cell was found in the row
+ */
+ protected boolean processRow( HSSFRow row, Element tableRowElement )
+ {
+     final short cellCount = row.getLastCellNum();
+     if ( cellCount <= 0 )
+         return true;
+
+     boolean hasContent = false;
+     final List<Element> pendingBlanks = new ArrayList<Element>( cellCount );
+
+     for ( int col = 0; col < cellCount; col++ )
+     {
+         final HSSFCell cell = row.getCell( col );
+         final Element td = htmlDocumentFacade.createTableCell();
+
+         final boolean blank = cell == null || processCell( cell, td );
+         if ( blank )
+         {
+             pendingBlanks.add( td );
+             continue;
+         }
+
+         // a real cell follows: the blanks before it keep their position
+         for ( Element blankTd : pendingBlanks )
+         {
+             tableRowElement.appendChild( blankTd );
+         }
+         pendingBlanks.clear();
+
+         tableRowElement.appendChild( td );
+         hasContent = true;
+     }
+
+     return !hasContent;
+ }
+
+ protected void processSheet( HSSFSheet sheet )
+ {
+ Element h1 = htmlDocumentFacade.createHeader1();
+ h1.appendChild( htmlDocumentFacade.createText( sheet.getSheetName() ) );
+ htmlDocumentFacade.getBody().appendChild( h1 );
+
+ final int physicalNumberOfRows = sheet.getPhysicalNumberOfRows();
+ if ( physicalNumberOfRows <= 0 )
+ return;
+
+ Element table = htmlDocumentFacade.createTable();
+ Element tableBody = htmlDocumentFacade.createTableBody();
+
+ final List<Element> emptyRowElements = new ArrayList<Element>(
+ physicalNumberOfRows );
+
+ for ( int r = 0; r < physicalNumberOfRows; r++ )
+ {
+ HSSFRow row = sheet.getRow( r );
+
+ Element tableRowElement = htmlDocumentFacade.createTableRow();
+
+ boolean emptyRow;
+ if ( row != null )
+ {
+ emptyRow = processRow( row, tableRowElement );
+ }
+ else
+ {
+ emptyRow = true;
+ }
+
+ if ( emptyRow )
+ {
+ emptyRowElements.add( tableRowElement );
+ }
+ else
+ {
+ if ( !emptyRowElements.isEmpty() )
+ {
+ for ( Element emptyCellElement : emptyRowElements )
+ {
+ tableBody.appendChild( emptyCellElement );
+ }
+ emptyRowElements.clear();
+ }
+
+ tableBody.appendChild( tableRowElement );
+ emptyRow = false;
+ }
+ }
+
+ table.appendChild( tableBody );
+ htmlDocumentFacade.getBody().appendChild( table );
+ }
+
+ /**
+ * Renders every sheet of the workbook and then writes one CSS rule for
+ * each cell style that was referenced by a rendered cell.
+ * <p>
+ * The sheets have to be processed first: the set of used styles is filled
+ * while cells are rendered, so generating the rules beforehand would find
+ * it empty and emit no rule at all.
+ *
+ * @param workbook
+ *            workbook to render
+ */
+ public void processWorkbook( HSSFWorkbook workbook )
+ {
+     for ( int s = 0; s < workbook.getNumberOfSheets(); s++ )
+     {
+         HSSFSheet sheet = workbook.getSheetAt( s );
+         processSheet( sheet );
+     }
+
+     for ( short i = 0; i < workbook.getNumCellStyles(); i++ )
+     {
+         if ( !usedStyles.contains( Short.valueOf( i ) ) )
+             continue;
+
+         HSSFCellStyle cellStyle = workbook.getCellStyleAt( i );
+         if ( cellStyle == null )
+             continue;
+
+         styles.appendChild( htmlDocumentFacade.createText( "td.cellstyle_"
+                 + i + "{" + buildStyle( workbook, cellStyle ) + "}\n" ) );
+     }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel.converter;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.poi.hssf.usermodel.HSSFCellStyle;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hssf.util.HSSFColor;
+import org.apache.poi.util.IOUtils;
+
+public class ExcelToHtmlUtils
+{
+ /** Empty string constant shared by the converter classes of this package. */
+ static final String EMPTY = "";
+
+ /**
+ * Maps an Excel border type to a CSS border style keyword.
+ *
+ * @param xlsBorder
+ *            Excel border type, one of the HSSFCellStyle.BORDER_* values
+ * @return "none", "dotted", "dashed" or "double" for the matching border
+ *         types, "solid" for every other value
+ */
+ public static String getBorderStyle( short xlsBorder )
+ {
+     switch ( xlsBorder )
+     {
+     case HSSFCellStyle.BORDER_NONE:
+         return "none";
+     case HSSFCellStyle.BORDER_DOUBLE:
+         return "double";
+     case HSSFCellStyle.BORDER_DASHED:
+     case HSSFCellStyle.BORDER_MEDIUM_DASHED:
+         return "dashed";
+     case HSSFCellStyle.BORDER_DASH_DOT:
+     case HSSFCellStyle.BORDER_DASH_DOT_DOT:
+     case HSSFCellStyle.BORDER_DOTTED:
+     case HSSFCellStyle.BORDER_HAIR:
+     case HSSFCellStyle.BORDER_MEDIUM_DASH_DOT:
+     case HSSFCellStyle.BORDER_MEDIUM_DASH_DOT_DOT:
+     case HSSFCellStyle.BORDER_SLANTED_DASH_DOT:
+         // CSS has no dash-dot styles; dotted is the closest match used
+         return "dotted";
+     default:
+         return "solid";
+     }
+ }
+
+ /**
+ * Maps an Excel border type to a CSS border width value.
+ *
+ * @param xlsBorder
+ *            Excel border type, one of the HSSFCellStyle.BORDER_* values
+ * @return "2pt" for the medium dashed/dash-dot types, "thick" for the
+ *         thick border, "thin" for every other value
+ */
+ public static String getBorderWidth( short xlsBorder )
+ {
+     switch ( xlsBorder )
+     {
+     case HSSFCellStyle.BORDER_THICK:
+         return "thick";
+     case HSSFCellStyle.BORDER_MEDIUM_DASH_DOT:
+     case HSSFCellStyle.BORDER_MEDIUM_DASH_DOT_DOT:
+     case HSSFCellStyle.BORDER_MEDIUM_DASHED:
+         return "2pt";
+     default:
+         return "thin";
+     }
+ }
+
+ /**
+ * Converts a color to its CSS hexadecimal notation, <code>#rrggbb</code>.
+ * <p>
+ * Each component of the color triplet is written as exactly two
+ * hexadecimal digits and the result carries the leading '#' that CSS
+ * requires for hexadecimal colors.
+ *
+ * @param color
+ *            color to convert
+ * @return the color as a seven character CSS value
+ */
+ public static String getColor( HSSFColor color )
+ {
+     StringBuilder stringBuilder = new StringBuilder( 7 );
+     stringBuilder.append( '#' );
+     for ( short s : color.getTriplet() )
+     {
+         // values below 0x10 (not below 10) print as a single hex digit
+         if ( s < 0x10 )
+             stringBuilder.append( '0' );
+
+         stringBuilder.append( Integer.toHexString( s ) );
+     }
+     return stringBuilder.toString();
+ }
+
+ /**
+ * Tells whether the string is <tt>null</tt> or has no characters.
+ *
+ * @param str
+ *            string to check, may be <tt>null</tt>
+ * @return <tt>true</tt> for <tt>null</tt> and for the empty string
+ */
+ static boolean isEmpty( String str )
+ {
+     if ( str == null )
+     {
+         return true;
+     }
+     return str.length() == 0;
+ }
+
+ static boolean isNotEmpty( String str )
+ {
+ return !isEmpty( str );
+ }
+
+ /**
+ * Reads a workbook from the given file. The file stream is closed before
+ * the method returns, whether or not reading succeeded.
+ *
+ * @param xlsFile
+ *            file to read
+ * @return the loaded workbook
+ * @throws IOException
+ *             if the file cannot be opened or read
+ */
+ public static HSSFWorkbook loadXls( File xlsFile ) throws IOException
+ {
+     final FileInputStream in = new FileInputStream( xlsFile );
+     final HSSFWorkbook loaded;
+     try
+     {
+         loaded = new HSSFWorkbook( in );
+     }
+     finally
+     {
+         IOUtils.closeQuietly( in );
+     }
+     return loaded;
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.converter;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.List;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hssf.usermodel.converter.ExcelToHtmlConverter;
+import org.apache.poi.hssf.usermodel.converter.ExcelToHtmlUtils;
+
+public class TestExcelToHtmlConverterSuite
+{
+ /**
+ * YK: a quick hack to exclude failing documents from the suite. File names
+ * listed here are rejected by the file name filter in {@link #suite()};
+ * the list is currently empty.
+ */
+ private static List<String> failingFiles = Arrays.asList();
+
+ /**
+ * Builds a suite holding one "[HTML]" conversion test for every .xls file
+ * of the spreadsheet sample directory that is not listed as failing.
+ *
+ * @return the assembled suite
+ */
+ public static Test suite()
+ {
+     final TestSuite result = new TestSuite();
+
+     final FilenameFilter convertibleXls = new FilenameFilter()
+     {
+         public boolean accept( File dir, String name )
+         {
+             return name.endsWith( ".xls" ) && !failingFiles.contains( name );
+         }
+     };
+
+     final File samplesDir = POIDataSamples.getSpreadSheetInstance()
+             .getFile( "../spreadsheet" );
+     for ( final File xls : samplesDir.listFiles( convertibleXls ) )
+     {
+         final String testName = xls.getName() + " [HTML]";
+         result.addTest( new TestCase( testName )
+         {
+             public void runTest() throws Exception
+             {
+                 test( xls, true );
+             }
+         } );
+     }
+
+     return result;
+ }
+
+ /**
+ * Converts one sample file and serializes the result into memory. The
+ * test passes when conversion and serialization raise no exception; the
+ * produced markup is not inspected.
+ * <p>
+ * Files that cannot be loaded as a workbook are skipped silently, since
+ * that failure is not caused by the converter.
+ *
+ * @param child
+ *            sample .xls file
+ * @param html
+ *            <tt>true</tt> to serialize with the "html" output method
+ *            instead of the transformer's default one
+ * @throws Exception
+ *             if conversion or serialization fails
+ */
+ protected static void test( File child, boolean html ) throws Exception
+ {
+     HSSFWorkbook workbook;
+     try
+     {
+         workbook = ExcelToHtmlUtils.loadXls( child );
+     }
+     catch ( Exception exc )
+     {
+         // unable to parse file -- not ExcelToHtmlConverter fault
+         return;
+     }
+
+     ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
+             DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                     .newDocument() );
+     excelToHtmlConverter.processWorkbook( workbook );
+
+     StringWriter stringWriter = new StringWriter();
+
+     Transformer transformer = TransformerFactory.newInstance()
+             .newTransformer();
+     transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+     transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+     // output properties only take effect when set before transform()
+     if ( html )
+         transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+
+     transformer.transform(
+             new DOMSource( excelToHtmlConverter.getDocument() ),
+             new StreamResult( stringWriter ) );
+
+     // no exceptions
+ }
+}