From b5a06a44169312f118b1143a8e763f67c888709e Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Wed, 6 Jul 2011 09:37:32 +0000 Subject: [PATCH] output document properties to html and pdf git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143314 13f79535-47bb-0310-9956-ffa450edef68 --- .../converter/ExcelToHtmlConverter.java | 25 ++++ .../hwpf/converter/AbstractWordConverter.java | 11 ++ .../poi/hwpf/converter/FoDocumentFacade.java | 121 ++++++++++++++++++ .../hwpf/converter/HtmlDocumentFacade.java | 53 ++++++++ .../poi/hwpf/converter/WordToFoConverter.java | 21 ++- .../hwpf/converter/WordToHtmlConverter.java | 22 +++- .../hwpf/converter/TestWordToFoConverter.java | 10 ++ .../converter/TestWordToHtmlConverter.java | 11 +- 8 files changed, 271 insertions(+), 3 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hssf/usermodel/converter/ExcelToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hssf/usermodel/converter/ExcelToHtmlConverter.java index 5d2d3a6b85..57deb81570 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/usermodel/converter/ExcelToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hssf/usermodel/converter/ExcelToHtmlConverter.java @@ -30,6 +30,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFDataFormatter; @@ -347,6 +348,23 @@ public class ExcelToHtmlConverter return ExcelToHtmlUtils.isEmpty( value ) && cellStyleIndex == 0; } + protected void processDocumentInformation( + SummaryInformation summaryInformation ) + { + if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getTitle() ) ) + htmlDocumentFacade.setTitle( summaryInformation.getTitle() ); + + if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getAuthor() ) ) + htmlDocumentFacade.addAuthor( summaryInformation.getAuthor() ); + + if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getKeywords() ) ) + htmlDocumentFacade.addKeywords( summaryInformation.getKeywords() ); + + if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getComments() ) ) + htmlDocumentFacade + .addDescription( summaryInformation.getComments() ); + } + protected boolean processRow( HSSFRow row, Element tableRowElement ) { boolean emptyRow = true; @@ -451,6 +469,13 @@ public class ExcelToHtmlConverter public void processWorkbook( HSSFWorkbook workbook ) { + final SummaryInformation summaryInformation = workbook + .getSummaryInformation(); + if ( summaryInformation != null ) + { + processDocumentInformation( summaryInformation ); + } + for ( short i = 0; i < workbook.getNumCellStyles(); i++ ) { HSSFCellStyle cellStyle = workbook.getCellStyleAt( i ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index 9ac5633c1d..f60514c5d7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.model.ListFormatOverride; @@ -127,6 +128,13 @@ public abstract class AbstractWordConverter public void processDocument( HWPFDocumentCore wordDocument ) { + final SummaryInformation summaryInformation = wordDocument + .getSummaryInformation(); + if ( summaryInformation != null ) + { + processDocumentInformation( summaryInformation ); + } + final Range range = wordDocument.getRange(); for ( int s = 0; s < range.numSections(); s++ ) { @@ -134,6 +142,9 @@ public abstract class AbstractWordConverter } } + protected abstract void processDocumentInformation( + SummaryInformation summaryInformation ); + protected void processField( HWPFDocumentCore wordDocument, Element currentBlock, Paragraph paragraph, int currentTableLevel, List characterRuns, int beginMark, int separatorMark, diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java index 8a944a9ee9..773f0d29cb 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java @@ -18,14 +18,20 @@ package org.apache.poi.hwpf.converter; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.w3c.dom.Text; public class FoDocumentFacade { + private static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format"; + protected final Element declarations; protected final Document document; protected final Element layoutMasterSet; + protected Element propertiesRoot; protected final Element root; public FoDocumentFacade( Document document ) @@ -38,6 +44,9 @@ public class FoDocumentFacade layoutMasterSet = document.createElementNS( NS_XSLFO, "fo:layout-master-set" ); root.appendChild( layoutMasterSet ); + + declarations = document.createElementNS( NS_XSLFO, "fo:declarations" ); + root.appendChild( declarations ); } public Element addFlowToPageSequence( final Element pageSequence, @@ -198,4 +207,116 @@ public class FoDocumentFacade return document; } + protected Element getOrCreatePropertiesRoot() + { + if ( propertiesRoot != null ) + return propertiesRoot; + + // See http://xmlgraphics.apache.org/fop/0.95/metadata.html + + Element xmpmeta = document.createElementNS( "adobe:ns:meta", + "x:xmpmeta" ); + declarations.appendChild( xmpmeta ); + + Element rdf = document.createElementNS( NS_RDF, "rdf:RDF" ); + xmpmeta.appendChild( rdf ); + + propertiesRoot = document.createElementNS( NS_RDF, "rdf:Description" ); + rdf.appendChild( propertiesRoot ); + + return propertiesRoot; + } + + public void setCreator( String value ) + { + setDublinCoreProperty( "creator", value ); + } + + public void setCreatorTool( String value ) + { + setXmpProperty( "CreatorTool", value ); + } + + public void setDescription( String value ) + { + Element element = setDublinCoreProperty( "description", value ); + + if ( element != null ) + { + element.setAttributeNS( "http://www.w3.org/XML/1998/namespace", + "xml:lang", "x-default" ); + } + } + + public Element setDublinCoreProperty( String name, String value ) + { + return setProperty( "http://purl.org/dc/elements/1.1/", "dc", name, + value ); + } + + public void setKeywords( String value ) + { + setPdfProperty( "Keywords", value ); + } + + public Element setPdfProperty( String name, String value ) + { + return setProperty( "http://ns.adobe.com/pdf/1.3/", "pdf", name, value ); + } + + public void setProducer( String value ) + { + setPdfProperty( "Producer", value ); + } + + protected Element setProperty( String namespace, String prefix, + String name, String value ) + { + Element propertiesRoot = getOrCreatePropertiesRoot(); + NodeList existingChildren = propertiesRoot.getChildNodes(); + for ( int i = 0; i < existingChildren.getLength(); i++ ) + { + Node child = existingChildren.item( i ); + if ( child.getNodeType() == Node.ELEMENT_NODE ) + { + Element childElement = (Element) child; + if ( WordToFoUtils.isNotEmpty( childElement.getNamespaceURI() ) + && WordToFoUtils.isNotEmpty( childElement + .getLocalName() ) + && namespace.equals( childElement.getNamespaceURI() ) + && name.equals( childElement.getLocalName() ) ) + { + propertiesRoot.removeChild( childElement ); + break; + } + } + } + + if ( WordToFoUtils.isNotEmpty( value ) ) + { + Element property = document.createElementNS( namespace, prefix + + ":" + name ); + property.appendChild( document.createTextNode( value ) ); + propertiesRoot.appendChild( property ); + return property; + } + + return null; + } + + public void setSubject( String value ) + { + setDublinCoreProperty( "title", value ); + } + + public void setTitle( String value ) + { + setDublinCoreProperty( "title", value ); + } + + public Element setXmpProperty( String name, String value ) + { + return setProperty( "http://ns.adobe.com/xap/1.0/", "xmp", name, value ); + } + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java index 68da7fd9f6..6a57704a95 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java @@ -28,6 +28,9 @@ public class HtmlDocumentFacade protected final Element head; protected final Element html; + protected Element title; + protected Text titleText; + public HtmlDocumentFacade( Document document ) { this.document = document; @@ -42,6 +45,29 @@ public class HtmlDocumentFacade html.appendChild( body ); } + public void addAuthor( String value ) + { + addMeta( "author", value ); + } + + public void addDescription( String value ) + { + addMeta( "description", value ); + } + + public void addKeywords( String value ) + { + addMeta( "keywords", value ); + } + + public void addMeta( final String name, String value ) + { + Element meta = document.createElement( "meta" ); + meta.setAttribute( "name", name ); + meta.setAttribute( "content", value ); + head.appendChild( meta ); + } + public Element createHeader1() { return document.createElement( "h1" ); @@ -119,4 +145,31 @@ public class HtmlDocumentFacade return head; } + public String getTitle() + { + if ( title == null ) + return null; + + return titleText.getTextContent(); + } + + public void setTitle( String titleText ) + { + if ( WordToHtmlUtils.isEmpty( titleText ) && this.title != null ) + { + this.head.removeChild( this.title ); + this.title = null; + this.titleText = null; + } + + if ( this.title == null ) + { + this.title = document.createElement( "title" ); + this.titleText = document.createTextNode( titleText ); + this.title.appendChild( this.titleText ); + this.head.appendChild( title ); + } + + this.titleText.setData( titleText ); + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index baf2d4c6ff..04c5ad2833 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.usermodel.CharacterRun; @@ -230,6 +231,23 @@ public class WordToFoConverter extends AbstractWordConverter inline.appendChild( textNode ); } + @Override + protected void processDocumentInformation( + SummaryInformation summaryInformation ) + { + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getTitle() ) ) + foDocumentFacade.setTitle( summaryInformation.getTitle() ); + + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getAuthor() ) ) + foDocumentFacade.setCreator( summaryInformation.getAuthor() ); + + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getKeywords() ) ) + foDocumentFacade.setKeywords( summaryInformation.getKeywords() ); + + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getComments() ) ) + foDocumentFacade.setDescription( summaryInformation.getComments() ); + } + protected void processHyperlink( HWPFDocumentCore hwpfDocument, Element currentBlock, Paragraph paragraph, List characterRuns, int currentTableLevel, @@ -368,7 +386,8 @@ public class WordToFoConverter extends AbstractWordConverter Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence, "xsl-region-body" ); - processSectionParagraphes( wordDocument, flow, section, Integer.MIN_VALUE ); + processSectionParagraphes( wordDocument, flow, section, + Integer.MIN_VALUE ); } protected void processTable( HWPFDocumentCore wordDocument, Element flow, diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index bbffc9d7d2..7122b863ea 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.usermodel.CharacterRun; @@ -207,6 +208,24 @@ public class WordToHtmlConverter extends AbstractWordConverter span.appendChild( textNode ); } + @Override + protected void processDocumentInformation( + SummaryInformation summaryInformation ) + { + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getTitle() ) ) + htmlDocumentFacade.setTitle( summaryInformation.getTitle() ); + + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getAuthor() ) ) + htmlDocumentFacade.addAuthor( summaryInformation.getAuthor() ); + + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getKeywords() ) ) + htmlDocumentFacade.addKeywords( summaryInformation.getKeywords() ); + + if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getComments() ) ) + htmlDocumentFacade + .addDescription( summaryInformation.getComments() ); + } + protected void processHyperlink( HWPFDocumentCore wordDocument, Element currentBlock, Paragraph paragraph, List characterRuns, int currentTableLevel, @@ -326,7 +345,8 @@ public class WordToHtmlConverter extends AbstractWordConverter div.setAttribute( "style", getSectionStyle( section ) ); htmlDocumentFacade.body.appendChild( div ); - processSectionParagraphes( wordDocument, div, section, Integer.MIN_VALUE ); + processSectionParagraphes( wordDocument, div, section, + Integer.MIN_VALUE ); } @Override diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java index b5c442aea7..a797d7b33d 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java @@ -62,6 +62,16 @@ public class TestWordToFoConverter extends TestCase return result; } + public void testDocumentProperties() throws Exception + { + String result = getFoText( "documentProperties.doc" ); + + assertTrue( result + .contains( "This is document title" ) ); + assertTrue( result + .contains( "This is document keywords" ) ); + } + public void testEquation() throws Exception { final String sampleFileName = "equation.doc"; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index ced950965a..420add7454 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -30,7 +30,7 @@ import org.apache.poi.POIDataSamples; import org.apache.poi.hwpf.HWPFDocument; /** - * Test cases for {@link WordToFoConverter} + * Test cases for {@link WordToHtmlConverter} * * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) */ @@ -81,6 +81,15 @@ public class TestWordToHtmlConverter extends TestCase assertTrue( result.contains( "" ) ); } + public void testDocumentProperties() throws Exception + { + String result = getHtmlText( "documentProperties.doc" ); + + assertTrue( result.contains( "This is document title" ) ); + assertTrue( result + .contains( "" ) ); + } + public void testEquation() throws Exception { String result = getHtmlText( "equation.doc" ); -- 2.39.5