source.dussan.org Git - poi.git/commitdiff
output document properties to html and pdf
author Sergey Vladimirov <sergey@apache.org>
Wed, 6 Jul 2011 09:37:32 +0000 (09:37 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Wed, 6 Jul 2011 09:37:32 +0000 (09:37 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143314 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hssf/usermodel/converter/ExcelToHtmlConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java
src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java

index 5d2d3a6b854e733bc3a4e39daaa022802a92dfed..57deb81570eb6fa0541975846cad88ba75e7b34c 100644 (file)
@@ -30,6 +30,7 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hssf.usermodel.HSSFCell;
 import org.apache.poi.hssf.usermodel.HSSFCellStyle;
 import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
@@ -347,6 +348,23 @@ public class ExcelToHtmlConverter
         return ExcelToHtmlUtils.isEmpty( value ) && cellStyleIndex == 0;
     }
 
+    protected void processDocumentInformation(
+            SummaryInformation summaryInformation )
+    {
+        if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getTitle() ) )
+            htmlDocumentFacade.setTitle( summaryInformation.getTitle() );
+
+        if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getAuthor() ) )
+            htmlDocumentFacade.addAuthor( summaryInformation.getAuthor() );
+
+        if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getKeywords() ) )
+            htmlDocumentFacade.addKeywords( summaryInformation.getKeywords() );
+
+        if ( ExcelToHtmlUtils.isNotEmpty( summaryInformation.getComments() ) )
+            htmlDocumentFacade
+                    .addDescription( summaryInformation.getComments() );
+    }
+
     protected boolean processRow( HSSFRow row, Element tableRowElement )
     {
         boolean emptyRow = true;
@@ -451,6 +469,13 @@ public class ExcelToHtmlConverter
 
     public void processWorkbook( HSSFWorkbook workbook )
     {
+        final SummaryInformation summaryInformation = workbook
+                .getSummaryInformation();
+        if ( summaryInformation != null )
+        {
+            processDocumentInformation( summaryInformation );
+        }
+
         for ( short i = 0; i < workbook.getNumCellStyles(); i++ )
         {
             HSSFCellStyle cellStyle = workbook.getCellStyleAt( i );
index 9ac5633c1d439e09312b53fea38ee2093efbcc4d..f60514c5d7cfa39845e94eafecbc5ee03804cf65 100644 (file)
@@ -20,6 +20,7 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.model.ListFormatOverride;
@@ -127,6 +128,13 @@ public abstract class AbstractWordConverter
 
     public void processDocument( HWPFDocumentCore wordDocument )
     {
+        final SummaryInformation summaryInformation = wordDocument
+                .getSummaryInformation();
+        if ( summaryInformation != null )
+        {
+            processDocumentInformation( summaryInformation );
+        }
+
         final Range range = wordDocument.getRange();
         for ( int s = 0; s < range.numSections(); s++ )
         {
@@ -134,6 +142,9 @@ public abstract class AbstractWordConverter
         }
     }
 
+    /**
+     * Receives the summary information of the document being converted.
+     * {@link #processDocument(HWPFDocumentCore)} invokes this before it
+     * walks the sections of the document, and only when the document
+     * returned non-null summary information.
+     *
+     * @param summaryInformation
+     *            summary information of the source document; not
+     *            {@code null} when invoked from {@code processDocument}
+     */
+    protected abstract void processDocumentInformation(
+            SummaryInformation summaryInformation );
+
     protected void processField( HWPFDocumentCore wordDocument,
             Element currentBlock, Paragraph paragraph, int currentTableLevel,
             List<CharacterRun> characterRuns, int beginMark, int separatorMark,
index 8a944a9ee9c4564553465cf27e8ea2b879129cec..773f0d29cb76fd42ee0510affd25e05420ffd303 100644 (file)
@@ -18,14 +18,20 @@ package org.apache.poi.hwpf.converter;
 
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;
 
 public class FoDocumentFacade
 {
+    private static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+
     private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
 
+    protected final Element declarations;
     protected final Document document;
     protected final Element layoutMasterSet;
+    protected Element propertiesRoot;
     protected final Element root;
 
     public FoDocumentFacade( Document document )
@@ -38,6 +44,9 @@ public class FoDocumentFacade
         layoutMasterSet = document.createElementNS( NS_XSLFO,
                 "fo:layout-master-set" );
         root.appendChild( layoutMasterSet );
+
+        declarations = document.createElementNS( NS_XSLFO, "fo:declarations" );
+        root.appendChild( declarations );
     }
 
     public Element addFlowToPageSequence( final Element pageSequence,
@@ -198,4 +207,116 @@ public class FoDocumentFacade
         return document;
     }
 
+    protected Element getOrCreatePropertiesRoot()
+    {
+        if ( propertiesRoot != null )
+            return propertiesRoot;
+
+        // See http://xmlgraphics.apache.org/fop/0.95/metadata.html
+
+        Element xmpmeta = document.createElementNS( "adobe:ns:meta",
+                "x:xmpmeta" );
+        declarations.appendChild( xmpmeta );
+
+        Element rdf = document.createElementNS( NS_RDF, "rdf:RDF" );
+        xmpmeta.appendChild( rdf );
+
+        propertiesRoot = document.createElementNS( NS_RDF, "rdf:Description" );
+        rdf.appendChild( propertiesRoot );
+
+        return propertiesRoot;
+    }
+
+    public void setCreator( String value )
+    {
+        setDublinCoreProperty( "creator", value );
+    }
+
+    public void setCreatorTool( String value )
+    {
+        setXmpProperty( "CreatorTool", value );
+    }
+
+    public void setDescription( String value )
+    {
+        Element element = setDublinCoreProperty( "description", value );
+
+        if ( element != null )
+        {
+            element.setAttributeNS( "http://www.w3.org/XML/1998/namespace",
+                    "xml:lang", "x-default" );
+        }
+    }
+
+    public Element setDublinCoreProperty( String name, String value )
+    {
+        return setProperty( "http://purl.org/dc/elements/1.1/", "dc", name,
+                value );
+    }
+
+    public void setKeywords( String value )
+    {
+        setPdfProperty( "Keywords", value );
+    }
+
+    public Element setPdfProperty( String name, String value )
+    {
+        return setProperty( "http://ns.adobe.com/pdf/1.3/", "pdf", name, value );
+    }
+
+    public void setProducer( String value )
+    {
+        setPdfProperty( "Producer", value );
+    }
+
+    protected Element setProperty( String namespace, String prefix,
+            String name, String value )
+    {
+        Element propertiesRoot = getOrCreatePropertiesRoot();
+        NodeList existingChildren = propertiesRoot.getChildNodes();
+        for ( int i = 0; i < existingChildren.getLength(); i++ )
+        {
+            Node child = existingChildren.item( i );
+            if ( child.getNodeType() == Node.ELEMENT_NODE )
+            {
+                Element childElement = (Element) child;
+                if ( WordToFoUtils.isNotEmpty( childElement.getNamespaceURI() )
+                        && WordToFoUtils.isNotEmpty( childElement
+                                .getLocalName() )
+                        && namespace.equals( childElement.getNamespaceURI() )
+                        && name.equals( childElement.getLocalName() ) )
+                {
+                    propertiesRoot.removeChild( childElement );
+                    break;
+                }
+            }
+        }
+
+        if ( WordToFoUtils.isNotEmpty( value ) )
+        {
+            Element property = document.createElementNS( namespace, prefix
+                    + ":" + name );
+            property.appendChild( document.createTextNode( value ) );
+            propertiesRoot.appendChild( property );
+            return property;
+        }
+
+        return null;
+    }
+
+    public void setSubject( String value )
+    {
+        setDublinCoreProperty( "title", value );
+    }
+
+    public void setTitle( String value )
+    {
+        setDublinCoreProperty( "title", value );
+    }
+
+    public Element setXmpProperty( String name, String value )
+    {
+        return setProperty( "http://ns.adobe.com/xap/1.0/", "xmp", name, value );
+    }
+
 }
index 68da7fd9f63bb5d0a66005b5cc2187a35cb19bc7..6a57704a952bddae90e2c948108e9a481adc26f5 100644 (file)
@@ -28,6 +28,9 @@ public class HtmlDocumentFacade
     protected final Element head;
     protected final Element html;
 
+    protected Element title;
+    protected Text titleText;
+
     public HtmlDocumentFacade( Document document )
     {
         this.document = document;
@@ -42,6 +45,29 @@ public class HtmlDocumentFacade
         html.appendChild( body );
     }
 
+    public void addAuthor( String value )
+    {
+        addMeta( "author", value );
+    }
+
+    public void addDescription( String value )
+    {
+        addMeta( "description", value );
+    }
+
+    public void addKeywords( String value )
+    {
+        addMeta( "keywords", value );
+    }
+
+    public void addMeta( final String name, String value )
+    {
+        Element meta = document.createElement( "meta" );
+        meta.setAttribute( "name", name );
+        meta.setAttribute( "content", value );
+        head.appendChild( meta );
+    }
+
     public Element createHeader1()
     {
         return document.createElement( "h1" );
@@ -119,4 +145,31 @@ public class HtmlDocumentFacade
         return head;
     }
 
+    public String getTitle()
+    {
+        if ( title == null )
+            return null;
+
+        return titleText.getTextContent();
+    }
+
+    public void setTitle( String titleText )
+    {
+        if ( WordToHtmlUtils.isEmpty( titleText ) && this.title != null )
+        {
+            this.head.removeChild( this.title );
+            this.title = null;
+            this.titleText = null;
+        }
+
+        if ( this.title == null )
+        {
+            this.title = document.createElement( "title" );
+            this.titleText = document.createTextNode( titleText );
+            this.title.appendChild( this.titleText );
+            this.head.appendChild( title );
+        }
+
+        this.titleText.setData( titleText );
+    }
 }
index baf2d4c6ff0b037609335140f9e6c21edc7d2600..04c5ad2833b1f92970418b650b67b81369c44ba2 100644 (file)
@@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
@@ -230,6 +231,23 @@ public class WordToFoConverter extends AbstractWordConverter
         inline.appendChild( textNode );
     }
 
+    @Override
+    protected void processDocumentInformation(
+            SummaryInformation summaryInformation )
+    {
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getTitle() ) )
+            foDocumentFacade.setTitle( summaryInformation.getTitle() );
+
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getAuthor() ) )
+            foDocumentFacade.setCreator( summaryInformation.getAuthor() );
+
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getKeywords() ) )
+            foDocumentFacade.setKeywords( summaryInformation.getKeywords() );
+
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getComments() ) )
+            foDocumentFacade.setDescription( summaryInformation.getComments() );
+    }
+
     protected void processHyperlink( HWPFDocumentCore hwpfDocument,
             Element currentBlock, Paragraph paragraph,
             List<CharacterRun> characterRuns, int currentTableLevel,
@@ -368,7 +386,8 @@ public class WordToFoConverter extends AbstractWordConverter
         Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
                 "xsl-region-body" );
 
-        processSectionParagraphes( wordDocument, flow, section, Integer.MIN_VALUE );
+        processSectionParagraphes( wordDocument, flow, section,
+                Integer.MIN_VALUE );
     }
 
     protected void processTable( HWPFDocumentCore wordDocument, Element flow,
index bbffc9d7d232b6d40209e17f3337c4571fa06751..7122b863eaca3f8fb769fa7e6d39ab5d72e87436 100644 (file)
@@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
@@ -207,6 +208,24 @@ public class WordToHtmlConverter extends AbstractWordConverter
         span.appendChild( textNode );
     }
 
+    @Override
+    protected void processDocumentInformation(
+            SummaryInformation summaryInformation )
+    {
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getTitle() ) )
+            htmlDocumentFacade.setTitle( summaryInformation.getTitle() );
+
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getAuthor() ) )
+            htmlDocumentFacade.addAuthor( summaryInformation.getAuthor() );
+
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getKeywords() ) )
+            htmlDocumentFacade.addKeywords( summaryInformation.getKeywords() );
+
+        if ( WordToHtmlUtils.isNotEmpty( summaryInformation.getComments() ) )
+            htmlDocumentFacade
+                    .addDescription( summaryInformation.getComments() );
+    }
+
     protected void processHyperlink( HWPFDocumentCore wordDocument,
             Element currentBlock, Paragraph paragraph,
             List<CharacterRun> characterRuns, int currentTableLevel,
@@ -326,7 +345,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
         div.setAttribute( "style", getSectionStyle( section ) );
         htmlDocumentFacade.body.appendChild( div );
 
-        processSectionParagraphes( wordDocument, div, section, Integer.MIN_VALUE );
+        processSectionParagraphes( wordDocument, div, section,
+                Integer.MIN_VALUE );
     }
 
     @Override
index b5c442aea76b06cdc6814a749879f81d6ca428f6..a797d7b33d8a301df1ab179aef347f7ceb6e3c67 100644 (file)
@@ -62,6 +62,16 @@ public class TestWordToFoConverter extends TestCase
         return result;
     }
 
+    public void testDocumentProperties() throws Exception
+    {
+        String result = getFoText( "documentProperties.doc" );
+
+        assertTrue( result
+                .contains( "<dc:title xmlns:dc=\"http://purl.org/dc/elements/1.1/\">This is document title</dc:title>" ) );
+        assertTrue( result
+                .contains( "<pdf:Keywords xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">This is document keywords</pdf:Keywords>" ) );
+    }
+
     public void testEquation() throws Exception
     {
         final String sampleFileName = "equation.doc";
index ced950965a2d688968ba6af8785d8ed3cf211a29..420add745475f19f9e0e79a2fbafb07a2c02e7a8 100644 (file)
@@ -30,7 +30,7 @@ import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwpf.HWPFDocument;
 
 /**
- * Test cases for {@link WordToFoConverter}
+ * Test cases for {@link WordToHtmlConverter}
  * 
  * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
  */
@@ -81,6 +81,15 @@ public class TestWordToHtmlConverter extends TestCase
         assertTrue( result.contains( "<table>" ) );
     }
 
+    public void testDocumentProperties() throws Exception
+    {
+        String result = getHtmlText( "documentProperties.doc" );
+
+        assertTrue( result.contains( "<title>This is document title</title>" ) );
+        assertTrue( result
+                .contains( "<meta content=\"This is document keywords\" name=\"keywords\">" ) );
+    }
+
     public void testEquation() throws Exception
     {
         String result = getHtmlText( "equation.doc" );