]> source.dussan.org Git - poi.git/commitdiff
add bookmarks support for Word-to-HTML and Word-to-FO converters
authorSergey Vladimirov <sergey@apache.org>
Wed, 20 Jul 2011 16:01:19 +0000 (16:01 +0000)
committerSergey Vladimirov <sergey@apache.org>
Wed, 20 Jul 2011 16:01:19 +0000 (16:01 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148824 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Bookmark.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/BookmarksImpl.java
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java

index 61daedff827138ea05092f1ed7063f8dd66ea3c0..d0ba6211f04be4af0d416150e3efb716d7c4ffa6 100644 (file)
 ==================================================================== */
 package org.apache.poi.hwpf.converter;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -23,10 +29,11 @@ import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
-import org.apache.poi.hwpf.model.FieldsDocumentPart;
 import org.apache.poi.hwpf.model.Field;
+import org.apache.poi.hwpf.model.FieldsDocumentPart;
 import org.apache.poi.hwpf.model.ListFormatOverride;
 import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.Bookmark;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
@@ -51,6 +58,8 @@ public abstract class AbstractWordConverter
     private static final POILogger logger = POILogFactory
             .getLogger( AbstractWordConverter.class );
 
+    private final Set<Bookmark> bookmarkStack = new LinkedHashSet<Bookmark>();
+
     private FontReplacer fontReplacer = new DefaultFontReplacer();
 
     protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
@@ -73,7 +82,16 @@ public abstract class AbstractWordConverter
     protected abstract void outputCharacters( Element block,
             CharacterRun characterRun, String text );
 
-    protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
+    /**
+     * Wraps the range into bookmark(s) and processes it. All bookmarks start
+     * at the range start and end at the range end. Usually there is only one
+     * bookmark.
+     */
+    protected abstract void processBookmarks( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range range, int currentTableLevel,
+            List<Bookmark> rangeBookmarks );
+
+    protected boolean processCharacters( HWPFDocumentCore document,
             int currentTableLevel, Range range, final Element block )
     {
         if ( range == null )
@@ -81,6 +99,22 @@ public abstract class AbstractWordConverter
 
         boolean haveAnyText = false;
 
+        if ( document instanceof HWPFDocument )
+        {
+            final HWPFDocument doc = (HWPFDocument) document;
+            Map<Integer, List<Bookmark>> rangeBookmarks = doc.getBookmarks()
+                    .getBookmarksStartedBetween( range.getStartOffset(),
+                            range.getEndOffset() );
+
+            if ( rangeBookmarks != null && !rangeBookmarks.isEmpty() )
+            {
+                boolean processedAny = processRangeBookmarks( doc,
+                        currentTableLevel, range, block, rangeBookmarks );
+                if ( processedAny )
+                    return true;
+            }
+        }
+
         for ( int c = 0; c < range.numCharacterRuns(); c++ )
         {
             CharacterRun characterRun = range.getCharacterRun( c );
@@ -88,11 +122,11 @@ public abstract class AbstractWordConverter
             if ( characterRun == null )
                 throw new AssertionError();
 
-            if ( hwpfDocument instanceof HWPFDocument
-                    && ( (HWPFDocument) hwpfDocument ).getPicturesTable()
+            if ( document instanceof HWPFDocument
+                    && ( (HWPFDocument) document ).getPicturesTable()
                             .hasPicture( characterRun ) )
             {
-                HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
+                HWPFDocument newFormat = (HWPFDocument) document;
                 Picture picture = newFormat.getPicturesTable().extractPicture(
                         characterRun, true );
 
@@ -107,15 +141,15 @@ public abstract class AbstractWordConverter
 
             if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
             {
-                if ( hwpfDocument instanceof HWPFDocument )
+                if ( document instanceof HWPFDocument )
                 {
-                    Field aliveField = ( (HWPFDocument) hwpfDocument )
+                    Field aliveField = ( (HWPFDocument) document )
                             .getFieldsTables().lookupFieldByStartOffset(
                                     FieldsDocumentPart.MAIN,
                                     characterRun.getStartOffset() );
                     if ( aliveField != null )
                     {
-                        processField( ( (HWPFDocument) hwpfDocument ), range,
+                        processField( ( (HWPFDocument) document ), range,
                                 currentTableLevel, aliveField, block );
 
                         int continueAfter = aliveField.getFieldEndOffset();
@@ -130,8 +164,8 @@ public abstract class AbstractWordConverter
                     }
                 }
 
-                int skipTo = tryDeadField( hwpfDocument, range,
-                        currentTableLevel, c, block );
+                int skipTo = tryDeadField( document, range, currentTableLevel,
+                        c, block );
 
                 if ( skipTo != c )
                 {
@@ -337,6 +371,129 @@ public abstract class AbstractWordConverter
             Element parentFopElement, int currentTableLevel,
             Paragraph paragraph, String bulletText );
 
+    private boolean processRangeBookmarks( HWPFDocumentCore document,
+            int currentTableLevel, Range range, final Element block,
+            Map<Integer, List<Bookmark>> rangeBookmakrs )
+    {
+        final int startOffset = range.getStartOffset();
+        final int endOffset = range.getEndOffset();
+
+        int beforeBookmarkStart = startOffset;
+        for ( Map.Entry<Integer, List<Bookmark>> entry : rangeBookmakrs
+                .entrySet() )
+        {
+            final List<Bookmark> startedAt = entry.getValue();
+
+            final List<Bookmark> bookmarks;
+            if ( entry.getKey().intValue() == startOffset
+                    && !bookmarkStack.isEmpty() )
+            {
+                /*
+                 * we need to filter out some bookmarks because they are
+                 * already being processed by caller methods
+                 */
+                List<Bookmark> filtered = new ArrayList<Bookmark>(
+                        startedAt.size() );
+                for ( Bookmark bookmark : startedAt )
+                {
+                    if ( this.bookmarkStack.contains( bookmark ) )
+                        continue;
+
+                    filtered.add( bookmark );
+                }
+
+                if ( filtered.isEmpty() )
+                    // no bookmarks - skip to next start point
+                    continue;
+
+                bookmarks = filtered;
+            }
+            else
+            {
+                bookmarks = startedAt;
+            }
+
+            // TODO: test me
+            /*
+             * we process only the bookmarks with max size; they shall be
+             * first in the sorted list. Other bookmarks will be processed by
+             * the called method
+             */
+            final Bookmark firstBookmark = bookmarks.iterator().next();
+            final int startBookmarkOffset = firstBookmark.getStart();
+            final int endBookmarkOffset = Math.min( firstBookmark.getEnd(),
+                    range.getEndOffset() );
+            List<Bookmark> toProcess = new ArrayList<Bookmark>(
+                    bookmarks.size() );
+            for ( Bookmark bookmark : bookmarks )
+            {
+                if ( Math.min( bookmark.getEnd(), range.getEndOffset() ) != endBookmarkOffset )
+                    break;
+                toProcess.add( bookmark );
+            }
+
+            if ( beforeBookmarkStart != startBookmarkOffset )
+            {
+                // we have range before bookmark
+                Range beforeBookmarkRange = new Range( beforeBookmarkStart,
+                        startBookmarkOffset, range )
+                {
+                    @Override
+                    public String toString()
+                    {
+                        return "BeforeBookmarkRange (" + super.toString() + ")";
+                    }
+                };
+                processCharacters( document, currentTableLevel,
+                        beforeBookmarkRange, block );
+            }
+            Range bookmarkRange = new Range( startBookmarkOffset,
+                    endBookmarkOffset, range )
+            {
+                @Override
+                public String toString()
+                {
+                    return "BookmarkRange (" + super.toString() + ")";
+                }
+            };
+
+            bookmarkStack.addAll( toProcess );
+            try
+            {
+                processBookmarks( document, block, bookmarkRange,
+                        currentTableLevel,
+                        Collections.unmodifiableList( toProcess ) );
+            }
+            finally
+            {
+                bookmarkStack.removeAll( toProcess );
+            }
+            beforeBookmarkStart = endBookmarkOffset;
+        }
+
+        if ( beforeBookmarkStart == startOffset )
+        {
+            return false;
+        }
+
+        if ( beforeBookmarkStart != endOffset )
+        {
+            // we have range after last bookmark
+            Range afterLastBookmarkRange = new Range( beforeBookmarkStart,
+                    endOffset, range )
+            {
+                @Override
+                public String toString()
+                {
+                    return "AfterBookmarkRange (" + super.toString() + ")";
+                }
+            };
+            processCharacters( document, currentTableLevel,
+                    afterLastBookmarkRange, block );
+        }
+        return true;
+    }
+
     protected abstract void processSection( HWPFDocumentCore wordDocument,
             Section section, int s );
 
index 6cd6227a1ae237819edf7145a1a172dd2d6edd25..a6e38073ce2285eae9e34ac82f42ab24ccb351db 100644 (file)
@@ -89,6 +89,13 @@ public class HtmlDocumentFacade
         return document.createElement( "div" );
     }
 
+    public Element createBookmark( String name )
+    {
+        final Element basicLink = document.createElement( "a" );
+        basicLink.setAttribute( "name", name );
+        return basicLink;
+    }
+
     public Element createHeader1()
     {
         return document.createElement( "h1" );
index 939f749dc8739649a5ea5b431b49b5962507d4de..bde7a8c6ad11272b5db26a166c7e57015edd9e4c 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter;
 
 import java.io.File;
 import java.io.FileWriter;
+import java.util.List;
 import java.util.Stack;
 
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+import org.apache.poi.hwpf.usermodel.Bookmark;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
@@ -51,28 +53,6 @@ import org.w3c.dom.Text;
 public class WordToFoConverter extends AbstractWordConverter
 {
 
-    /**
-     * Holds properties values, applied to current <tt>fo:block</tt> element.
-     * Those properties shall not be doubled in children <tt>fo:inline</tt>
-     * elements.
-     */
-    private static class BlockProperies
-    {
-        final boolean pBold;
-        final String pFontName;
-        final int pFontSize;
-        final boolean pItalic;
-
-        public BlockProperies( String pFontName, int pFontSize, boolean pBold,
-                boolean pItalic )
-        {
-            this.pFontName = pFontName;
-            this.pFontSize = pFontSize;
-            this.pBold = pBold;
-            this.pItalic = pItalic;
-        }
-    }
-
     private static final POILogger logger = POILogFactory
             .getLogger( WordToFoConverter.class );
 
@@ -237,6 +217,24 @@ public class WordToFoConverter extends AbstractWordConverter
         inline.appendChild( textNode );
     }
 
+    @Override
+    protected void processBookmarks( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range range, int currentTableLevel,
+            List<Bookmark> rangeBookmarks )
+    {
+        Element parent = currentBlock;
+        for ( Bookmark bookmark : rangeBookmarks )
+        {
+            Element bookmarkElement = foDocumentFacade.createInline();
+            bookmarkElement.setAttribute( "id", bookmark.getName() );
+            parent.appendChild( bookmarkElement );
+            parent = bookmarkElement;
+        }
+
+        if ( range != null )
+            processCharacters( wordDocument, currentTableLevel, range, parent );
+    }
+
     @Override
     protected void processDocumentInformation(
             SummaryInformation summaryInformation )
@@ -509,4 +507,26 @@ public class WordToFoConverter extends AbstractWordConverter
         }
     }
 
+    /**
+     * Holds property values applied to the current <tt>fo:block</tt> element.
+     * Those properties shall not be duplicated in child <tt>fo:inline</tt>
+     * elements.
+     */
+    private static class BlockProperies
+    {
+        final boolean pBold;
+        final String pFontName;
+        final int pFontSize;
+        final boolean pItalic;
+
+        public BlockProperies( String pFontName, int pFontSize, boolean pBold,
+                boolean pItalic )
+        {
+            this.pFontName = pFontName;
+            this.pFontSize = pFontSize;
+            this.pBold = pBold;
+            this.pItalic = pItalic;
+        }
+    }
+
 }
index f81be74c8248d03876ad1516e82018cef5615b27..71eddf12ab3b75313a40cdee02a4464946eb02a8 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter;
 
 import java.io.File;
 import java.io.FileWriter;
+import java.util.List;
 import java.util.Stack;
 
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+import org.apache.poi.hwpf.usermodel.Bookmark;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
@@ -234,6 +236,24 @@ public class WordToHtmlConverter extends AbstractWordConverter
                     basicLink );
     }
 
+    @Override
+    protected void processBookmarks( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range range, int currentTableLevel,
+            List<Bookmark> rangeBookmarks )
+    {
+        Element parent = currentBlock;
+        for ( Bookmark bookmark : rangeBookmarks )
+        {
+            Element bookmarkElement = htmlDocumentFacade
+                    .createBookmark( bookmark.getName() );
+            parent.appendChild( bookmarkElement );
+            parent = bookmarkElement;
+        }
+
+        if ( range != null )
+            processCharacters( wordDocument, currentTableLevel, range, parent );
+    }
+
     /**
      * This method shall store image bytes in external file and convert it if
      * necessary. Images shall be stored using PNG format. Other formats may be
index 9dfd6b6908eebd9f6011ceffef722af6a3091f82..1836d8906c88444a23e821908700a809ae040a3b 100644 (file)
@@ -1,12 +1,33 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
 package org.apache.poi.hwpf.usermodel;
 
+/**
+ * User friendly interface to access information about document bookmarks
+ * 
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
 public interface Bookmark
 {
-    public int getEnd();
+    int getEnd();
 
-    public String getName();
+    String getName();
 
-    public int getStart();
+    int getStart();
 
-    public void setName( String name );
+    void setName( String name );
 }
index 40b40644b3da42f03d9786e0796bc01185daedfc..9bc5e77f3b6df19838d86acb13d4cda0e07a7d4e 100644 (file)
@@ -50,50 +50,7 @@ public class BookmarksImpl implements Bookmarks
 
     private Bookmark getBookmark( final GenericPropertyNode first )
     {
-        return new Bookmark()
-        {
-            public int getEnd()
-            {
-                int currentIndex = bookmarksTables
-                        .getDescriptorFirstIndex( first );
-                try
-                {
-                    GenericPropertyNode descriptorLim = bookmarksTables
-                            .getDescriptorLim( currentIndex );
-                    return descriptorLim.getStart();
-                }
-                catch ( IndexOutOfBoundsException exc )
-                {
-                    return first.getEnd();
-                }
-            }
-
-            public String getName()
-            {
-                int currentIndex = bookmarksTables
-                        .getDescriptorFirstIndex( first );
-                try
-                {
-                    return bookmarksTables.getName( currentIndex );
-                }
-                catch ( ArrayIndexOutOfBoundsException exc )
-                {
-                    return "";
-                }
-            }
-
-            public int getStart()
-            {
-                return first.getStart();
-            }
-
-            public void setName( String name )
-            {
-                int currentIndex = bookmarksTables
-                        .getDescriptorFirstIndex( first );
-                bookmarksTables.setName( currentIndex, name );
-            }
-        };
+        return new BookmarkImpl( first );
     }
 
     public Bookmark getBookmark( int index )
@@ -143,6 +100,11 @@ public class BookmarksImpl implements Bookmarks
         for ( int lookupIndex = startLookupIndex; lookupIndex < endLookupIndex; lookupIndex++ )
         {
             int s = sortedStartPositions[lookupIndex];
+            if ( s < startInclusive )
+                continue;
+            if ( s >= endExclusive )
+                break;
+
             List<Bookmark> startedAt = getBookmarksAt( s );
             if ( startedAt != null )
                 result.put( Integer.valueOf( s ), startedAt );
@@ -186,4 +148,87 @@ public class BookmarksImpl implements Bookmarks
         this.sortedDescriptors = result;
         this.sortedStartPositions = indices;
     }
+
+    private final class BookmarkImpl implements Bookmark
+    {
+        private final GenericPropertyNode first;
+
+        private BookmarkImpl( GenericPropertyNode first )
+        {
+            this.first = first;
+        }
+
+        @Override
+        public boolean equals( Object obj )
+        {
+            if ( this == obj )
+                return true;
+            if ( obj == null )
+                return false;
+            if ( getClass() != obj.getClass() )
+                return false;
+            BookmarkImpl other = (BookmarkImpl) obj;
+            if ( first == null )
+            {
+                if ( other.first != null )
+                    return false;
+            }
+            else if ( !first.equals( other.first ) )
+                return false;
+            return true;
+        }
+
+        public int getEnd()
+        {
+            int currentIndex = bookmarksTables.getDescriptorFirstIndex( first );
+            try
+            {
+                GenericPropertyNode descriptorLim = bookmarksTables
+                        .getDescriptorLim( currentIndex );
+                return descriptorLim.getStart();
+            }
+            catch ( IndexOutOfBoundsException exc )
+            {
+                return first.getEnd();
+            }
+        }
+
+        public String getName()
+        {
+            int currentIndex = bookmarksTables.getDescriptorFirstIndex( first );
+            try
+            {
+                return bookmarksTables.getName( currentIndex );
+            }
+            catch ( ArrayIndexOutOfBoundsException exc )
+            {
+                return "";
+            }
+        }
+
+        public int getStart()
+        {
+            return first.getStart();
+        }
+
+        @Override
+        public int hashCode()
+        {
+            return 31 + ( first == null ? 0 : first.hashCode() );
+        }
+
+        public void setName( String name )
+        {
+            int currentIndex = bookmarksTables.getDescriptorFirstIndex( first );
+            bookmarksTables.setName( currentIndex, name );
+        }
+
+        @Override
+        public String toString()
+        {
+            return "Bookmark [" + getStart() + "; " + getEnd() + "): name: "
+                    + getName();
+        }
+
+    }
 }
index d71c1a821d318725a20018c5c9aff035ee15e46b..175be9058e071f84cae33e83b9c94538d42ffb61 100644 (file)
@@ -187,6 +187,7 @@ public class TestWordToHtmlConverter extends TestCase
         String result = getHtmlText( "pageref.doc" );
 
         assertContains( result, "<a href=\"#userref\">" );
+        assertContains( result, "<a name=\"userref\">" );
         assertContains( result, "1" );
     }
 }