git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148824 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_8_BETA4
@@ -16,6 +16,12 @@ | |||
==================================================================== */ | |||
package org.apache.poi.hwpf.converter; | |||
import java.util.ArrayList; | |||
import java.util.Collections; | |||
import java.util.LinkedHashSet; | |||
import java.util.List; | |||
import java.util.Map; | |||
import java.util.Set; | |||
import java.util.regex.Matcher; | |||
import java.util.regex.Pattern; | |||
@@ -23,10 +29,11 @@ import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.HWPFDocumentCore; | |||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet; | |||
import org.apache.poi.hwpf.model.FieldsDocumentPart; | |||
import org.apache.poi.hwpf.model.Field; | |||
import org.apache.poi.hwpf.model.FieldsDocumentPart; | |||
import org.apache.poi.hwpf.model.ListFormatOverride; | |||
import org.apache.poi.hwpf.model.ListTables; | |||
import org.apache.poi.hwpf.usermodel.Bookmark; | |||
import org.apache.poi.hwpf.usermodel.CharacterRun; | |||
import org.apache.poi.hwpf.usermodel.Paragraph; | |||
import org.apache.poi.hwpf.usermodel.Picture; | |||
@@ -51,6 +58,8 @@ public abstract class AbstractWordConverter | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( AbstractWordConverter.class ); | |||
private final Set<Bookmark> bookmarkStack = new LinkedHashSet<Bookmark>(); | |||
private FontReplacer fontReplacer = new DefaultFontReplacer(); | |||
protected Triplet getCharacterRunTriplet( CharacterRun characterRun ) | |||
@@ -73,7 +82,16 @@ public abstract class AbstractWordConverter | |||
protected abstract void outputCharacters( Element block, | |||
CharacterRun characterRun, String text ); | |||
protected boolean processCharacters( HWPFDocumentCore hwpfDocument, | |||
/** | |||
* Wrap range into bookmark(s) and process it. All bookmarks have starts | |||
* equal to range start and ends equal to range end. Usually it's only one | |||
* bookmark. | |||
*/ | |||
protected abstract void processBookmarks( HWPFDocumentCore wordDocument, | |||
Element currentBlock, Range range, int currentTableLevel, | |||
List<Bookmark> rangeBookmarks ); | |||
protected boolean processCharacters( HWPFDocumentCore document, | |||
int currentTableLevel, Range range, final Element block ) | |||
{ | |||
if ( range == null ) | |||
@@ -81,6 +99,22 @@ public abstract class AbstractWordConverter | |||
boolean haveAnyText = false; | |||
if ( document instanceof HWPFDocument ) | |||
{ | |||
final HWPFDocument doc = (HWPFDocument) document; | |||
Map<Integer, List<Bookmark>> rangeBookmarks = doc.getBookmarks() | |||
.getBookmarksStartedBetween( range.getStartOffset(), | |||
range.getEndOffset() ); | |||
if ( rangeBookmarks != null && !rangeBookmarks.isEmpty() ) | |||
{ | |||
boolean processedAny = processRangeBookmarks( doc, | |||
currentTableLevel, range, block, rangeBookmarks ); | |||
if ( processedAny ) | |||
return true; | |||
} | |||
} | |||
for ( int c = 0; c < range.numCharacterRuns(); c++ ) | |||
{ | |||
CharacterRun characterRun = range.getCharacterRun( c ); | |||
@@ -88,11 +122,11 @@ public abstract class AbstractWordConverter | |||
if ( characterRun == null ) | |||
throw new AssertionError(); | |||
if ( hwpfDocument instanceof HWPFDocument | |||
&& ( (HWPFDocument) hwpfDocument ).getPicturesTable() | |||
if ( document instanceof HWPFDocument | |||
&& ( (HWPFDocument) document ).getPicturesTable() | |||
.hasPicture( characterRun ) ) | |||
{ | |||
HWPFDocument newFormat = (HWPFDocument) hwpfDocument; | |||
HWPFDocument newFormat = (HWPFDocument) document; | |||
Picture picture = newFormat.getPicturesTable().extractPicture( | |||
characterRun, true ); | |||
@@ -107,15 +141,15 @@ public abstract class AbstractWordConverter | |||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK ) | |||
{ | |||
if ( hwpfDocument instanceof HWPFDocument ) | |||
if ( document instanceof HWPFDocument ) | |||
{ | |||
Field aliveField = ( (HWPFDocument) hwpfDocument ) | |||
Field aliveField = ( (HWPFDocument) document ) | |||
.getFieldsTables().lookupFieldByStartOffset( | |||
FieldsDocumentPart.MAIN, | |||
characterRun.getStartOffset() ); | |||
if ( aliveField != null ) | |||
{ | |||
processField( ( (HWPFDocument) hwpfDocument ), range, | |||
processField( ( (HWPFDocument) document ), range, | |||
currentTableLevel, aliveField, block ); | |||
int continueAfter = aliveField.getFieldEndOffset(); | |||
@@ -130,8 +164,8 @@ public abstract class AbstractWordConverter | |||
} | |||
} | |||
int skipTo = tryDeadField( hwpfDocument, range, | |||
currentTableLevel, c, block ); | |||
int skipTo = tryDeadField( document, range, currentTableLevel, | |||
c, block ); | |||
if ( skipTo != c ) | |||
{ | |||
@@ -337,6 +371,129 @@ public abstract class AbstractWordConverter | |||
Element parentFopElement, int currentTableLevel, | |||
Paragraph paragraph, String bulletText ); | |||
private boolean processRangeBookmarks( HWPFDocumentCore document, | |||
int currentTableLevel, Range range, final Element block, | |||
Map<Integer, List<Bookmark>> rangeBookmakrs ) | |||
{ | |||
final int startOffset = range.getStartOffset(); | |||
final int endOffset = range.getEndOffset(); | |||
int beforeBookmarkStart = startOffset; | |||
for ( Map.Entry<Integer, List<Bookmark>> entry : rangeBookmakrs | |||
.entrySet() ) | |||
{ | |||
final List<Bookmark> startedAt = entry.getValue(); | |||
final List<Bookmark> bookmarks; | |||
if ( entry.getKey().intValue() == startOffset | |||
&& !bookmarkStack.isEmpty() ) | |||
{ | |||
/* | |||
* we need to filter out some bookmarks because already | |||
* processing them in caller methods | |||
*/ | |||
List<Bookmark> filtered = new ArrayList<Bookmark>( | |||
startedAt.size() ); | |||
for ( Bookmark bookmark : startedAt ) | |||
{ | |||
if ( this.bookmarkStack.contains( bookmark ) ) | |||
continue; | |||
filtered.add( bookmark ); | |||
} | |||
if ( filtered.isEmpty() ) | |||
// no bookmarks - skip to next start point | |||
continue; | |||
bookmarks = filtered; | |||
} | |||
else | |||
{ | |||
bookmarks = startedAt; | |||
} | |||
// TODO: test me | |||
/* | |||
* we processing only bookmarks with max size, they shall be first | |||
* in sorted list. Other bookmarks will be processed by called | |||
* method | |||
*/ | |||
final Bookmark firstBookmark = bookmarks.iterator().next(); | |||
final int startBookmarkOffset = firstBookmark.getStart(); | |||
final int endBookmarkOffset = Math.min( firstBookmark.getEnd(), | |||
range.getEndOffset() ); | |||
List<Bookmark> toProcess = new ArrayList<Bookmark>( | |||
bookmarks.size() ); | |||
for ( Bookmark bookmark : bookmarks ) | |||
{ | |||
if ( Math.min( bookmark.getEnd(), range.getEndOffset() ) != endBookmarkOffset ) | |||
break; | |||
toProcess.add( bookmark ); | |||
} | |||
if ( beforeBookmarkStart != startBookmarkOffset ) | |||
{ | |||
// we have range before bookmark | |||
Range beforeBookmarkRange = new Range( beforeBookmarkStart, | |||
startBookmarkOffset, range ) | |||
{ | |||
@Override | |||
public String toString() | |||
{ | |||
return "BeforeBookmarkRange (" + super.toString() + ")"; | |||
} | |||
}; | |||
processCharacters( document, currentTableLevel, | |||
beforeBookmarkRange, block ); | |||
} | |||
Range bookmarkRange = new Range( startBookmarkOffset, | |||
endBookmarkOffset, range ) | |||
{ | |||
@Override | |||
public String toString() | |||
{ | |||
return "BookmarkRange (" + super.toString() + ")"; | |||
} | |||
}; | |||
bookmarkStack.addAll( toProcess ); | |||
try | |||
{ | |||
processBookmarks( document, block, bookmarkRange, | |||
currentTableLevel, | |||
Collections.unmodifiableList( toProcess ) ); | |||
} | |||
finally | |||
{ | |||
bookmarkStack.removeAll( toProcess ); | |||
} | |||
beforeBookmarkStart = endBookmarkOffset; | |||
} | |||
if ( beforeBookmarkStart == startOffset ) | |||
{ | |||
return false; | |||
} | |||
if ( beforeBookmarkStart != endOffset ) | |||
{ | |||
// we have range after last bookmark | |||
Range afterLastBookmarkRange = new Range( beforeBookmarkStart, | |||
endOffset, range ) | |||
{ | |||
@Override | |||
public String toString() | |||
{ | |||
return "AfterBookmarkRange (" + super.toString() + ")"; | |||
} | |||
}; | |||
processCharacters( document, currentTableLevel, | |||
afterLastBookmarkRange, block ); | |||
} | |||
return true; | |||
} | |||
protected abstract void processSection( HWPFDocumentCore wordDocument, | |||
Section section, int s ); | |||
@@ -89,6 +89,13 @@ public class HtmlDocumentFacade | |||
return document.createElement( "div" ); | |||
} | |||
public Element createBookmark( String name ) | |||
{ | |||
final Element basicLink = document.createElement( "a" ); | |||
basicLink.setAttribute( "name", name ); | |||
return basicLink; | |||
} | |||
public Element createHeader1() | |||
{ | |||
return document.createElement( "h1" ); |
@@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter; | |||
import java.io.File; | |||
import java.io.FileWriter; | |||
import java.util.List; | |||
import java.util.Stack; | |||
import javax.xml.parsers.DocumentBuilderFactory; | |||
@@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.HWPFDocumentCore; | |||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet; | |||
import org.apache.poi.hwpf.usermodel.Bookmark; | |||
import org.apache.poi.hwpf.usermodel.CharacterRun; | |||
import org.apache.poi.hwpf.usermodel.Paragraph; | |||
import org.apache.poi.hwpf.usermodel.Picture; | |||
@@ -51,28 +53,6 @@ import org.w3c.dom.Text; | |||
public class WordToFoConverter extends AbstractWordConverter | |||
{ | |||
/** | |||
* Holds properties values, applied to current <tt>fo:block</tt> element. | |||
* Those properties shall not be doubled in children <tt>fo:inline</tt> | |||
* elements. | |||
*/ | |||
private static class BlockProperies | |||
{ | |||
final boolean pBold; | |||
final String pFontName; | |||
final int pFontSize; | |||
final boolean pItalic; | |||
public BlockProperies( String pFontName, int pFontSize, boolean pBold, | |||
boolean pItalic ) | |||
{ | |||
this.pFontName = pFontName; | |||
this.pFontSize = pFontSize; | |||
this.pBold = pBold; | |||
this.pItalic = pItalic; | |||
} | |||
} | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( WordToFoConverter.class ); | |||
@@ -237,6 +217,24 @@ public class WordToFoConverter extends AbstractWordConverter | |||
inline.appendChild( textNode ); | |||
} | |||
@Override | |||
protected void processBookmarks( HWPFDocumentCore wordDocument, | |||
Element currentBlock, Range range, int currentTableLevel, | |||
List<Bookmark> rangeBookmarks ) | |||
{ | |||
Element parent = currentBlock; | |||
for ( Bookmark bookmark : rangeBookmarks ) | |||
{ | |||
Element bookmarkElement = foDocumentFacade.createInline(); | |||
bookmarkElement.setAttribute( "id", bookmark.getName() ); | |||
parent.appendChild( bookmarkElement ); | |||
parent = bookmarkElement; | |||
} | |||
if ( range != null ) | |||
processCharacters( wordDocument, currentTableLevel, range, parent ); | |||
} | |||
@Override | |||
protected void processDocumentInformation( | |||
SummaryInformation summaryInformation ) | |||
@@ -509,4 +507,26 @@ public class WordToFoConverter extends AbstractWordConverter | |||
} | |||
} | |||
/** | |||
* Holds properties values, applied to current <tt>fo:block</tt> element. | |||
* Those properties shall not be doubled in children <tt>fo:inline</tt> | |||
* elements. | |||
*/ | |||
private static class BlockProperies | |||
{ | |||
final boolean pBold; | |||
final String pFontName; | |||
final int pFontSize; | |||
final boolean pItalic; | |||
public BlockProperies( String pFontName, int pFontSize, boolean pBold, | |||
boolean pItalic ) | |||
{ | |||
this.pFontName = pFontName; | |||
this.pFontSize = pFontSize; | |||
this.pBold = pBold; | |||
this.pItalic = pItalic; | |||
} | |||
} | |||
} |
@@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter; | |||
import java.io.File; | |||
import java.io.FileWriter; | |||
import java.util.List; | |||
import java.util.Stack; | |||
import javax.xml.parsers.DocumentBuilderFactory; | |||
@@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.HWPFDocumentCore; | |||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet; | |||
import org.apache.poi.hwpf.usermodel.Bookmark; | |||
import org.apache.poi.hwpf.usermodel.CharacterRun; | |||
import org.apache.poi.hwpf.usermodel.Paragraph; | |||
import org.apache.poi.hwpf.usermodel.Picture; | |||
@@ -234,6 +236,24 @@ public class WordToHtmlConverter extends AbstractWordConverter | |||
basicLink ); | |||
} | |||
@Override | |||
protected void processBookmarks( HWPFDocumentCore wordDocument, | |||
Element currentBlock, Range range, int currentTableLevel, | |||
List<Bookmark> rangeBookmarks ) | |||
{ | |||
Element parent = currentBlock; | |||
for ( Bookmark bookmark : rangeBookmarks ) | |||
{ | |||
Element bookmarkElement = htmlDocumentFacade | |||
.createBookmark( bookmark.getName() ); | |||
parent.appendChild( bookmarkElement ); | |||
parent = bookmarkElement; | |||
} | |||
if ( range != null ) | |||
processCharacters( wordDocument, currentTableLevel, range, parent ); | |||
} | |||
/** | |||
* This method shall store image bytes in external file and convert it if | |||
* necessary. Images shall be stored using PNG format. Other formats may be |
@@ -1,12 +1,33 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.hwpf.usermodel; | |||
/**
 * User-friendly interface for accessing information about a document
 * bookmark.
 *
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
 */
public interface Bookmark
{
    /**
     * @return offset at which the bookmark ends. NOTE(review): presumably
     *         exclusive, judging by the half-open <tt>[start; end)</tt>
     *         notation printed by the implementation - confirm.
     */
    int getEnd();

    /**
     * @return name of the bookmark
     */
    String getName();

    /**
     * @return offset at which the bookmark starts
     */
    int getStart();

    /**
     * Changes the name of the bookmark.
     *
     * @param name
     *            new bookmark name
     */
    void setName( String name );
}
@@ -50,50 +50,7 @@ public class BookmarksImpl implements Bookmarks | |||
private Bookmark getBookmark( final GenericPropertyNode first ) | |||
{ | |||
return new Bookmark() | |||
{ | |||
public int getEnd() | |||
{ | |||
int currentIndex = bookmarksTables | |||
.getDescriptorFirstIndex( first ); | |||
try | |||
{ | |||
GenericPropertyNode descriptorLim = bookmarksTables | |||
.getDescriptorLim( currentIndex ); | |||
return descriptorLim.getStart(); | |||
} | |||
catch ( IndexOutOfBoundsException exc ) | |||
{ | |||
return first.getEnd(); | |||
} | |||
} | |||
public String getName() | |||
{ | |||
int currentIndex = bookmarksTables | |||
.getDescriptorFirstIndex( first ); | |||
try | |||
{ | |||
return bookmarksTables.getName( currentIndex ); | |||
} | |||
catch ( ArrayIndexOutOfBoundsException exc ) | |||
{ | |||
return ""; | |||
} | |||
} | |||
public int getStart() | |||
{ | |||
return first.getStart(); | |||
} | |||
public void setName( String name ) | |||
{ | |||
int currentIndex = bookmarksTables | |||
.getDescriptorFirstIndex( first ); | |||
bookmarksTables.setName( currentIndex, name ); | |||
} | |||
}; | |||
return new BookmarkImpl( first ); | |||
} | |||
public Bookmark getBookmark( int index ) | |||
@@ -143,6 +100,11 @@ public class BookmarksImpl implements Bookmarks | |||
for ( int lookupIndex = startLookupIndex; lookupIndex < endLookupIndex; lookupIndex++ ) | |||
{ | |||
int s = sortedStartPositions[lookupIndex]; | |||
if ( s < startInclusive ) | |||
continue; | |||
if ( s >= endExclusive ) | |||
break; | |||
List<Bookmark> startedAt = getBookmarksAt( s ); | |||
if ( startedAt != null ) | |||
result.put( Integer.valueOf( s ), startedAt ); | |||
@@ -186,4 +148,87 @@ public class BookmarksImpl implements Bookmarks | |||
this.sortedDescriptors = result; | |||
this.sortedStartPositions = indices; | |||
} | |||
private final class BookmarkImpl implements Bookmark | |||
{ | |||
private final GenericPropertyNode first; | |||
private BookmarkImpl( GenericPropertyNode first ) | |||
{ | |||
this.first = first; | |||
} | |||
@Override | |||
public boolean equals( Object obj ) | |||
{ | |||
if ( this == obj ) | |||
return true; | |||
if ( obj == null ) | |||
return false; | |||
if ( getClass() != obj.getClass() ) | |||
return false; | |||
BookmarkImpl other = (BookmarkImpl) obj; | |||
if ( first == null ) | |||
{ | |||
if ( other.first != null ) | |||
return false; | |||
} | |||
else if ( !first.equals( other.first ) ) | |||
return false; | |||
return true; | |||
} | |||
public int getEnd() | |||
{ | |||
int currentIndex = bookmarksTables.getDescriptorFirstIndex( first ); | |||
try | |||
{ | |||
GenericPropertyNode descriptorLim = bookmarksTables | |||
.getDescriptorLim( currentIndex ); | |||
return descriptorLim.getStart(); | |||
} | |||
catch ( IndexOutOfBoundsException exc ) | |||
{ | |||
return first.getEnd(); | |||
} | |||
} | |||
public String getName() | |||
{ | |||
int currentIndex = bookmarksTables.getDescriptorFirstIndex( first ); | |||
try | |||
{ | |||
return bookmarksTables.getName( currentIndex ); | |||
} | |||
catch ( ArrayIndexOutOfBoundsException exc ) | |||
{ | |||
return ""; | |||
} | |||
} | |||
public int getStart() | |||
{ | |||
return first.getStart(); | |||
} | |||
@Override | |||
public int hashCode() | |||
{ | |||
return 31 + ( first == null ? 0 : first.hashCode() ); | |||
} | |||
public void setName( String name ) | |||
{ | |||
int currentIndex = bookmarksTables.getDescriptorFirstIndex( first ); | |||
bookmarksTables.setName( currentIndex, name ); | |||
} | |||
@Override | |||
public String toString() | |||
{ | |||
return "Bookmark [" + getStart() + "; " + getEnd() + "): name: " | |||
+ getName(); | |||
} | |||
} | |||
} |
@@ -187,6 +187,7 @@ public class TestWordToHtmlConverter extends TestCase | |||
String result = getHtmlText( "pageref.doc" ); | |||
assertContains( result, "<a href=\"#userref\">" ); | |||
assertContains( result, "<a name=\"userref\">" ); | |||
assertContains( result, "1" ); | |||
} | |||
} |