import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Notes;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
private static final POILogger logger = POILogFactory
.getLogger( AbstractWordConverter.class );
+ private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
+
private final Set<Bookmark> bookmarkStack = new LinkedHashSet<Bookmark>();
private FontReplacer fontReplacer = new DefaultFontReplacer();
if ( text.getBytes().length == 0 )
continue;
+ if ( characterRun.isSpecialCharacter() )
+ {
+ if ( text.charAt( 0 ) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
+ && ( document instanceof HWPFDocument ) )
+ {
+ HWPFDocument doc = (HWPFDocument) document;
+ processNoteAnchor( doc, characterRun, block );
+ continue;
+ }
+ }
+
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
{
if ( document instanceof HWPFDocument )
processDocumentInformation( summaryInformation );
}
- final Range range = wordDocument.getRange();
+ processDocumentPart( wordDocument, wordDocument.getRange() );
+ }
+
+ protected abstract void processDocumentInformation(
+ SummaryInformation summaryInformation );
+
+ protected void processDocumentPart( HWPFDocumentCore wordDocument,
+ final Range range )
+ {
for ( int s = 0; s < range.numSections(); s++ )
{
processSection( wordDocument, range.getSection( s ), s );
}
}
- protected abstract void processDocumentInformation(
- SummaryInformation summaryInformation );
+ protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
+ int noteIndex, Element block, Range endnoteTextRange );
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
int currentTableLevel, Field field, Element currentBlock )
return field;
}
+ protected abstract void processFootnoteAutonumbered( HWPFDocument doc,
+ int noteIndex, Element block, Range footnoteTextRange );
+
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String hyperlink );
protected abstract void processLineBreak( Element block,
CharacterRun characterRun );
+ protected void processNoteAnchor( HWPFDocument doc,
+ CharacterRun characterRun, final Element block )
+ {
+ {
+ Notes footnotes = doc.getFootnotes();
+ int noteIndex = footnotes
+ .getNoteIndexByAnchorPosition( characterRun
+ .getStartOffset() );
+ if ( noteIndex != -1 )
+ {
+ Range footnoteRange = doc.getFootnoteRange();
+ int rangeStartOffset = footnoteRange.getStartOffset();
+ int noteTextStartOffset = footnotes
+ .getNoteTextStartOffset( noteIndex );
+ int noteTextEndOffset = footnotes
+ .getNoteTextEndOffset( noteIndex );
+
+ Range noteTextRange = new Range( rangeStartOffset
+ + noteTextStartOffset, rangeStartOffset
+ + noteTextEndOffset, doc );
+
+ processFootnoteAutonumbered( doc, noteIndex, block,
+ noteTextRange );
+ return;
+ }
+ }
+ {
+ Notes endnotes = doc.getEndnotes();
+ int noteIndex = endnotes.getNoteIndexByAnchorPosition( characterRun
+ .getStartOffset() );
+ if ( noteIndex != -1 )
+ {
+ Range endnoteRange = doc.getEndnoteRange();
+ int rangeStartOffset = endnoteRange.getStartOffset();
+ int noteTextStartOffset = endnotes
+ .getNoteTextStartOffset( noteIndex );
+ int noteTextEndOffset = endnotes
+ .getNoteTextEndOffset( noteIndex );
+
+ Range noteTextRange = new Range( rangeStartOffset
+ + noteTextStartOffset, rangeStartOffset
+ + noteTextEndOffset, doc );
+
+ processEndnoteAutonumbered( doc, noteIndex, block,
+ noteTextRange );
+ return;
+ }
+ }
+ }
+
protected abstract void processPageref( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String pageref );
Element parentFopElement, int currentTableLevel,
Paragraph paragraph, String bulletText );
+ /**
+  * Converts the paragraphs of <tt>range</tt> in document order.
+  * <p>
+  * A paragraph that sits in a table whose level differs from
+  * <tt>currentTableLevel</tt> starts a nested table: the table is passed
+  * to <tt>processTable</tt> and all of its paragraphs are skipped here.
+  * Every other paragraph goes to <tt>processParagraph</tt>, with a bullet
+  * label when it references a list structure and list tables are
+  * available, and with an empty label otherwise.
+  *
+  * @param wordDocument
+  *            document being converted
+  * @param flow
+  *            element that receives the converted content
+  * @param range
+  *            range whose paragraphs are processed
+  * @param currentTableLevel
+  *            table level of the caller
+  * @throws IllegalStateException
+  *             if a table paragraph has a lower table level than
+  *             <tt>currentTableLevel</tt>
+  */
+ protected void processParagraphes( HWPFDocumentCore wordDocument,
+         Element flow, Range range, int currentTableLevel )
+ {
+     final ListTables listTables = wordDocument.getListTables();
+
+     final int paragraphs = range.numParagraphs();
+     for ( int p = 0; p < paragraphs; p++ )
+     {
+         final Paragraph paragraph = range.getParagraph( p );
+
+         if ( paragraph.isInTable()
+                 && paragraph.getTableLevel() != currentTableLevel )
+         {
+             // only a deeper level can be an inner table of the current one
+             if ( paragraph.getTableLevel() < currentTableLevel )
+                 throw new IllegalStateException(
+                         "Trying to process table cell with lower level ("
+                                 + paragraph.getTableLevel()
+                                 + ") than current table level ("
+                                 + currentTableLevel
+                                 + ") as inner table part" );
+
+             final Table table = range.getTable( paragraph );
+             processTable( wordDocument, flow, table );
+
+             // skip the table's paragraphs; the loop increment adds the
+             // remaining one
+             p += table.numParagraphs() - 1;
+             continue;
+         }
+
+         final int listInfo = paragraph.getIlfo();
+         String label = AbstractWordUtils.EMPTY;
+         if ( listInfo != 0 )
+         {
+             if ( listTables != null )
+             {
+                 final ListFormatOverride listFormatOverride = listTables
+                         .getOverride( listInfo );
+                 label = AbstractWordUtils.getBulletText( listTables,
+                         paragraph, listFormatOverride.getLsid() );
+             }
+             else
+             {
+                 logger.log( POILogger.WARN,
+                         "Paragraph #" + paragraph.getStartOffset() + "-"
+                                 + paragraph.getEndOffset()
+                                 + " has reference to list structure #"
+                                 + listInfo
+                                 + ", but listTables not defined in file" );
+             }
+         }
+
+         processParagraph( wordDocument, flow, currentTableLevel,
+                 paragraph, label );
+     }
+ }
+
private boolean processRangeBookmarks( HWPFDocumentCore document,
int currentTableLevel, Range range, final Element block,
Map<Integer, List<Bookmark>> rangeBookmakrs )
protected abstract void processSection( HWPFDocumentCore wordDocument,
Section section, int s );
- protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
- Element flow, Range range, int currentTableLevel )
- {
- final ListTables listTables = wordDocument.getListTables();
- int currentListInfo = 0;
-
- final int paragraphs = range.numParagraphs();
- for ( int p = 0; p < paragraphs; p++ )
- {
- Paragraph paragraph = range.getParagraph( p );
-
- if ( paragraph.isInTable()
- && paragraph.getTableLevel() != currentTableLevel )
- {
- if ( paragraph.getTableLevel() < currentTableLevel )
- throw new IllegalStateException(
- "Trying to process table cell with higher level ("
- + paragraph.getTableLevel()
- + ") than current table level ("
- + currentTableLevel
- + ") as inner table part" );
-
- Table table = range.getTable( paragraph );
- processTable( wordDocument, flow, table );
-
- p += table.numParagraphs();
- p--;
- continue;
- }
-
- if ( paragraph.getIlfo() != currentListInfo )
- {
- currentListInfo = paragraph.getIlfo();
- }
-
- if ( currentListInfo != 0 )
- {
- if ( listTables != null )
- {
- final ListFormatOverride listFormatOverride = listTables
- .getOverride( paragraph.getIlfo() );
-
- String label = AbstractWordUtils.getBulletText( listTables,
- paragraph, listFormatOverride.getLsid() );
-
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, label );
- }
- else
- {
- logger.log( POILogger.WARN,
- "Paragraph #" + paragraph.getStartOffset() + "-"
- + paragraph.getEndOffset()
- + " has reference to list structure #"
- + currentListInfo
- + ", but listTables not defined in file" );
-
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, AbstractWordUtils.EMPTY );
- }
- }
- else
- {
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, AbstractWordUtils.EMPTY );
- }
- }
-
- }
-
protected void processSingleSection( HWPFDocumentCore wordDocument,
Section section )
{
return result;
}
+ /**
+  * Creates a detached <tt>fo:footnote-body</tt> element in the XSL-FO
+  * namespace.
+  *
+  * @return the new element, not yet attached to any parent
+  */
+ public Element createFootnoteBody()
+ {
+     final Element footnoteBody = document.createElementNS( NS_XSLFO,
+             "fo:footnote-body" );
+     return footnoteBody;
+ }
+
public Element createInline()
{
return document.createElementNS( NS_XSLFO, "fo:inline" );
}
}
+ @Override
+ protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
+ Element block, Range endnoteTextRange )
+ {
+ // TODO: add endnote implementation?
+ processFootnoteAutonumbered( doc, noteIndex, block, endnoteTextRange );
+ }
+
+ @Override
+ protected void processFootnoteAutonumbered( HWPFDocument doc,
+ int noteIndex, Element block, Range footnoteTextRange )
+ {
+ String textIndex = String.valueOf( noteIndex + 1 );
+
+ {
+ Element inline = foDocumentFacade.createInline();
+ inline.setTextContent( textIndex );
+ inline.setAttribute( "baseline-shift", "super" );
+ inline.setAttribute( "font-size", "smaller" );
+ block.appendChild( inline );
+ }
+
+ Element footnoteBody = foDocumentFacade.createFootnoteBody();
+ Element footnoteBlock = foDocumentFacade.createBlock();
+ footnoteBody.appendChild( footnoteBlock );
+ block.appendChild( footnoteBody );
+
+ {
+ Element inline = foDocumentFacade.createInline();
+ inline.setTextContent( textIndex );
+ inline.setAttribute( "baseline-shift", "super" );
+ inline.setAttribute( "font-size", "smaller" );
+ footnoteBlock.appendChild( inline );
+ }
+
+ processCharacters( doc, Integer.MIN_VALUE, footnoteTextRange,
+ footnoteBlock );
+ }
+
static Document process( File docFile ) throws Exception
{
final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
"xsl-region-body" );
- processSectionParagraphes( wordDocument, flow, section,
- Integer.MIN_VALUE );
+ processParagraphes( wordDocument, flow, section, Integer.MIN_VALUE );
}
protected void processTable( HWPFDocumentCore wordDocument, Element flow,
+ count );
}
- processSectionParagraphes( wordDocument, tableCellElement,
- tableCell, table.getTableLevel() );
+ processParagraphes( wordDocument, tableCellElement, tableCell,
+ table.getTableLevel() );
if ( !tableCellElement.hasChildNodes() )
{
private final HtmlDocumentFacade htmlDocumentFacade;
+ private Element notes = null;
+
/**
* Creates new instance of {@link WordToHtmlConverter}. Can be used for
* output several {@link HWPFDocument}s into single HTML document.
span.appendChild( textNode );
}
+ @Override
+ protected void processBookmarks( HWPFDocumentCore wordDocument,
+ Element currentBlock, Range range, int currentTableLevel,
+ List<Bookmark> rangeBookmarks )
+ {
+ Element parent = currentBlock;
+ for ( Bookmark bookmark : rangeBookmarks )
+ {
+ Element bookmarkElement = htmlDocumentFacade
+ .createBookmark( bookmark.getName() );
+ parent.appendChild( bookmarkElement );
+ parent = bookmarkElement;
+ }
+
+ if ( range != null )
+ processCharacters( wordDocument, currentTableLevel, range, parent );
+ }
+
+ /**
+  * Runs the regular document conversion and afterwards appends the
+  * collected notes block, if any note was produced, to the HTML body.
+  */
+ @Override
+ public void processDocument( HWPFDocumentCore wordDocument )
+ {
+     super.processDocument( wordDocument );
+
+     if ( notes == null )
+         return;
+
+     final Element body = htmlDocumentFacade.getBody();
+     body.appendChild( notes );
+ }
+
@Override
protected void processDocumentInformation(
SummaryInformation summaryInformation )
.addDescription( summaryInformation.getComments() );
}
+ @Override
+ protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
+ Element block, Range endnoteTextRange )
+ {
+ processNoteAutonumbered( doc, "end", noteIndex, block, endnoteTextRange );
+ }
+
+ @Override
+ protected void processFootnoteAutonumbered( HWPFDocument doc,
+ int noteIndex, Element block, Range footnoteTextRange )
+ {
+ processNoteAutonumbered( doc, "foot", noteIndex, block,
+ footnoteTextRange );
+ }
+
@Override
protected void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
basicLink );
}
- @Override
- protected void processBookmarks( HWPFDocumentCore wordDocument,
- Element currentBlock, Range range, int currentTableLevel,
- List<Bookmark> rangeBookmarks )
- {
- Element parent = currentBlock;
- for ( Bookmark bookmark : rangeBookmarks )
- {
- Element bookmarkElement = htmlDocumentFacade
- .createBookmark( bookmark.getName() );
- parent.appendChild( bookmarkElement );
- parent = bookmarkElement;
- }
-
- if ( range != null )
- processCharacters( wordDocument, currentTableLevel, range, parent );
- }
-
/**
* This method shall store image bytes in external file and convert it if
* necessary. Images shall be stored using PNG format. Other formats may be
block.appendChild( htmlDocumentFacade.createLineBreak() );
}
+ protected void processNoteAutonumbered( HWPFDocument doc, String type,
+ int noteIndex, Element block, Range noteTextRange )
+ {
+ String textIndex = String.valueOf( noteIndex + 1 );
+
+ final String forwardNoteLink = type + "note_" + textIndex;
+ final String backwardNoteLink = type + "note_back_" + textIndex;
+
+ Element anchor = htmlDocumentFacade.createHyperlink( "#"
+ + forwardNoteLink );
+ anchor.setAttribute( "name", backwardNoteLink );
+ anchor.setAttribute( "class", type + "noteanchor" );
+ anchor.setTextContent( textIndex );
+ block.appendChild( anchor );
+
+ if ( notes == null )
+ {
+ notes = htmlDocumentFacade.createBlock();
+ notes.setAttribute( "class", "notes" );
+ }
+
+ Element note = htmlDocumentFacade.createBlock();
+ note.setAttribute( "class", type + "note" );
+ notes.appendChild( note );
+
+ Element bookmark = htmlDocumentFacade.createBookmark( forwardNoteLink );
+ bookmark.setAttribute( "href", "#" + backwardNoteLink );
+ bookmark.setTextContent( textIndex );
+ note.appendChild( bookmark );
+
+ Element span = htmlDocumentFacade.getDocument().createElement( "span" );
+ span.setAttribute( "class", type + "notetext" );
+ note.appendChild( span );
+
+ processCharacters( doc, Integer.MIN_VALUE, noteTextRange, span );
+ }
+
protected void processPageref( HWPFDocumentCore hwpfDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String pageref )
div.getTagName(), "d", getSectionStyle( section ) ) );
htmlDocumentFacade.body.appendChild( div );
- processSectionParagraphes( wordDocument, div, section,
- Integer.MIN_VALUE );
+ processParagraphes( wordDocument, div, section, Integer.MIN_VALUE );
}
@Override
.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass(
"body", "b", getSectionStyle( section ) ) );
- processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
- section, Integer.MIN_VALUE );
+ processParagraphes( wordDocument, htmlDocumentFacade.body, section,
+ Integer.MIN_VALUE );
}
protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
tableCellElement.setAttribute( "rowspan", "" + count );
}
- processSectionParagraphes( hwpfDocument, tableCellElement,
- tableCell, table.getTableLevel() );
+ processParagraphes( hwpfDocument, tableCellElement, tableCell,
+ table.getTableLevel() );
if ( !tableCellElement.hasChildNodes() )
{
if ( true )
{
- System.out.println( new Range( chpx.getStart(), chpx.getEnd(),
+ String text = new Range( chpx.getStart(), chpx.getEnd(),
_doc.getOverallRange() )
{
public String toString()
{
return "CHPX range (" + super.toString() + ")";
}
- }.text() );
+ }.text();
+ StringBuilder stringBuilder = new StringBuilder();
+ for ( char c : text.toCharArray() )
+ {
+ if ( c < 30 )
+ stringBuilder.append( "\\0x" + Integer.toHexString( c ) );
+ else
+ stringBuilder.append( c );
+ }
+ System.out.println( stringBuilder );
}
}
}
*/
int getNotesCount();
+ /**
+  * Returns the index of the note with the specified anchor position, or
+  * <tt>-1</tt> if no such note exists
+  */
+ int getNoteIndexByAnchorPosition( int anchorPosition );
+
/**
* Returns the end offset of the text corresponding to the reference within
* the footnote text address space
==================================================================== */
package org.apache.poi.hwpf.usermodel;
+import java.util.HashMap;
+import java.util.Map;
+
import org.apache.poi.hwpf.model.NotesTables;
/**
*/
public class NotesImpl implements Notes
{
+ private Map<Integer, Integer> anchorToIndexMap = null;
+
private final NotesTables notesTables;
public NotesImpl( NotesTables notesTables )
return notesTables.getDescriptor( index ).getStart();
}
+ /**
+  * Looks up the note anchored at the given position, building the
+  * anchor-to-index map first if it does not exist yet.
+  *
+  * @return index of the matching note, or <tt>-1</tt> if no note is
+  *         anchored at <tt>anchorPosition</tt>
+  */
+ public int getNoteIndexByAnchorPosition( int anchorPosition )
+ {
+     updateAnchorToIndexMap();
+
+     final Integer noteIndex = anchorToIndexMap.get( Integer
+             .valueOf( anchorPosition ) );
+     return noteIndex != null ? noteIndex.intValue() : -1;
+ }
+
public int getNotesCount()
{
return notesTables.getDescriptorsCount();
{
return notesTables.getTextPosition( index ).getStart();
}
+
+ private void updateAnchorToIndexMap()
+ {
+ if ( anchorToIndexMap != null )
+ return;
+
+ Map<Integer, Integer> result = new HashMap<Integer, Integer>();
+ for ( int n = 0; n < notesTables.getDescriptorsCount(); n++ )
+ {
+ int anchorPosition = notesTables.getDescriptor( n ).getStart();
+ result.put( Integer.valueOf( anchorPosition ), Integer.valueOf( n ) );
+ }
+ this.anchorToIndexMap = result;
+ }
}
import javax.xml.transform.stream.StreamResult;
import junit.framework.TestCase;
+
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
*/
public class TestWordToFoConverter extends TestCase
{
+ private static void assertContains( String result, final String substring )
+ {
+ if ( !result.contains( substring ) )
+ fail( "Substring \"" + substring
+ + "\" not found in the following string: \"" + result
+ + "\"" );
+ }
+
private static String getFoText( final String sampleFileName )
throws Exception
{
.contains( "<pdf:Keywords xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">This is document keywords</pdf:Keywords>" ) );
}
+ /**
+  * Checks that the FO output for <tt>endingnote.doc</tt> contains the
+  * superscript note number and the endnote text.
+  */
+ public void testEndnote() throws Exception
+ {
+     final String foText = getFoText( "endingnote.doc" );
+     final String expectedNoteNumber = "<fo:inline baseline-shift=\"super\" font-size=\"smaller\">1</fo:inline>";
+
+     assertContains( foText, expectedNoteNumber );
+     assertContains( foText, "Ending note text" );
+ }
+
public void testEquation() throws Exception
{
final String sampleFileName = "equation.doc";
.contains( "<!--Image link to '0.emf' can be here-->" ) );
}
- public void testInnerTable() throws Exception
+ public void testHyperlink() throws Exception
{
- final String sampleFileName = "innertable.doc";
+ final String sampleFileName = "hyperlink.doc";
String result = getFoText( sampleFileName );
assertTrue( result
- .contains( "padding-end=\"0.0in\" padding-start=\"0.0in\" width=\"1.0770833in\"" ) );
+ .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
+ assertTrue( result.contains( "Hyperlink text" ) );
}
- public void testHyperlink() throws Exception
+ public void testInnerTable() throws Exception
{
- final String sampleFileName = "hyperlink.doc";
+ final String sampleFileName = "innertable.doc";
String result = getFoText( sampleFileName );
assertTrue( result
- .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
- assertTrue( result.contains( "Hyperlink text" ) );
+ .contains( "padding-end=\"0.0in\" padding-start=\"0.0in\" width=\"1.0770833in\"" ) );
}
public void testPageref() throws Exception
{
String result = getHtmlText( "endingnote.doc" );
- assertContains( result, "<a href=\"#userref\">" );
- assertContains( result, "<a name=\"userref\">" );
- assertContains( result, "1" );
+ assertContains( result,
+ "<a class=\"endnoteanchor\" href=\"#endnote_1\" name=\"endnote_back_1\">1</a>" );
+ assertContains( result,
+ "<a href=\"#endnote_back_1\" name=\"endnote_1\">1</a>" );
+ assertContains( result, "Ending note text" );
}
public void testEquation() throws Exception