git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145075 13f79535-47bb-0310-9956-ffa450edef68 tags/REL_3_8_BETA4
@@ -214,15 +214,10 @@ public final class HWPFDocument extends HWPFDocumentCore | |||
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin); | |||
_tpt = _cft.getTextPieceTable(); | |||
// Word XP and later all put in a zero filled buffer in | |||
// front of the text. This screws up the system for offsets, | |||
// which assume we always start at zero. This is an adjustment. | |||
int cpMin = _tpt.getCpMin(); | |||
// Now load the rest of the properties, which need to be adjusted | |||
// for where text really begins | |||
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true); | |||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt, true); | |||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true); | |||
// Read FSPA and Escher information | |||
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces()); |
@@ -43,6 +43,8 @@ public class HtmlDocumentFacade | |||
html.appendChild( head ); | |||
html.appendChild( body ); | |||
body.setAttribute( "style", "white-space-collapsing: preserve; " ); | |||
} | |||
public void addAuthor( String value ) |
@@ -172,7 +172,8 @@ public class WordToHtmlUtils extends AbstractWordUtils | |||
style.append( "break-before: page; " ); | |||
} | |||
style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " ); | |||
style.append( "hyphenate: " | |||
+ ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " ); | |||
if ( paragraph.keepOnPage() ) | |||
{ | |||
@@ -183,9 +184,6 @@ public class WordToHtmlUtils extends AbstractWordUtils | |||
{ | |||
style.append( "keep-with-next.within-page: always; " ); | |||
} | |||
style.append( "linefeed-treatment: preserve; " ); | |||
style.append( "white-space-collapse: false; " ); | |||
} | |||
public static void addTableCellProperties( TableRow tableRow, |
@@ -21,6 +21,8 @@ import java.util.ArrayList; | |||
import java.util.List; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
/** | |||
* Represents a CHP fkp. The style properties for paragraph and character runs | |||
@@ -40,6 +42,9 @@ import org.apache.poi.util.LittleEndian; | |||
*/ | |||
public final class CHPFormattedDiskPage extends FormattedDiskPage | |||
{ | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( CHPFormattedDiskPage.class ); | |||
private static final int FC_SIZE = 4; | |||
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>(); | |||
@@ -79,11 +84,20 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage | |||
int startAt = getStart(x); | |||
int endAt = getEnd(x); | |||
if (ignoreChpxWithoutTextPieces && !tpt.isIndexInTable( startAt, endAt ) ) { | |||
_chpxList.add(null); | |||
} else { | |||
if (!ignoreChpxWithoutTextPieces || tpt.isIndexInTable( startAt, endAt ) ) | |||
{ | |||
_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x))); | |||
} | |||
else | |||
{ | |||
logger.log( POILogger.WARN, "CHPX [", | |||
Integer.valueOf( startAt ), "; ", | |||
Integer.valueOf( endAt ), | |||
") (bytes) doesn't have corresponding text pieces " | |||
+ "and will be skipped" ); | |||
_chpxList.add(null); | |||
} | |||
} | |||
} | |||
@@ -21,8 +21,6 @@ import java.util.Collections; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
/** | |||
* This class holds all of the character formatting | |||
@@ -34,9 +32,6 @@ import org.apache.poi.util.POILogger; | |||
*/ | |||
public final class OldCHPBinTable extends CHPBinTable | |||
{ | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( OldCHPBinTable.class ); | |||
/** | |||
* Constructor used to read an old-style binTable | |||
* in from a Word document. | |||
@@ -67,15 +62,8 @@ public final class OldCHPBinTable extends CHPBinTable | |||
for (int y = 0; y < fkpSize; y++) | |||
{ | |||
CHPX chpx = cfkp.getCHPX(y); | |||
if (chpx != null && tpt.isIndexInTable( chpx.getStartBytes(), chpx.getEndBytes() )) { | |||
if (chpx != null) | |||
_textRuns.add(chpx); | |||
} else { | |||
if ( chpx != null ) | |||
logger.log( POILogger.WARN, "CHPX [", | |||
chpx.getStartBytes(), "; ", chpx.getEndBytes(), | |||
") (bytes) doesn't have corresponding text pieces " | |||
+ "and will be skipped" ); | |||
} | |||
} | |||
} | |||
Collections.sort( _textRuns, PropertyNode.StartComparator.instance ); |
@@ -21,8 +21,6 @@ import java.util.Collections; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
/** | |||
* This class holds all of the paragraph formatting | |||
@@ -34,8 +32,6 @@ import org.apache.poi.util.POILogger; | |||
*/ | |||
public final class OldPAPBinTable extends PAPBinTable | |||
{ | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( OldPAPBinTable.class ); | |||
public OldPAPBinTable(byte[] documentStream, int offset, | |||
int size, int fcMin, TextPieceTable tpt) | |||
@@ -51,21 +47,15 @@ public final class OldPAPBinTable extends PAPBinTable | |||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; | |||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, | |||
documentStream, pageOffset, fcMin, tpt); | |||
documentStream, pageOffset, tpt, true); | |||
int fkpSize = pfkp.size(); | |||
for (int y = 0; y < fkpSize; y++) | |||
{ | |||
PAPX papx = pfkp.getPAPX(y); | |||
if (papx != null && tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() )) { | |||
if (papx != null) { | |||
_paragraphs.add(papx); | |||
} else { | |||
if ( papx != null ) | |||
logger.log( POILogger.WARN, "PAPX [", | |||
papx.getStartBytes(), "; ", papx.getEndBytes(), | |||
") (bytes) doesn't have corresponding text pieces " | |||
+ "and will be skipped" ); | |||
} | |||
} | |||
} |
@@ -56,13 +56,12 @@ public class PAPBinTable | |||
byte[] dataStream, int offset, int size, int fcMin, | |||
TextPieceTable tpt ) | |||
{ | |||
this( documentStream, tableStream, dataStream, offset, size, fcMin, | |||
tpt, true ); | |||
this( documentStream, tableStream, dataStream, offset, size, tpt, true ); | |||
} | |||
public PAPBinTable( byte[] documentStream, byte[] tableStream, | |||
byte[] dataStream, int offset, int size, int fcMin, | |||
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces ) | |||
byte[] dataStream, int offset, int size, TextPieceTable tpt, | |||
boolean ignorePapxWithoutTextPieces ) | |||
{ | |||
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); | |||
this.tpt = tpt; | |||
@@ -76,7 +75,7 @@ public class PAPBinTable | |||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; | |||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, | |||
dataStream, pageOffset, fcMin, tpt); | |||
dataStream, pageOffset, tpt, ignorePapxWithoutTextPieces); | |||
int fkpSize = pfkp.size(); | |||
@@ -84,8 +83,7 @@ public class PAPBinTable | |||
{ | |||
PAPX papx = pfkp.getPAPX(y); | |||
//we don't need PAPX if they are referenced nowhere | |||
if (!ignorePapxWithoutTextPieces || tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() )) | |||
if (papx != null) | |||
_paragraphs.add(papx); | |||
} | |||
} |
@@ -22,6 +22,8 @@ import java.util.Arrays; | |||
import java.util.List; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
/** | |||
* Represents a PAP FKP. The style properties for paragraph and character runs | |||
@@ -40,6 +42,8 @@ import org.apache.poi.util.LittleEndian; | |||
* @author Ryan Ackley | |||
*/ | |||
public final class PAPFormattedDiskPage extends FormattedDiskPage { | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( PAPFormattedDiskPage.class ); | |||
private static final int BX_SIZE = 13; | |||
private static final int FC_SIZE = 4; | |||
@@ -56,17 +60,45 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { | |||
/** | |||
* Creates a PAPFormattedDiskPage from a 512 byte array | |||
* | |||
* @deprecated Use | |||
* {@link #PAPFormattedDiskPage(byte[],byte[],int,int,TextPieceTable,boolean)} | |||
* instead | |||
*/ | |||
public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt) | |||
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream, | |||
int offset, int fcMin, TextPieceTable tpt ) | |||
{ | |||
super(documentStream, offset); | |||
for (int x = 0; x < _crun; x++) { | |||
int startAt = getStart(x); | |||
int endAt = getEnd(x); | |||
_papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream)); | |||
} | |||
_fkp = null; | |||
_dataStream = dataStream; | |||
this( documentStream, dataStream, offset, tpt, true ); | |||
} | |||
/** | |||
* Creates a PAPFormattedDiskPage from a 512 byte array | |||
*/ | |||
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream, | |||
int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces ) | |||
{ | |||
super( documentStream, offset ); | |||
for ( int x = 0; x < _crun; x++ ) | |||
{ | |||
int startAt = getStart( x ); | |||
int endAt = getEnd( x ); | |||
if ( !ignorePapxWithoutTextPieces | |||
|| tpt.isIndexInTable( startAt, endAt ) ) | |||
_papxList.add( new PAPX( startAt, endAt, tpt, getGrpprl( x ), | |||
getParagraphHeight( x ), dataStream ) ); | |||
else | |||
{ | |||
logger.log( POILogger.WARN, "PAPX [", | |||
Integer.valueOf( startAt ), "; ", | |||
Integer.valueOf( endAt ), | |||
") (bytes) doesn't have corresponding text pieces " | |||
+ "and will be skipped" ); | |||
_papxList.add( null ); | |||
} | |||
} | |||
_fkp = null; | |||
_dataStream = dataStream; | |||
} | |||
/** |
@@ -26,6 +26,7 @@ import javax.xml.transform.dom.DOMSource; | |||
import javax.xml.transform.stream.StreamResult; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
@@ -68,6 +69,13 @@ public class TestWordToHtmlConverter extends TestCase | |||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) ); | |||
} | |||
public void testBug33519() throws Exception | |||
{ | |||
String result = getHtmlText( "Bug33519.doc" ); | |||
assertTrue( result.contains( "Планински турове" ) ); | |||
assertTrue( result.contains( "Явор Асенов" ) ); | |||
} | |||
public void testBug46610_2() throws Exception | |||
{ | |||
String result = getHtmlText( "Bug46610_2.doc" ); |
@@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream; | |||
import java.util.ArrayList; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.hwpf.HWPFDocFixture; | |||
import org.apache.poi.hwpf.model.io.HWPFFileSystem; | |||
@@ -38,9 +39,8 @@ public final class TestPAPBinTable | |||
FileInformationBlock fib = _hWPFDocFixture._fib; | |||
byte[] mainStream = _hWPFDocFixture._mainStream; | |||
byte[] tableStream = _hWPFDocFixture._tableStream; | |||
int fcMin = fib.getFcMin(); | |||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT, false); | |||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false); | |||
HWPFFileSystem fileSys = new HWPFFileSystem(); | |||
@@ -51,7 +51,7 @@ public final class TestPAPBinTable | |||
byte[] newTableStream = tableOut.toByteArray(); | |||
byte[] newMainStream = mainOut.toByteArray(); | |||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT, false); | |||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false); | |||
ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); | |||
ArrayList newTextRuns = newBinTable.getParagraphs(); |