_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
_tpt = _cft.getTextPieceTable();
- // Word XP and later all put in a zero filled buffer in
- // front of the text. This screws up the system for offsets,
- // which assume we always start at zero. This is an adjustment.
- int cpMin = _tpt.getCpMin();
-
// Now load the rest of the properties, which need to be adjusted
// for where text really begin
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
- _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt, true);
+ _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
// Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
html.appendChild( head );
html.appendChild( body );
+
+ body.setAttribute( "style", "white-space-collapsing: preserve; " );
}
public void addAuthor( String value )
style.append( "break-before: page; " );
}
- style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
+ style.append( "hyphenate: "
+ + ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " );
if ( paragraph.keepOnPage() )
{
{
style.append( "keep-with-next.within-page: always; " );
}
-
- style.append( "linefeed-treatment: preserve; " );
- style.append( "white-space-collapse: false; " );
}
public static void addTableCellProperties( TableRow tableRow,
import java.util.List;
import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
/**
* Represents a CHP fkp. The style properties for paragraph and character runs
*/
public final class CHPFormattedDiskPage extends FormattedDiskPage
{
+ private static final POILogger logger = POILogFactory
+ .getLogger( CHPFormattedDiskPage.class );
+
private static final int FC_SIZE = 4;
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
int startAt = getStart(x);
int endAt = getEnd(x);
- if (ignoreChpxWithoutTextPieces && !tpt.isIndexInTable( startAt, endAt ) ) {
- _chpxList.add(null);
- } else {
+ if (!ignoreChpxWithoutTextPieces || tpt.isIndexInTable( startAt, endAt ) )
+ {
_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
}
+ else
+ {
+ logger.log( POILogger.WARN, "CHPX [",
+ Integer.valueOf( startAt ), "; ",
+ Integer.valueOf( endAt ),
+ ") (bytes) doesn't have corresponding text pieces "
+ + "and will be skipped" );
+
+ _chpxList.add(null);
+ }
}
}
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
/**
* This class holds all of the character formatting
*/
public final class OldCHPBinTable extends CHPBinTable
{
- private static final POILogger logger = POILogFactory
- .getLogger( OldCHPBinTable.class );
-
/**
* Constructor used to read an old-style binTable
* in from a Word document.
for (int y = 0; y < fkpSize; y++)
{
CHPX chpx = cfkp.getCHPX(y);
- if (chpx != null && tpt.isIndexInTable( chpx.getStartBytes(), chpx.getEndBytes() )) {
+ if (chpx != null)
_textRuns.add(chpx);
- } else {
- if ( chpx != null )
- logger.log( POILogger.WARN, "CHPX [",
- chpx.getStartBytes(), "; ", chpx.getEndBytes(),
- ") (bytes) doesn't have corresponding text pieces "
- + "and will be skipped" );
- }
}
}
Collections.sort( _textRuns, PropertyNode.StartComparator.instance );
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
/**
* This class holds all of the paragraph formatting
*/
public final class OldPAPBinTable extends PAPBinTable
{
- private static final POILogger logger = POILogFactory
- .getLogger( OldPAPBinTable.class );
public OldPAPBinTable(byte[] documentStream, int offset,
int size, int fcMin, TextPieceTable tpt)
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
- documentStream, pageOffset, fcMin, tpt);
+ documentStream, pageOffset, tpt, true);
int fkpSize = pfkp.size();
for (int y = 0; y < fkpSize; y++)
{
PAPX papx = pfkp.getPAPX(y);
- if (papx != null && tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() )) {
+ if (papx != null) {
_paragraphs.add(papx);
- } else {
- if ( papx != null )
- logger.log( POILogger.WARN, "PAPX [",
- papx.getStartBytes(), "; ", papx.getEndBytes(),
- ") (bytes) doesn't have corresponding text pieces "
- + "and will be skipped" );
}
}
}
byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt )
{
- this( documentStream, tableStream, dataStream, offset, size, fcMin,
- tpt, true );
+ this( documentStream, tableStream, dataStream, offset, size, tpt, true );
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
- byte[] dataStream, int offset, int size, int fcMin,
- TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
+ byte[] dataStream, int offset, int size, TextPieceTable tpt,
+ boolean ignorePapxWithoutTextPieces )
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
- dataStream, pageOffset, fcMin, tpt);
+ dataStream, pageOffset, tpt, ignorePapxWithoutTextPieces);
int fkpSize = pfkp.size();
{
PAPX papx = pfkp.getPAPX(y);
- //we don't need PAPX if they are references nowhere
- if (!ignorePapxWithoutTextPieces || tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() ))
+ if (papx != null)
_paragraphs.add(papx);
}
}
import java.util.List;
import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
/**
* Represents a PAP FKP. The style properties for paragraph and character runs
* @author Ryan Ackley
*/
public final class PAPFormattedDiskPage extends FormattedDiskPage {
+ private static final POILogger logger = POILogFactory
+ .getLogger( PAPFormattedDiskPage.class );
private static final int BX_SIZE = 13;
private static final int FC_SIZE = 4;
/**
* Creates a PAPFormattedDiskPage from a 512 byte array
+ *
+ * @deprecated Use
+ * {@link #PAPFormattedDiskPage(byte[],byte[],int,TextPieceTable,boolean)}
+ * instead. This overload no longer uses {@code fcMin}; it delegates to
+ * the five-argument constructor with
+ * {@code ignorePapxWithoutTextPieces} set to {@code true}.
*/
- public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
+ public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
+ int offset, int fcMin, TextPieceTable tpt )
{
- super(documentStream, offset);
- for (int x = 0; x < _crun; x++) {
- int startAt = getStart(x);
- int endAt = getEnd(x);
- _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
- }
- _fkp = null;
- _dataStream = dataStream;
+ this( documentStream, dataStream, offset, tpt, true );
+ }
+
+ /**
+ * Creates a PAPFormattedDiskPage from a 512 byte array
+ * <p>
+ * Exactly one entry is appended to the PAPX list for every run index
+ * below {@code _crun}: a new {@link PAPX} when the run is accepted, or
+ * {@code null} (after a WARN log entry) when it is skipped. Code reading
+ * entries back must therefore be prepared for {@code null}.
+ *
+ * @param documentStream
+ *            forwarded, together with {@code offset}, to the superclass
+ *            constructor
+ * @param dataStream
+ *            passed to every created PAPX and stored in
+ *            {@code _dataStream}
+ * @param offset
+ *            forwarded to the superclass constructor
+ * @param tpt
+ *            text piece table used for the {@code isIndexInTable} check
+ *            and passed to every created PAPX
+ * @param ignorePapxWithoutTextPieces
+ *            when {@code true}, a run whose [start; end) byte range fails
+ *            {@code tpt.isIndexInTable} is logged and stored as
+ *            {@code null}; when {@code false}, every run becomes a PAPX
+ *            and the table check is not performed
+ */
+ public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
+ int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
+ {
+ super( documentStream, offset );
+ for ( int x = 0; x < _crun; x++ )
+ {
+ int startAt = getStart( x );
+ int endAt = getEnd( x );
+ if ( !ignorePapxWithoutTextPieces
+ || tpt.isIndexInTable( startAt, endAt ) )
+ _papxList.add( new PAPX( startAt, endAt, tpt, getGrpprl( x ),
+ getParagraphHeight( x ), dataStream ) );
+ else
+ {
+ logger.log( POILogger.WARN, "PAPX [",
+ Integer.valueOf( startAt ), "; ",
+ Integer.valueOf( endAt ),
+ ") (bytes) doesn't have corresponding text pieces "
+ + "and will be skipped" );
+
+ _papxList.add( null );
+ }
+ }
+ _fkp = null;
+ _dataStream = dataStream;
}
/**
import javax.xml.transform.stream.StreamResult;
import junit.framework.TestCase;
+
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
}
+ public void testBug33519() throws Exception
+ {
+ String result = getHtmlText( "Bug33519.doc" ); // sample document for bug 33519
+ assertTrue( result.contains( "Планински турове" ) ); // output must contain this Cyrillic string
+ assertTrue( result.contains( "Явор Асенов" ) ); // and this one as well
+ }
+
public void testBug46610_2() throws Exception
{
String result = getHtmlText( "Bug46610_2.doc" );
import java.util.ArrayList;
import junit.framework.TestCase;
+
import org.apache.poi.hwpf.HWPFDocFixture;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
FileInformationBlock fib = _hWPFDocFixture._fib;
byte[] mainStream = _hWPFDocFixture._mainStream;
byte[] tableStream = _hWPFDocFixture._tableStream;
- int fcMin = fib.getFcMin();
- _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT, false);
+ _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
- PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT, false);
+ PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs();