//initCharacterProperties();
//initParagraphProperties();
}
- private void initCharacterProperties(int charOffset, PlexOfCps charPlcf, int end)
+ private void initCharacterProperties(int charOffset, PlexOfCps charPlcf, int start, int end)
{
//Initialize character property stuff
//int currentCharPage = _charParsingState.getCurrentPage();
charStart = fkp.getStart(currentChpxIndex);
charEnd = fkp.getEnd(currentChpxIndex);
byte[] chpx = fkp.getGrpprl(currentChpxIndex);
- _listener.characterRun(new ChpxNode(charStart, charEnd, chpx));
+ _listener.characterRun(new ChpxNode(Math.max(charStart, start), Math.min(charEnd, end), chpx));
+
if (charEnd < end)
{
currentChpxIndex++;
}
while(currentPageIndex < charPlcfLen);
}
- private void initParagraphProperties(int parOffset, PlexOfCps parPlcf, int charOffset, PlexOfCps charPlcf, int end)
+ private void initParagraphProperties(int parOffset, PlexOfCps parPlcf, int charOffset, PlexOfCps charPlcf, int start, int end)
{
//Initialize paragraph property stuff
//int currentParPage = _parParsingState.getCurrentPage();
int parStart = fkp.getStart(currentPapxIndex);
int parEnd = fkp.getEnd(currentPapxIndex);
byte[] papx = fkp.getGrpprl(currentPapxIndex);
- _listener.paragraph(new PapxNode(parStart, parEnd, papx));
- initCharacterProperties(charOffset, charPlcf, end);
+ _listener.paragraph(new PapxNode(Math.max(parStart, start), Math.min(parEnd, end), papx));
+ initCharacterProperties(charOffset, charPlcf, Math.max(start, parStart), Math.min(parEnd, end));
if (parEnd < end)
{
currentPapxIndex++;
/**
 * Walks the section table (plcfsed) and reports every section to the listener
 * in two phases: sections are first reported through bodySection() until one
 * ends past fcMin + ccpText, then the remaining entries are reported through
 * hdrSection(). endSections() is called once after both phases.
 *
 * NOTE(review): parOffset, parPlcf, charOffset, charPlcf, plcfsedFC and
 * plcfsedSize are not declared in the visible part of this method; they are
 * presumably set up in lines elided from this excerpt -- confirm.
 */
private void initSectionProperties()
{
+ int ccpText = _fib.getCcpText();
// NOTE(review): ccpFtn is read but not used anywhere in the visible code.
+ int ccpFtn = _fib.getCcpFtn();
//sections
int fcMin = _fib.getFcMin();
PlexOfCps plcfsed = new PlexOfCps(plcfsedSize, 12);
int arraySize = plcfsed.length();
// NOTE(review): start is assigned but not referenced again in the visible code.
+ int start = fcMin;
// end is the boundary used below to separate the "main body" pass from the "header" pass.
+ int end = fcMin + ccpText;
// x and sectionEnd are declared outside both loops so the second loop resumes where the first stopped.
+ int x = 0;
+ int sectionEnd = 0;
+
+ //do the main body sections
+ while (x < arraySize)
+ {
// Section bounds are entries x and x + 1 of the table, shifted by fcMin.
+ int sectionStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x)) + fcMin;
+ sectionEnd = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x + 1)) + fcMin;
// The SEPX location is read from 2 bytes into entry x's struct; its size is the short stored at that location.
+ int sepxStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getStructOffset(x) + 2);
+ int sepxSize = LittleEndian.getShort(_mainDocument, sepxStart);
+
// Copy the sepxSize bytes that follow the 2-byte size field.
+ byte[] sepx = new byte[sepxSize];
+ System.arraycopy(_mainDocument, sepxStart + 2, sepx, 0, sepxSize);
+ SepxNode node = new SepxNode(x + 1, sectionStart, sectionEnd, sepx);
+ _listener.bodySection(node);
// Paragraph processing for this section is clamped so it never runs past end.
+ initParagraphProperties(parOffset, parPlcf, charOffset, charPlcf, sectionStart, Math.min(end, sectionEnd));
- for(int x = 0; x < arraySize; x++)
// A section ending past end stops this pass WITHOUT advancing x, so the
// same table entry is visited again by the loop below.
+ if (sectionEnd > end)
+ {
+ break;
+ }
+ else
+ {
+ x++;
+ }
+ }
+ //do the header sections
// Continues from the x left by the loop above (no re-initialisation).
+ for (; x < arraySize; x++)// && sectionEnd <= end; x++)
{
int sectionStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x)) + fcMin;
- int sectionEnd = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x + 1)) + fcMin;
+ sectionEnd = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x + 1)) + fcMin;
int sepxStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getStructOffset(x) + 2);
int sepxSize = LittleEndian.getShort(_mainDocument, sepxStart);
byte[] sepx = new byte[sepxSize];
System.arraycopy(_mainDocument, sepxStart + 2, sepx, 0, sepxSize);
SepxNode node = new SepxNode(x + 1, sectionStart, sectionEnd, sepx);
- _listener.section(node);
+ _listener.hdrSection(node);
// Paragraph processing here starts no earlier than end, i.e. after the range covered by the first pass.
+ initParagraphProperties(parOffset, parPlcf, charOffset, charPlcf, Math.max(sectionStart, end), sectionEnd);
- initParagraphProperties(parOffset, parPlcf, charOffset, charPlcf, sectionEnd);
}
// Signal that no more section callbacks will follow from this method.
+ _listener.endSections();
}
/**
* Initializes the DocumentProperties object unique to this document.
public class HDFObjectModel implements HDFLowLevelParsingListener
{
+ /** "WordDocument" from the POIFS */
+ private byte[] _mainDocument;
+
/** The DOP*/
private DocumentProperties _dop;
/**the StyleSheet*/
/** Font info */
private FontTable _fonts;
+ /** text offset in main stream */
+ int _fcMin;
+
/** text pieces */
BTreeSet _text = new BTreeSet();
/** document sections */
/** No-argument constructor with an empty body; it performs no setup of its own. */
public HDFObjectModel()
{
}
-
/**
 * Stores the given byte array in _mainDocument. The reference is kept as-is;
 * no copy is made.
 *
 * @param mainDocument the bytes to remember as the main document stream
 */
+ public void mainDocument(byte[] mainDocument)
+ {
+ _mainDocument = mainDocument;
+ }
/**
 * Empty implementation: the table stream bytes are ignored by this class.
 *
 * @param tableStream unused
 */
+ public void tableStream(byte[] tableStream)
+ {
+ }
/**
 * Records fcMin in _fcMin. The remaining arguments are accepted but not used
 * by this implementation.
 *
 * @param fcMin value stored in _fcMin
 * @param ccpText unused here
 * @param ccpFtn unused here
 * @param fcPlcfhdd unused here
 * @param lcbPlcfhdd unused here
 */
+ public void miscellaneous(int fcMin, int ccpText, int ccpFtn, int fcPlcfhdd, int lcbPlcfhdd)
+ {
+ _fcMin = fcMin;
+ }
/**
 * Stores the supplied document properties in _dop.
 *
 * @param dop the document properties object to keep
 */
public void document(DocumentProperties dop)
{
_dop = dop;
}
/**
 * Adds the given section node to _sections. (This diff renames the method
 * from section to bodySection; the body is unchanged.)
 *
 * @param sepx the section node to record
 */
- public void section(SepxNode sepx)
+ public void bodySection(SepxNode sepx)
{
_sections.add(sepx);
}
/**
 * Adds the given section node to _sections, the same collection that
 * bodySection uses; this class does not distinguish the two kinds.
 *
 * @param sepx the section node to record
 */
+ public void hdrSection(SepxNode sepx)
+ {
+ _sections.add(sepx);
+ }
/** Empty implementation: nothing is done when the end of the sections is signalled. */
+ public void endSections()
+ {
+ }
public void paragraph(PapxNode papx)
{
_paragraphs.add(papx);