this.structure = field;
}
+ Structure( int start, int end ) // boundaries only, no backing object
+ {
+ this.start = start;
+ this.end = end;
+ this.structure = null; // the two boundaries are all this structure carries
+ }
+
public int compareTo( Structure o )
{
return start < o.start ? -1 : start == o.start ? 0 : 1;
private static final POILogger logger = POILogFactory
.getLogger( AbstractWordConverter.class );
+ private static final Pattern PATTERN_HYPERLINK_EXTERNAL = Pattern
+ .compile( "^[ \\t\\r\\n]*HYPERLINK \"(.*)\".*$" ); // group 1: text between the quotes after HYPERLINK
+
+ private static final Pattern PATTERN_HYPERLINK_LOCAL = Pattern
+ .compile( "^[ \\t\\r\\n]*HYPERLINK \\\\l \"(.*)\"[ ](.*)$" ); // "l"-switch form; group 1: quoted reference, group 2: text after the single space that follows it
+
+ private static final Pattern PATTERN_PAGEREF = Pattern
+ .compile( "^[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h.*$" ); // PAGEREF followed by the "h" switch; group 1: the space-free reference name
+
private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
private static final byte SPECCHAR_DRAWN_OBJECT = 8;
}
// TODO: dead fields?
+ int skipUntil = -1;
for ( int c = 0; c < range.numCharacterRuns(); c++ )
{
CharacterRun characterRun = range.getCharacterRun( c );
if ( characterRun == null )
throw new AssertionError();
+ if ( characterRun.getStartOffset() < skipUntil )
+ continue;
+ String text = characterRun.text();
+ if ( text == null || text.length() == 0
+ || text.charAt( 0 ) != FIELD_BEGIN_MARK )
+ continue;
+
Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
.getFieldByStartOffset( FieldsDocumentPart.MAIN,
characterRun.getStartOffset() );
if ( aliveField != null )
{
addToStructures( structures, new Structure( aliveField ) );
+ skipUntil = aliveField.getFieldEndOffset() + 1;
+ }
+ else
+ {
+ int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
+ wordDocument, range, c );
+ if ( separatorEnd != null )
+ {
+ // separatorEnd holds character-run indexes, whereas the
+ // structure start below is a character offset. Translate the
+ // end run index into the end offset of that run so both
+ // boundaries are expressed in the same unit.
+ addToStructures( structures, new Structure(
+ characterRun.getStartOffset(),
+ range.getCharacterRun( separatorEnd[1] ).getEndOffset() ) );
+ // Resume scanning after the run that closes the dead field.
+ c = separatorEnd[1];
+ }
}
}
}
Element currentBlock, Range range, int currentTableLevel,
int beginMark, int separatorMark, int endMark )
{
+ if ( beginMark + 1 < separatorMark && separatorMark + 1 < endMark )
+ {
+ Range formulaRange = new Range( range.getCharacterRun(
+ beginMark + 1 ).getStartOffset(), range.getCharacterRun(
+ separatorMark - 1 ).getEndOffset(), range )
+ {
+ @Override
+ public String toString()
+ {
+ return "Dead field formula subrange: " + super.toString();
+ }
+ };
+ Range valueRange = new Range( range.getCharacterRun(
+ separatorMark + 1 ).getStartOffset(), range
+ .getCharacterRun( endMark - 1 ).getEndOffset(), range )
+ {
+ @Override
+ public String toString()
+ {
+ return "Dead field value subrange: " + super.toString();
+ }
+ };
+ String formula = formulaRange.text();
+ final Matcher matcher = PATTERN_HYPERLINK_LOCAL.matcher( formula );
+ if ( matcher.matches() )
+ {
+ String localref = matcher.group( 1 );
+ processPageref( wordDocument, currentBlock, valueRange,
+ currentTableLevel, localref );
+ return;
+ }
+ }
+
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
for ( int i = beginMark; i <= endMark; i++ )
{
if ( firstSubrange != null )
{
String formula = firstSubrange.text();
- Pattern pagerefPattern = Pattern
- .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
- Matcher matcher = pagerefPattern.matcher( formula );
+ Matcher matcher = PATTERN_PAGEREF.matcher( formula );
if ( matcher.find() )
{
String pageref = matcher.group( 1 );
if ( firstSubrange != null )
{
String formula = firstSubrange.text();
- Pattern hyperlinkPattern = Pattern
- .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
- Matcher matcher = hyperlinkPattern.matcher( formula );
- if ( matcher.find() )
+ Matcher matcher = PATTERN_HYPERLINK_EXTERNAL.matcher( formula );
+ if ( matcher.matches() )
{
String hyperlink = matcher.group( 1 );
processHyperlink( wordDocument, currentBlock,
currentTableLevel, hyperlink );
return;
}
+ matcher.usePattern( PATTERN_HYPERLINK_LOCAL );
+ if ( matcher.matches() )
+ {
+ String hyperlink = matcher.group( 1 );
+ Range textRange = null;
+ String text = matcher.group( 2 );
+ if ( AbstractWordUtils.isNotEmpty( text ) )
+ {
+ textRange = new Range( firstSubrange.getStartOffset()
+ + matcher.start( 2 ),
+ firstSubrange.getStartOffset()
+ + matcher.end( 2 ), firstSubrange )
+ {
+ @Override
+ public String toString()
+ {
+ return "Local hyperlink text";
+ }
+ };
+ }
+ processPageref( wordDocument, currentBlock, textRange,
+ currentTableLevel, hyperlink );
+ return;
+ }
}
break;
}
}
+ protected abstract void processImage( Element currentBlock,
+ boolean inlined, Picture picture, String url );
+
@Internal
protected abstract void processImageWithoutPicturesManager(
Element currentBlock, boolean inlined, Picture picture );
- protected abstract void processImage( Element currentBlock,
- boolean inlined, Picture picture, String url );
-
protected abstract void processLineBreak( Element block,
CharacterRun characterRun );
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
int currentTableLevel, int beginMark, Element currentBlock )
+ {
+ int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
+ wordDocument, range, beginMark ); // { separatorMark, endMark } indexes, or null
+ if ( separatorEnd == null )
+ return beginMark; // marks missing or duplicated: hand back the begin index unchanged
+
+ processDeadField( wordDocument, currentBlock, range, currentTableLevel,
+ beginMark, separatorEnd[0], separatorEnd[1] );
+
+ return separatorEnd[1]; // the endMark index found by the lookup
+ }
+
+ private int[] tryDeadField_lookupFieldSeparatorEnd(
+ HWPFDocumentCore wordDocument, Range range, int beginMark )
{
int separatorMark = -1;
int endMark = -1;
if ( text.getBytes().length == 0 )
continue;
- if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
+ final byte firstByte = text.getBytes()[0];
+ if ( firstByte == FIELD_BEGIN_MARK )
{
- // nested?
- Field possibleField = processDeadField( wordDocument, range,
- currentTableLevel, characterRun.getStartOffset(),
- currentBlock );
- if ( possibleField != null )
- {
- c = possibleField.getFieldEndOffset();
- }
- else
+ int[] nested = tryDeadField_lookupFieldSeparatorEnd(
+ wordDocument, range, c );
+ if ( nested != null )
{
- continue;
+ c = nested[1];
}
+ continue;
}
- if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
+ if ( firstByte == FIELD_SEPARATOR_MARK )
{
if ( separatorMark != -1 )
{
- // double;
- return beginMark;
+ // double; incorrect format
+ return null;
}
separatorMark = c;
if ( endMark != -1 )
{
// double;
- return beginMark;
+ return null;
}
endMark = c;
break;
}
-
}
if ( separatorMark == -1 || endMark == -1 )
- return beginMark;
-
- processDeadField( wordDocument, currentBlock, range, currentTableLevel,
- beginMark, separatorMark, endMark );
+ return null;
- return endMark;
+ return new int[] { separatorMark, endMark };
}
}