source.dussan.org Git - poi.git/commitdiff
simplify work with fields
author: Sergey Vladimirov <sergey@apache.org>
Fri, 8 Jul 2011 14:33:01 +0000 (14:33 +0000)
committer: Sergey Vladimirov <sergey@apache.org>
Fri, 8 Jul 2011 14:33:01 +0000 (14:33 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1144337 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java

index f60514c5d7cfa39845e94eafecbc5ee03804cf65..eb0198758cf2acd2121a68f02ad0bae34637f771 100644 (file)
 ==================================================================== */
 package org.apache.poi.hwpf.converter;
 
-import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.poi.ss.formula.functions.Match;
+
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.model.Field;
+import org.apache.poi.hwpf.model.FieldsTables;
 import org.apache.poi.hwpf.model.ListFormatOverride;
 import org.apache.poi.hwpf.model.ListTables;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
@@ -55,14 +58,16 @@ public abstract class AbstractWordConverter
             CharacterRun characterRun, String text );
 
     protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
-            int currentTableLevel, Paragraph paragraph, final Element block,
-            List<CharacterRun> characterRuns, final int start, final int end )
+            int currentTableLevel, Range range, final Element block )
     {
+        if (range == null)
+            return false;
+
         boolean haveAnyText = false;
 
-        for ( int c = start; c < end; c++ )
+        for ( int c = 0; c < range.numCharacterRuns(); c++ )
         {
-            CharacterRun characterRun = characterRuns.get( c );
+            CharacterRun characterRun = range.getCharacterRun( c );
 
             if ( characterRun == null )
                 throw new AssertionError();
@@ -86,8 +91,23 @@ public abstract class AbstractWordConverter
 
             if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
             {
-                int skipTo = tryField( hwpfDocument, paragraph,
-                        currentTableLevel, characterRuns, c, block );
+                if ( hwpfDocument instanceof HWPFDocument )
+                {
+                    Field aliveField = ( (HWPFDocument) hwpfDocument )
+                            .getFieldsTables().lookupFieldByStartOffset(
+                                    FieldsTables.PLCFFLDMOM,
+                                    characterRun.getStartOffset() );
+                    if ( aliveField != null )
+                    {
+                        processField( ( (HWPFDocument) hwpfDocument ), range,
+                                currentTableLevel, aliveField, block );
+                        c = aliveField.getEndOffset();
+                        continue;
+                    }
+                }
+
+                int skipTo = tryDeadField( hwpfDocument, range,
+                        currentTableLevel, c, block );
 
                 if ( skipTo != c )
                 {
@@ -145,91 +165,48 @@ public abstract class AbstractWordConverter
     protected abstract void processDocumentInformation(
             SummaryInformation summaryInformation );
 
-    protected void processField( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph, int currentTableLevel,
-            List<CharacterRun> characterRuns, int beginMark, int separatorMark,
-            int endMark )
+    protected void processDeadField( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range range, int currentTableLevel,
+            int beginMark, int separatorMark, int endMark )
     {
-
-        Pattern hyperlinkPattern = Pattern
-                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
-        Pattern pagerefPattern = Pattern
-                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
-
-        if ( separatorMark - beginMark > 1 )
-        {
-            int index = beginMark + 1;
-            CharacterRun firstAfterBegin = null;
-            while ( index < separatorMark )
-            {
-                firstAfterBegin = paragraph.getCharacterRun( index );
-                if ( firstAfterBegin == null )
-                {
-                    logger.log( POILogger.WARN,
-                            "Paragraph " + paragraph.getStartOffset() + "--"
-                                    + paragraph.getEndOffset()
-                                    + " contains null CharacterRun #" + index );
-                    index++;
-                    continue;
-                }
-                break;
-            }
-
-            if ( firstAfterBegin != null )
-            {
-                final Matcher hyperlinkMatcher = hyperlinkPattern
-                        .matcher( firstAfterBegin.text() );
-                if ( hyperlinkMatcher.matches() )
-                {
-                    String hyperlink = hyperlinkMatcher.group( 1 );
-                    processHyperlink( wordDocument, currentBlock, paragraph,
-                            characterRuns, currentTableLevel, hyperlink,
-                            separatorMark + 1, endMark );
-                    return;
-                }
-
-                final Matcher pagerefMatcher = pagerefPattern
-                        .matcher( firstAfterBegin.text() );
-                if ( pagerefMatcher.matches() )
-                {
-                    String pageref = pagerefMatcher.group( 1 );
-                    processPageref( wordDocument, currentBlock, paragraph,
-                            characterRuns, currentTableLevel, pageref,
-                            separatorMark + 1, endMark );
-                    return;
-                }
-            }
-        }
-
         StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
         for ( int i = beginMark; i <= endMark; i++ )
         {
             debug.append( "\t" );
-            debug.append( paragraph.getCharacterRun( i ) );
+            debug.append( range.getCharacterRun( i ) );
             debug.append( "\n" );
         }
         logger.log( POILogger.WARN, debug );
 
+        Range deadFieldValueSubrage = new Range( range.getCharacterRun(
+                separatorMark ).getStartOffset() + 1, range.getCharacterRun(
+                endMark ).getStartOffset(), range )
+        {
+            @Override
+            public String toString()
+            {
+                return "DeadFieldValueSubrange (" + super.toString() + ")";
+            }
+        };
+
         // just output field value
         if ( separatorMark + 1 < endMark )
-            processCharacters( wordDocument, currentTableLevel, paragraph,
-                    currentBlock, characterRuns, separatorMark + 1, endMark );
+            processCharacters( wordDocument, currentTableLevel,
+                    deadFieldValueSubrage, currentBlock );
 
         return;
     }
 
     protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String hyperlink, int i, int endMark );
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String hyperlink );
 
     protected abstract void processImage( Element currentBlock,
             boolean inlined, Picture picture );
 
     protected abstract void processPageref( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String pageref, int beginTextInclusive, int endTextExclusive );
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String pageref );
 
     protected abstract void processParagraph( HWPFDocumentCore wordDocument,
             Element parentFopElement, int currentTableLevel,
@@ -317,20 +294,107 @@ public abstract class AbstractWordConverter
     protected abstract void processTable( HWPFDocumentCore wordDocument,
             Element flow, Table table );
 
-    protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
-            int currentTableLevel, List<CharacterRun> characterRuns,
-            int beginMark, Element currentBlock )
+    protected Field processField( HWPFDocumentCore wordDocument,
+            Range charactersRange, int currentTableLevel, int startOffset,
+            Element currentBlock )
+    {
+        if ( !( wordDocument instanceof HWPFDocument ) )
+            return null;
+
+        HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
+        Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
+                FieldsTables.PLCFFLDMOM, startOffset );
+        if ( field == null )
+            return null;
+
+        processField( hwpfDocument, charactersRange, currentTableLevel, field,
+                currentBlock );
+
+        return field;
+    }
+
+    protected void processField( HWPFDocument hwpfDocument, Range parentRange,
+            int currentTableLevel, Field field, Element currentBlock )
+    {
+        switch ( field.getType() )
+        {
+        case 37: // page reference
+        {
+            final Range firstSubrange = field.firstSubrange( parentRange );
+            if ( firstSubrange != null )
+            {
+                String formula = firstSubrange.text();
+                Pattern pagerefPattern = Pattern
+                        .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
+                Matcher matcher = pagerefPattern.matcher( formula );
+                if ( matcher.find() )
+                {
+                    String pageref = matcher.group( 1 );
+                    processPageref( hwpfDocument, currentBlock,
+                            field.secondSubrange( parentRange ),
+                            currentTableLevel, pageref );
+                    return;
+                }
+            }
+            break;
+        }
+        case 88: // hyperlink
+        {
+            final Range firstSubrange = field.firstSubrange( parentRange );
+            if ( firstSubrange != null )
+            {
+                String formula = firstSubrange.text();
+                Pattern hyperlinkPattern = Pattern
+                        .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
+                Matcher matcher = hyperlinkPattern.matcher( formula );
+                if ( matcher.find() )
+                {
+                    String hyperlink = matcher.group( 1 );
+                    processHyperlink( hwpfDocument, currentBlock,
+                            field.secondSubrange( parentRange ),
+                            currentTableLevel, hyperlink );
+                    return;
+                }
+            }
+            break;
+        }
+        }
+
+        logger.log( POILogger.WARN, parentRange + " contains " + field
+                + " with unsupported type or format" );
+        processCharacters( hwpfDocument, currentTableLevel,
+                field.secondSubrange( parentRange ), currentBlock );
+    }
+
+    protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
+            int currentTableLevel, int beginMark, Element currentBlock )
     {
         int separatorMark = -1;
         int endMark = -1;
-        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
+        for ( int c = beginMark + 1; c < range.numCharacterRuns(); c++ )
         {
-            CharacterRun characterRun = paragraph.getCharacterRun( c );
+            CharacterRun characterRun = range.getCharacterRun( c );
 
             String text = characterRun.text();
             if ( text.getBytes().length == 0 )
                 continue;
 
+            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
+            {
+                // nested?
+                Field possibleField = processField( wordDocument, range,
+                        currentTableLevel, characterRun.getStartOffset(),
+                        currentBlock );
+                if ( possibleField != null )
+                {
+                    c = possibleField.getEndOffset();
+                }
+                else
+                {
+                    continue;
+                }
+            }
+
             if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
             {
                 if ( separatorMark != -1 )
@@ -360,8 +424,8 @@ public abstract class AbstractWordConverter
         if ( separatorMark == -1 || endMark == -1 )
             return beginMark;
 
-        processField( wordDocument, currentBlock, paragraph, currentTableLevel,
-                characterRuns, beginMark, separatorMark, endMark );
+        processDeadField( wordDocument, currentBlock, range, currentTableLevel,
+                beginMark, separatorMark, endMark );
 
         return endMark;
     }
index 75e097c1021876e11a60898507157e6aa7af4043..b74ddc043426ff0337924ddc21c106ec7de7bbe0 100644 (file)
@@ -22,19 +22,14 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.List;
 
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.HWPFOldDocument;
 import org.apache.poi.hwpf.OldWordFileFormatException;
-import org.apache.poi.hwpf.model.CHPX;
 import org.apache.poi.hwpf.model.ListLevel;
 import org.apache.poi.hwpf.model.ListTables;
 import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.hwpf.usermodel.Section;
@@ -55,35 +50,6 @@ public class AbstractWordUtils
         return str1 == null ? str2 == null : str1.equals( str2 );
     }
 
-    // XXX incorporate into Range
-    static List<CharacterRun> findCharacterRuns( Range range )
-    {
-        final int min = range.getStartOffset();
-        final int max = range.getEndOffset();
-
-        List<CharacterRun> result = new ArrayList<CharacterRun>();
-        List<CHPX> chpxs = getCharacters( range );
-        for ( int i = 0; i < chpxs.size(); i++ )
-        {
-            CHPX chpx = chpxs.get( i );
-            if ( chpx == null )
-                continue;
-
-            if ( Math.max( min, chpx.getStart() ) <= Math.min( max,
-                    chpx.getEnd() ) )
-            {
-                final CharacterRun characterRun = getCharacterRun( range, chpx );
-
-                if ( characterRun == null )
-                    continue;
-
-                result.add( characterRun );
-            }
-        }
-
-        return result;
-    }
-
     public static String getBorderType( BorderCode borderCode )
     {
         if ( borderCode == null )
@@ -196,35 +162,6 @@ public class AbstractWordUtils
         return bulletBuffer.toString();
     }
 
-    private static CharacterRun getCharacterRun( Range range, CHPX chpx )
-    {
-        try
-        {
-            Method method = Range.class.getDeclaredMethod( "getCharacterRun",
-                    CHPX.class );
-            method.setAccessible( true );
-            return (CharacterRun) method.invoke( range, chpx );
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
-    private static List<CHPX> getCharacters( Range range )
-    {
-        try
-        {
-            Field field = Range.class.getDeclaredField( "_characters" );
-            field.setAccessible( true );
-            return (List<CHPX>) field.get( range );
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
     public static String getColor( int ico )
     {
         switch ( ico )
index 04c5ad2833b1f92970418b650b67b81369c44ba2..3be52d8d0a0924e37a213c318789a6b92993a76c 100644 (file)
@@ -18,7 +18,6 @@ package org.apache.poi.hwpf.converter;
 
 import java.io.File;
 import java.io.FileWriter;
-import java.util.List;
 import java.util.Stack;
 
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -34,6 +33,7 @@ import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.hwpf.usermodel.Section;
 import org.apache.poi.hwpf.usermodel.SectionProperties;
 import org.apache.poi.hwpf.usermodel.Table;
@@ -248,19 +248,17 @@ public class WordToFoConverter extends AbstractWordConverter
             foDocumentFacade.setDescription( summaryInformation.getComments() );
     }
 
-    protected void processHyperlink( HWPFDocumentCore hwpfDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String hyperlink, int beginTextInclusive, int endTextExclusive )
+    protected void processHyperlink( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String hyperlink )
     {
         Element basicLink = foDocumentFacade
                 .createBasicLinkExternal( hyperlink );
         currentBlock.appendChild( basicLink );
 
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
+        if ( textRange != null )
+            processCharacters( wordDocument, currentTableLevel, textRange,
+                    basicLink );
     }
 
     /**
@@ -292,17 +290,15 @@ public class WordToFoConverter extends AbstractWordConverter
     }
 
     protected void processPageref( HWPFDocumentCore hwpfDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String pageref, int beginTextInclusive, int endTextExclusive )
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String pageref )
     {
         Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
         currentBlock.appendChild( basicLink );
 
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
+        if ( textRange != null )
+            processCharacters( hwpfDocument, currentTableLevel, textRange,
+                    basicLink );
     }
 
     protected void processParagraph( HWPFDocumentCore hwpfDocument,
@@ -356,10 +352,8 @@ public class WordToFoConverter extends AbstractWordConverter
                 haveAnyText |= bulletText.trim().length() != 0;
             }
 
-            List<CharacterRun> characterRuns = WordToFoUtils
-                    .findCharacterRuns( paragraph );
             haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
-                    paragraph, block, characterRuns, 0, characterRuns.size() );
+                    paragraph, block );
 
             if ( !haveAnyText )
             {
index 7122b863eaca3f8fb769fa7e6d39ab5d72e87436..d9d77023264c6f3eeb94b4326a52862d5ef48fdf 100644 (file)
@@ -18,7 +18,6 @@ package org.apache.poi.hwpf.converter;
 
 import java.io.File;
 import java.io.FileWriter;
-import java.util.List;
 import java.util.Stack;
 
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -34,6 +33,7 @@ import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.hwpf.usermodel.Section;
 import org.apache.poi.hwpf.usermodel.SectionProperties;
 import org.apache.poi.hwpf.usermodel.Table;
@@ -226,18 +226,17 @@ public class WordToHtmlConverter extends AbstractWordConverter
                     .addDescription( summaryInformation.getComments() );
     }
 
+    @Override
     protected void processHyperlink( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String hyperlink, int beginTextInclusive, int endTextExclusive )
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String hyperlink )
     {
         Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
         currentBlock.appendChild( basicLink );
 
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( wordDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
+        if ( textRange != null )
+            processCharacters( wordDocument, currentTableLevel, textRange,
+                    basicLink );
     }
 
     /**
@@ -266,17 +265,15 @@ public class WordToHtmlConverter extends AbstractWordConverter
     }
 
     protected void processPageref( HWPFDocumentCore hwpfDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String pageref, int beginTextInclusive, int endTextExclusive )
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String pageref )
     {
         Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
         currentBlock.appendChild( basicLink );
 
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
+        if ( textRange != null )
+            processCharacters( hwpfDocument, currentTableLevel, textRange,
+                    basicLink );
     }
 
     protected void processParagraph( HWPFDocumentCore hwpfDocument,
@@ -322,10 +319,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
                 pElement.appendChild( textNode );
             }
 
-            List<CharacterRun> characterRuns = WordToHtmlUtils
-                    .findCharacterRuns( paragraph );
             processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    pElement, characterRuns, 0, characterRuns.size() );
+                    pElement );
         }
         finally
         {