]> source.dussan.org Git - poi.git/commitdiff
avoid exceptions when using POI in Tika, see Bugs 51771 and 51770
author Yegor Kozlov <yegor@apache.org>
Mon, 12 Sep 2011 10:19:50 +0000 (10:19 +0000)
committer Yegor Kozlov <yegor@apache.org>
Mon, 12 Sep 2011 10:19:50 +0000 (10:19 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1169679 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/hssf/record/EscherAggregate.java
src/java/org/apache/poi/hssf/usermodel/HSSFShapeGroup.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFPicture.java
src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFPictureData.java
src/scratchpad/src/org/apache/poi/hslf/model/OLEShape.java
test-data/document/Bug51170.docx [new file with mode: 0644]

index b0aef8b445dd3e5e63ba949ebd52fba93edf76d8..47bcd10b5080d3cc42aea8f5b9803554e21f221e 100644 (file)
@@ -34,6 +34,8 @@
 
     <changes>
         <release version="3.8-beta5" date="2011-??-??">
+           <action dev="poi-developers" type="add">51770 - prevent NPE in XWPFPicture.getPictureData()</action>
+           <action dev="poi-developers" type="add">51771 - prevent NPE when getting object data from OLEShape in HSLF</action>
            <action dev="poi-developers" type="add">51196 - more progress with Chart API in XSSF</action>
            <action dev="poi-developers" type="fix">51785 - Allow XSSF setForceFormulaRecalculation to work with the minimal ooxml-schemas jar</action>
            <action dev="poi-developers" type="fix">51772 - IllegalArgumentException Parsing MS Word 97 - 2003</action>
index bc893b4c3b2c4d8ffeb5cff2a019805bdc50a68f..abd67e72cb4046f6e9b1b7ccdea2d06baca12080 100644 (file)
@@ -18,7 +18,6 @@
 package org.apache.poi.hssf.record;
 
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -26,6 +25,7 @@ import java.util.Map;
 
 import org.apache.poi.ddf.DefaultEscherRecordFactory;
 import org.apache.poi.ddf.EscherBoolProperty;
+import org.apache.poi.ddf.EscherChildAnchorRecord;
 import org.apache.poi.ddf.EscherClientAnchorRecord;
 import org.apache.poi.ddf.EscherClientDataRecord;
 import org.apache.poi.ddf.EscherContainerRecord;
@@ -33,7 +33,6 @@ import org.apache.poi.ddf.EscherDgRecord;
 import org.apache.poi.ddf.EscherDggRecord;
 import org.apache.poi.ddf.EscherOptRecord;
 import org.apache.poi.ddf.EscherProperties;
-import org.apache.poi.ddf.EscherProperty;
 import org.apache.poi.ddf.EscherRecord;
 import org.apache.poi.ddf.EscherRecordFactory;
 import org.apache.poi.ddf.EscherSerializationListener;
@@ -46,14 +45,16 @@ import org.apache.poi.hssf.model.CommentShape;
 import org.apache.poi.hssf.model.ConvertAnchor;
 import org.apache.poi.hssf.model.DrawingManager2;
 import org.apache.poi.hssf.model.TextboxShape;
+import org.apache.poi.hssf.usermodel.HSSFAnchor;
+import org.apache.poi.hssf.usermodel.HSSFChildAnchor;
 import org.apache.poi.hssf.usermodel.HSSFClientAnchor;
 import org.apache.poi.hssf.usermodel.HSSFPatriarch;
 import org.apache.poi.hssf.usermodel.HSSFPicture;
 import org.apache.poi.hssf.usermodel.HSSFShape;
 import org.apache.poi.hssf.usermodel.HSSFShapeContainer;
 import org.apache.poi.hssf.usermodel.HSSFShapeGroup;
-import org.apache.poi.hssf.usermodel.HSSFTextbox;
 import org.apache.poi.hssf.usermodel.HSSFSimpleShape;
+import org.apache.poi.hssf.usermodel.HSSFTextbox;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
 
@@ -584,28 +585,42 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
                        );
                }
 
+               convertRecordsToUserModelRecursive(tcc, patriarch, null);
+
+               // Now, clear any trace of what records make up
+               //  the patriarch
+               // Otherwise, everything will go horribly wrong
+               //  when we try to write out again....
+//             clearEscherRecords();
+               drawingManager.getDgg().setFileIdClusters(new EscherDggRecord.FileIdCluster[0]);
+
+               // TODO: Support converting our records
+               // back into shapes
+               // log.log(POILogger.WARN, "Not processing objects into Patriarch!");
+       }
+
+       private static void convertRecordsToUserModelRecursive(List tcc, HSSFShapeContainer container, HSSFShape parent) {
                // Now process the containers for each group
                //  and objects
                for(int i=1; i<tcc.size(); i++) {
-                       EscherContainerRecord shapeContainer =
-                               (EscherContainerRecord)tcc.get(i);
-                       //System.err.println("\n\n*****\n\n");
-                       //System.err.println(shapeContainer);
+                       EscherContainerRecord shapeContainer = (EscherContainerRecord)tcc.get(i);
 
                        // Could be a group, or a base object
-
                        if (shapeContainer.getRecordId() == EscherContainerRecord.SPGR_CONTAINER)
                        {
                                // Group
-                               if (shapeContainer.getChildRecords().size() > 0)
+                               final int shapeChildren = shapeContainer.getChildRecords().size();
+                               if (shapeChildren > 0)
                                {
-                                       HSSFShapeGroup group = new HSSFShapeGroup( null,
-                                                       new HSSFClientAnchor() );
-                                       patriarch.getChildren().add( group );
+                                       HSSFShapeGroup group = new HSSFShapeGroup( parent, new HSSFClientAnchor() );
+                                       addToParentOrContainer(group, container, parent);
 
-                                       EscherContainerRecord groupContainer = (EscherContainerRecord) shapeContainer
-                                                       .getChild( 0 );
+                                       EscherContainerRecord groupContainer = (EscherContainerRecord) shapeContainer.getChild( 0 );
                                        convertRecordsToUserModel( groupContainer, group );
+                                       
+                                       if (shapeChildren>1){
+                                               convertRecordsToUserModelRecursive(shapeContainer.getChildRecords(), container, group);
+                                       }
                                } else
                                {
                                        log.log( POILogger.WARN,
@@ -621,9 +636,9 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
                                switch (type)
                                {
                                case ST_TEXTBOX:
-                                       HSSFTextbox box = new HSSFTextbox( null,
+                                       HSSFTextbox box = new HSSFTextbox( parent,
                                                        new HSSFClientAnchor() );
-                                       patriarch.addShape( box );
+                                       addToParentOrContainer(box, container, parent);
 
                                        convertRecordsToUserModel( shapeContainer, box );
                                        break;
@@ -645,14 +660,34 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
                                                EscherClientAnchorRecord anchorRecord = (EscherClientAnchorRecord) getEscherChild(
                                                                shapeContainer,
                                                                EscherClientAnchorRecord.RECORD_ID );
-                        HSSFClientAnchor anchor = toClientAnchor(anchorRecord);
 
-                                               HSSFPicture picture = new HSSFPicture( null, anchor );
+                                               EscherChildAnchorRecord childRecord = (EscherChildAnchorRecord) getEscherChild(
+                                                               shapeContainer,
+                                                               EscherChildAnchorRecord.RECORD_ID );
+
+                                               if (anchorRecord!=null && childRecord!=null){
+                                                       log.log( POILogger.WARN, "Picture with both CLIENT and CHILD anchor: "+ type );
+                                               }
+                                       
+                                               HSSFAnchor anchor;
+                                               if (anchorRecord!=null){
+                                                       anchor = toClientAnchor(anchorRecord);
+                                               }else{
+                                                       anchor = toChildAnchor(childRecord);
+                                               }
+
+                                               HSSFPicture picture = new HSSFPicture( parent, anchor );
                                                picture.setPictureIndex( pictureIndex );
-                                               patriarch.addShape( picture );
+
+                                               addToParentOrContainer(picture, container, parent);
                                        }
                                        break;
                                default:
+                                       final HSSFSimpleShape shape = new HSSFSimpleShape( parent,
+                                                       new HSSFClientAnchor() );
+                                       addToParentOrContainer(shape, container, parent);
+                                       convertRecordsToUserModel( shapeContainer, shape);
+                                       
                                        log.log( POILogger.WARN, "Unhandled shape type: "
                                                        + type );
                                        break;
@@ -663,20 +698,19 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
                        }
 
                }
+       }
 
-               // Now, clear any trace of what records make up
-               //  the patriarch
-               // Otherwise, everything will go horribly wrong
-               //  when we try to write out again....
-//             clearEscherRecords();
-               drawingManager.getDgg().setFileIdClusters(new EscherDggRecord.FileIdCluster[0]);
+    private static void addToParentOrContainer(HSSFShape shape, HSSFShapeContainer container, HSSFShape parent) {
 
-               // TODO: Support converting our records
-               // back into shapes
-               // log.log(POILogger.WARN, "Not processing objects into Patriarch!");
+       if (parent instanceof HSSFShapeGroup)
+               ((HSSFShapeGroup) parent).addShape(shape);
+       else if (container instanceof HSSFPatriarch)
+               ((HSSFPatriarch) container).addShape(shape);
+       else
+               container.getChildren().add(shape);
        }
 
-    private HSSFClientAnchor toClientAnchor(EscherClientAnchorRecord anchorRecord){
+       private static HSSFClientAnchor toClientAnchor(EscherClientAnchorRecord anchorRecord){
         HSSFClientAnchor anchor = new HSSFClientAnchor();
         anchor.setAnchorType(anchorRecord.getFlag());
         anchor.setCol1( anchorRecord.getCol1() );
@@ -690,7 +724,21 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
         return anchor;
     }
 
-       private void convertRecordsToUserModel(EscherContainerRecord shapeContainer, Object model) {
+    private static HSSFChildAnchor toChildAnchor(EscherChildAnchorRecord anchorRecord){
+        HSSFChildAnchor anchor = new HSSFChildAnchor();
+//        anchor.setAnchorType(anchorRecord.getFlag());
+//        anchor.setCol1( anchorRecord.getCol1() );
+//        anchor.setCol2( anchorRecord.getCol2() );
+        anchor.setDx1( anchorRecord.getDx1() );
+        anchor.setDx2( anchorRecord.getDx2() );
+        anchor.setDy1( anchorRecord.getDy1() );
+        anchor.setDy2( anchorRecord.getDy2() );
+//        anchor.setRow1( anchorRecord.getRow1() );
+//        anchor.setRow2( anchorRecord.getRow2() );
+        return anchor;
+    }
+
+       private static void convertRecordsToUserModel(EscherContainerRecord shapeContainer, Object model) {
                for(Iterator<EscherRecord> it = shapeContainer.getChildIterator(); it.hasNext();) {
                        EscherRecord r = it.next();
                        if(r instanceof EscherSpgrRecord) {
@@ -728,6 +776,10 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
                        }
                        else if(r instanceof EscherSpRecord) {
                                // Use flags if needed
+                               final EscherSpRecord spr = (EscherSpRecord) r;
+                               if (model instanceof HSSFShape){
+                                       final HSSFShape s = (HSSFShape) model;
+                               }
                        }
                        else if(r instanceof EscherOptRecord) {
                                // Use properties if needed
index d21604f3bbf7aca9e7cfce4e18192d2ea90d5d31..905dad3fc2622940c26cb90f8223a71be9771bed 100644 (file)
@@ -56,6 +56,11 @@ public class HSSFShapeGroup
         return group;
     }
 
+    public void addShape(HSSFShape shape){
+        shape._patriarch = this._patriarch;
+        shapes.add(shape);
+    }
+
     /**
      * Create a new simple shape under this group.
      * @param anchor    the position of the shape.
@@ -177,4 +182,4 @@ public class HSSFShapeGroup
         }
         return count;
     }
-}
\ No newline at end of file
+}
index d613030f88c0f02c40018c4fb5d432aff5f49cb8..7dec18513f8f43c75d9b64817826202da65b4874 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
 
 import org.apache.poi.POIXMLDocumentPart;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTBlipFillProperties;
 import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
 
 
@@ -58,7 +59,14 @@ public class XWPFPicture {
      * Note - not all kinds of picture have data
      */
     public XWPFPictureData getPictureData(){
-        String blipId = ctPic.getBlipFill().getBlip().getEmbed();
+        CTBlipFillProperties blipProps = ctPic.getBlipFill();
+
+        if(blipProps == null || !blipProps.isSetBlip()) {
+            // return null if Blip data is missing
+            return null;
+        }
+
+        String blipId = blipProps.getBlip().getEmbed();
         POIXMLDocumentPart part = run.getParagraph().getPart();
         if (part != null)
         {
index 72aa4c115e07078eb9062b06be772ac62670164b..89dfcb1453d0ace3ee3b33895393a17f2229aefa 100644 (file)
@@ -129,4 +129,26 @@ public class TestXWPFPictureData extends TestCase {
     public void testGetChecksum() {
         
     }
+
+    public void testBug51770() throws InvalidFormatException, IOException {
+        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug51170.docx");
+        XWPFHeaderFooterPolicy policy = doc.getHeaderFooterPolicy();
+        XWPFHeader header = policy.getDefaultHeader();
+        for (XWPFParagraph paragraph : header.getParagraphs()) {
+            for (XWPFRun run : paragraph.getRuns()) {
+                for (XWPFPicture picture : run.getEmbeddedPictures()) {
+                    if (paragraph.getDocument() != null) {
+                        System.out.println(picture.getCTPicture());
+                        XWPFPictureData data = picture.getPictureData();
+                        if(data != null) System.out.println(data.getFileName());
+                    }
+                }
+            }
+        }
+
+    }
+
+    private void process(XWPFParagraph paragraph){
+
+    }
 }
index b6b4db67340987461e3e2585e252524b40bb82c1..717f7a06be15c360ecbd8c2502638d8279446218 100644 (file)
@@ -83,16 +83,17 @@ public final class OLEShape extends Picture {
         ObjectData[] ole = ppt.getEmbeddedObjects();
 
         //persist reference
-        int ref = getExEmbed().getExOleObjAtom().getObjStgDataRef();
-
+        ExEmbed exEmbed = getExEmbed();
         ObjectData data = null;
+        if(exEmbed != null) {
+            int ref = exEmbed.getExOleObjAtom().getObjStgDataRef();
 
-        for (int i = 0; i < ole.length; i++) {
-            if(ole[i].getExOleObjStg().getPersistId() == ref) {
-                data=ole[i];
+            for (int i = 0; i < ole.length; i++) {
+                if(ole[i].getExOleObjStg().getPersistId() == ref) {
+                    data=ole[i];
+                }
             }
         }
-
         if (data==null) {
             logger.log(POILogger.WARN, "OLE data not found");
         }
diff --git a/test-data/document/Bug51170.docx b/test-data/document/Bug51170.docx
new file mode 100644 (file)
index 0000000..c712cdc
Binary files /dev/null and b/test-data/document/Bug51170.docx differ