diff options
author | Marius Volkhart <mariusvolkhart@apache.org> | 2021-02-28 23:16:14 +0000 |
---|---|---|
committer | Marius Volkhart <mariusvolkhart@apache.org> | 2021-02-28 23:16:14 +0000 |
commit | d1c9a07860e365db4e335d24ad67e87544bdcceb (patch) | |
tree | 76c8d1fa79182df3aba38df998a41ab3e10a9c34 /src | |
parent | 77ec895ee01e8213d0b9f36f4c05dc839648294f (diff) | |
download | poi-d1c9a07860e365db4e335d24ad67e87544bdcceb.tar.gz poi-d1c9a07860e365db4e335d24ad67e87544bdcceb.zip |
Add the ability to edit HSLFPictureData contents
Pictures can now be edited by calling HSLFPictureData#setData(byte[]). The byte[] should contain the image data as an image viewer might read it.
To enable this functionality, a tighter coupling between the EscherBSERecords of the slideshow and the HSLFPictureData was required. This ensures that changes in image data size are accurately recorded in the records.
In the course of coupling the records and the HSLFPictureData, various scenarios arose where a mapping of records to pictures was non-trivial. Accordingly, the HSLFSlideShowImpl#matchPicturesAndRecords(...) function was added to perform a more sophisticated matching pass. This function is heavily exercised by org.apache.poi.hslf.usermodel.TestBugs.testFile[5] and PPTX2PNG.render[2], as well as the new TestPictures#testSlideshowWithIncorrectOffsets().
Closes #225
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887017 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
18 files changed, 1031 insertions, 129 deletions
diff --git a/src/java/org/apache/poi/sl/usermodel/PictureData.java b/src/java/org/apache/poi/sl/usermodel/PictureData.java index 83b54d75c7..df2ca79b01 100644 --- a/src/java/org/apache/poi/sl/usermodel/PictureData.java +++ b/src/java/org/apache/poi/sl/usermodel/PictureData.java @@ -105,6 +105,10 @@ public interface PictureData { /** * Sets the binary picture data + * <p> + * The format of the data must match the format of {@link #getType()}. Failure to match the picture data may result + * in data loss. + * * @param data picture data */ void setData(byte[] data) throws IOException; diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java b/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java index afc8cd5481..bb5904da55 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java @@ -20,13 +20,17 @@ package org.apache.poi.hslf.blip; import java.awt.Dimension; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import javax.imageio.ImageIO; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.usermodel.HSLFPictureData; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -35,6 +39,29 @@ import org.apache.poi.util.Units; */ public abstract class Bitmap extends HSLFPictureData { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link Bitmap}. This API led to detached {@link Bitmap} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public Bitmap() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + protected Bitmap(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData(){ byte[] rawdata = getRawData(); @@ -43,17 +70,22 @@ public abstract class Bitmap extends HSLFPictureData { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + 1 + data.length]; + int offset = 0; + + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + if (getUIDInstanceCount() == 2) { - out.write(checksum); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; } - out.write(0); - out.write(data); - setRawData(out.toByteArray()); + offset++; + System.arraycopy(data, 0, rawData, offset, data.length); + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java b/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java index 8a091bd7ab..1840d55409 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java @@ -17,10 +17,13 @@ package org.apache.poi.hslf.blip; -import java.io.IOException; - +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.Removal; /** * Represents a DIB picture data in a PPT file @@ -35,6 +38,29 @@ public final class DIB extends Bitmap { */ private static final int HEADER_SIZE = 14; + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link DIB}. This API led to detached {@link DIB} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public DIB() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public DIB(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public PictureType getType(){ return PictureType.DIB; @@ -100,9 +126,9 @@ public final class DIB extends Bitmap { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { //cut off the bitmap file-header byte[] dib = IOUtils.safelyClone(data, HEADER_SIZE, data.length-HEADER_SIZE, data.length); - super.setData(dib); + return super.formatImageForSlideshow(dib); } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java b/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java index ba90372f85..e6bf60da16 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java @@ -24,9 +24,14 @@ import java.io.IOException; import java.io.InputStream; import java.util.zip.InflaterInputStream; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.exceptions.HSLFException; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderEMF; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -34,6 +39,29 @@ import org.apache.poi.util.Units; */ public final class EMF extends Metafile { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link EMF}. This API led to detached {@link EMF} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public EMF() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public EMF(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData(){ try { @@ -60,11 +88,11 @@ public final class EMF extends Metafile { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { byte[] compressed = compress(data, 0, data.length); ImageHeaderEMF nHeader = new ImageHeaderEMF(data, 0); - + Header header = new Header(); header.setWmfSize(data.length); header.setBounds(nHeader.getBounds()); @@ -73,15 +101,22 @@ public final class EMF extends Metafile { header.setZipSize(compressed.length); byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length]; + int offset = 0; + + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + if (getUIDInstanceCount() == 2) { - out.write(checksum); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; } - header.write(out); - out.write(compressed); - setRawData(out.toByteArray()); + header.write(rawData, offset); + offset += header.getSize(); + System.arraycopy(compressed, 0, rawData, offset, compressed.length); + + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java b/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java index 08ba6b73e7..cf90746d6b 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java @@ -18,6 +18,12 @@ package org.apache.poi.hslf.blip; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; + /** * Represents a JPEG picture data in a PPT file */ @@ -26,6 +32,29 @@ public final class JPEG extends Bitmap { public enum ColorSpace { rgb, cymk } private ColorSpace colorSpace = ColorSpace.rgb; + + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link JPEG}. This API led to detached {@link JPEG} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public JPEG() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public JPEG(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } @Override public PictureType getType(){ diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java b/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java index 6ebd4a0a61..ea855d460e 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java @@ -25,9 +25,15 @@ import java.io.IOException; import java.io.OutputStream; import java.util.zip.DeflaterOutputStream; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.usermodel.HSLFPictureData; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianInputStream; import org.apache.poi.util.LittleEndianOutputStream; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -37,6 +43,29 @@ import org.apache.poi.util.Units; public abstract class Metafile extends HSLFPictureData { /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link Metafile}. This API led to detached {@link Metafile} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public Metafile() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + protected Metafile(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + + /** * A structure which represents a 34-byte header preceding the compressed metafile data */ public static class Header{ @@ -117,6 +146,44 @@ public abstract class Metafile extends HSLFPictureData { leos.writeByte(filter); } + void write(byte[] destination, int offset) { + //hmf + LittleEndian.putInt(destination, offset, wmfsize); + offset += 4; + + //left + LittleEndian.putInt(destination, offset, bounds.x); + offset += 4; + + //top + LittleEndian.putInt(destination, offset, bounds.y); + offset += 4; + + //right + LittleEndian.putInt(destination, offset, bounds.x + bounds.width); + offset += 4; + + //bottom + LittleEndian.putInt(destination, offset, bounds.y + bounds.height); + offset += 4; + + //inch + LittleEndian.putInt(destination, offset, size.width); + offset += 4; + + //inch + LittleEndian.putInt(destination, offset, size.height); + offset += 4; + + LittleEndian.putInt(destination, offset, zipsize); + offset += 4; + + destination[offset] = (byte) compression; + offset++; + + destination[offset] = (byte) filter; + } + public int getSize(){ return 34; } @@ -146,11 +213,16 @@ public abstract class Metafile extends HSLFPictureData { } } - protected static byte[] compress(byte[] bytes, int offset, int length) throws IOException { + protected static byte[] compress(byte[] bytes, int offset, int length) { ByteArrayOutputStream out = new ByteArrayOutputStream(); - DeflaterOutputStream deflater = new DeflaterOutputStream( out ); - deflater.write(bytes, offset, length); - deflater.close(); + try (DeflaterOutputStream deflater = new DeflaterOutputStream(out)) { + deflater.write(bytes, offset, length); + } catch (IOException e) { + // IOException won't get thrown by the DeflaterOutputStream in this configuration because: + // 1. ByteArrayOutputStream doesn't throw an IOException during writes. + // 2. The DeflaterOutputStream is not finished until we're done writing. + throw new AssertionError("Won't happen", e); + } return out.toByteArray(); } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java b/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java index 33dcde3b0e..a6c2765ec7 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java @@ -26,9 +26,14 @@ import java.util.zip.InflaterInputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.exceptions.HSLFException; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderPICT; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; import static org.apache.logging.log4j.util.Unbox.box; @@ -39,6 +44,28 @@ import static org.apache.logging.log4j.util.Unbox.box; public final class PICT extends Metafile { private static final Logger LOG = LogManager.getLogger(PICT.class); + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link PICT}. This API led to detached {@link PICT} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public PICT() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public PICT(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } @Override public byte[] getData(){ @@ -93,7 +120,7 @@ public final class PICT extends Metafile { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { // skip the first 512 bytes - they are MAC specific crap final int nOffset = ImageHeaderPICT.PICT_HEADER_OFFSET; ImageHeaderPICT nHeader = new ImageHeaderPICT(data, nOffset); @@ -108,15 +135,22 @@ public final class PICT extends Metafile { header.setDimension(new Dimension(Units.toEMU(nDim.getWidth()), Units.toEMU(nDim.getHeight()))); byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length]; + int offset = 0; + + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + if (getUIDInstanceCount() == 2) { - out.write(checksum); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; } - header.write(out); - out.write(compressed); - setRawData(out.toByteArray()); + header.write(rawData, offset); + offset += header.getSize(); + System.arraycopy(compressed, 0, rawData, offset, compressed.length); + + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java b/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java index 8e05aa7611..197cb3d828 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java @@ -17,13 +17,41 @@ package org.apache.poi.hslf.blip; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderPNG; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; /** * Represents a PNG picture data in a PPT file */ public final class PNG extends Bitmap { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link PNG}. This API led to detached {@link PNG} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public PNG() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public PNG(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData() { return new ImageHeaderPNG(super.getData()).extractPNG(); diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java b/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java index 1d00322911..55b74bdb6f 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java @@ -24,9 +24,14 @@ import java.io.IOException; import java.io.InputStream; import java.util.zip.InflaterInputStream; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.exceptions.HSLFException; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderWMF; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -34,6 +39,29 @@ import org.apache.poi.util.Units; */ public final class WMF extends Metafile { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link WMF}. This API led to detached {@link WMF} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public WMF() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public WMF(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData(){ try { @@ -64,7 +92,7 @@ public final class WMF extends Metafile { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { int pos = 0; ImageHeaderWMF nHeader = new ImageHeaderWMF(data, pos); pos += nHeader.getLength(); @@ -79,15 +107,22 @@ public final class WMF extends Metafile { header.setZipSize(compressed.length); byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length]; + int offset = 0; + + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + if (getUIDInstanceCount() == 2) { - out.write(checksum); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; } - header.write(out); - out.write(compressed); - setRawData(out.toByteArray()); + header.write(rawData, offset); + offset += header.getSize(); + System.arraycopy(compressed, 0, rawData, offset, compressed.length); + + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java b/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java index 6b13a07920..4e431976a3 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java @@ -173,7 +173,6 @@ public final class PPTXMLDump { return; } - byte[] pictdata = IOUtils.safelyClone(data, pos + PICT_HEADER_SIZE, size, MAX_RECORD_LENGTH); pos += PICT_HEADER_SIZE + size; padding++; @@ -183,7 +182,7 @@ public final class PPTXMLDump { dump(out, header, 0, header.length, padding, true); write(out, "</header>" + CR, padding); write(out, "<imgdata>" + CR, padding); - dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true); + dump(out, data, 0, Math.min(size, 100), padding, true); write(out, "</imgdata>" + CR, padding); padding--; write(out, "</picture>" + CR, padding); diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java index 7fa9323209..5275669b76 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java @@ -570,7 +570,9 @@ public final class HSLFFill { } else { EscherBSERecord bse = (EscherBSERecord)lst.get(idx - 1); for (HSLFPictureData pd : pict) { - if (pd.getOffset() == bse.getOffset()){ + + // Reference equals is safe because these BSE belong to the same slideshow + if (pd.bse == bse) { return pd; } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java index 2f3371d714..5b418f98c3 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java @@ -23,11 +23,18 @@ import java.io.OutputStream; import java.security.MessageDigest; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.Supplier; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.poi.common.usermodel.GenericRecord; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.ddf.EscherRecordTypes; import org.apache.poi.hslf.blip.DIB; import org.apache.poi.hslf.blip.EMF; @@ -38,8 +45,10 @@ import org.apache.poi.hslf.blip.WMF; import org.apache.poi.poifs.crypt.CryptoFunctions; import org.apache.poi.poifs.crypt.HashAlgorithm; import org.apache.poi.sl.usermodel.PictureData; +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -47,19 +56,37 @@ import org.apache.poi.util.Units; */ public abstract class HSLFPictureData implements PictureData, GenericRecord { + private static final Logger LOGGER = LogManager.getLogger(HSLFPictureData.class); + /** * Size of the image checksum calculated using MD5 algorithm. */ protected static final int CHECKSUM_SIZE = 16; /** - * Binary data of the picture - */ - private byte[] rawdata; + * Size of the image preamble in bytes. + * <p> + * The preamble describes how the image should be decoded. All image types have the same preamble format. The + * preamble has little endian encoding. Below is a diagram of the preamble contents. + * + * <pre> + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Signature | Picture Type | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Formatted Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * </pre> + */ + static final int PREAMBLE_SIZE = 8; + /** - * The offset to the picture in the stream + * Binary data of the picture, formatted as it will be stored in the {@link HSLFSlideShow}. + * <p> + * This does not include the {@link #PREAMBLE_SIZE preamble}. */ - private int offset; + private byte[] formattedData; /** * The instance type/signatures defines if one or two UID instances will be included @@ -72,6 +99,43 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { private int index = -1; /** + * {@link EscherRecordTypes#BSTORE_CONTAINER BStore} record tracking all pictures. Should be attached to the + * slideshow that this picture is linked to. + */ + final EscherContainerRecord bStore; + + /** + * Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + final EscherBSERecord bse; + + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link HSLFPictureData}. This API led to detached {@link HSLFPictureData} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public HSLFPictureData() { + this(new EscherContainerRecord(), new EscherBSERecord()); + LOGGER.atWarn().log("The no-arg constructor is deprecated. Some functionality such as updating pictures won't " + + "work."); + } + + /** + * Creates a new instance. + * + * @param bStore {@link EscherRecordTypes#BSTORE_CONTAINER BStore} record tracking all pictures. Should be attached + * to the slideshow that this picture is linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + protected HSLFPictureData(EscherContainerRecord bStore, EscherBSERecord bse) { + this.bStore = Objects.requireNonNull(bStore); + this.bse = Objects.requireNonNull(bse); + } + + /** * Blip signature. */ protected abstract int getSignature(); @@ -95,17 +159,34 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { } /** - * Returns the raw binary data of this Picture excluding the first 8 bytes - * which hold image signature and size of the image data. + * Returns the formatted, binary data of this picture excluding the {@link #PREAMBLE_SIZE preamble} bytes. + * <p> + * Primarily intended for internal POI use. Use {@link #getData()} to retrieve the picture represented by this + * object. * - * @return picture data + * @return Picture data formatted for the HSLF format. + * @see #getData() + * @see #formatImageForSlideshow(byte[]) */ public byte[] getRawData(){ - return rawdata; + return formattedData; } + /** + * Sets the formatted data for this picture. + * <p> + * Primarily intended for internal POI use. Use {@link #setData(byte[])} to change the picture represented by this + * object. + * + * @param data Picture data formatted for the HSLF format. Excludes the {@link #PREAMBLE_SIZE preamble}. + * @see #setData(byte[]) + * @see #formatImageForSlideshow(byte[]) + * @deprecated Set image data using {@link #setData(byte[])}. + */ + @Deprecated + @Removal(version = "5.3") public void setRawData(byte[] data){ - rawdata = (data == null) ? null : data.clone(); + formattedData = (data == null) ? null : data.clone(); } /** @@ -114,7 +195,7 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { * @return offset in the 'Pictures' stream */ public int getOffset(){ - return offset; + return bse.getOffset(); } /** @@ -122,21 +203,25 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { * We need to set it when a new picture is created. * * @param offset in the 'Pictures' stream + * @deprecated This function was only intended for POI internal use. If you have a use case you're concerned about, + * please open an issue in the POI issue tracker. */ + @Deprecated + @Removal(version = "5.3") public void setOffset(int offset){ - this.offset = offset; + LOGGER.atWarn().log("HSLFPictureData#setOffset is deprecated."); } /** * Returns 16-byte checksum of this picture */ public byte[] getUID(){ - return Arrays.copyOf(rawdata, 16); + return Arrays.copyOf(formattedData, CHECKSUM_SIZE); } @Override public byte[] getChecksum() { - return getChecksum(getData()); + return getUID(); } /** @@ -173,25 +258,105 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { } /** - * Create an instance of <code>PictureData</code> by type. + * Create an instance of {@link HSLFPictureData} by type. * - * @param type type of the picture data. - * Must be one of the static constants defined in the <code>Picture<code> class. - * @return concrete instance of <code>PictureData</code> + * @param type type of picture. + * @return concrete instance of {@link HSLFPictureData}. + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link HSLFPictureData}. This API led to detached {@link HSLFPictureData} instances (See Bugzilla + * 46122) and prevented adding additional functionality. */ + @Deprecated + @Removal(version = "5.3") public static HSLFPictureData create(PictureType type){ - HSLFPictureData pict; - switch (type){ - case EMF: pict = new EMF(); break; - case WMF: pict = new WMF(); break; - case PICT: pict = new PICT(); break; - case JPEG: pict = new JPEG(); break; - case PNG: pict = new PNG(); break; - case DIB: pict = new DIB(); break; + LOGGER.atWarn().log("HSLFPictureData#create(PictureType) is deprecated. Some functionality such " + + "as updating pictures won't work."); + + // This record code is a stub. It exists only for API compatibility. + EscherContainerRecord record = new EscherContainerRecord(); + EscherBSERecord bse = new EscherBSERecord(); + return new HSLFSlideShowImpl.PictureFactory(record, type, new byte[0], 0, 0) + .setRecord(bse) + .build(); + } + + /** + * Creates a new instance of the given image type using data already formatted for storage inside the slideshow. + * <p> + * This function is most handy when parsing an existing slideshow, as the picture data are already formatted. + * @param type Image type. + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + * @param data Image data formatted for storage in the slideshow. This does not include the + * {@link #PREAMBLE_SIZE preamble}. + * @param signature Image format-specific signature. See subclasses for signature details. + * @return New instance. + * + * @see #createFromImageData(PictureType, EscherContainerRecord, EscherBSERecord, byte[]) + */ + static HSLFPictureData createFromSlideshowData( + PictureType type, + EscherContainerRecord recordContainer, + EscherBSERecord bse, + byte[] data, + int signature + ) { + HSLFPictureData instance = newInstance(type, recordContainer, bse); + instance.setSignature(signature); + instance.formattedData = data; + return instance; + } + + /** + * Creates a new instance of the given image type using data already formatted for storage inside the slideshow. + * <p> + * This function is most handy when adding new pictures to a slideshow, as the image data provided by users is not + * yet formatted. + * + * @param type Image type. + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + * @param data Original image data. If these bytes were written to a disk, a common image viewer would be able to + * render the image. + * @return New instance. + * + * @see #createFromSlideshowData(PictureType, EscherContainerRecord, EscherBSERecord, byte[], int) + * @see #setData(byte[]) + */ + static HSLFPictureData createFromImageData( + PictureType type, + EscherContainerRecord recordContainer, + EscherBSERecord bse, + byte[] data + ) { + HSLFPictureData instance = newInstance(type, recordContainer, bse); + instance.formattedData = instance.formatImageForSlideshow(data); + return instance; + } + + private static HSLFPictureData newInstance( + PictureType type, + EscherContainerRecord recordContainer, + EscherBSERecord bse + ) { + switch (type) { + case EMF: + return new EMF(recordContainer, bse); + case WMF: + return new WMF(recordContainer, bse); + case PICT: + return new PICT(recordContainer, bse); + case JPEG: + return new JPEG(recordContainer, bse); + case PNG: + return new PNG(recordContainer, bse); + case DIB: + return new DIB(recordContainer, bse); default: throw new IllegalArgumentException("Unsupported picture type: " + type); } - return pict; } /** @@ -204,14 +369,15 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { * @return the 24 byte header which preceeds the actual picture data. */ public byte[] getHeader() { - byte[] header = new byte[16 + 8]; + byte[] header = new byte[CHECKSUM_SIZE + PREAMBLE_SIZE]; LittleEndian.putInt(header, 0, getSignature()); LittleEndian.putInt(header, 4, getRawData().length); - System.arraycopy(rawdata, 0, header, 8, 16); + System.arraycopy(formattedData, 0, header, PREAMBLE_SIZE, CHECKSUM_SIZE); return header; } /** + * Returns the 1-based index of this picture. * @return the 1-based index of this pictures within the pictures stream */ public int getIndex() { @@ -225,6 +391,71 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { this.index = index; } + /** + * Formats the picture data for storage in the slideshow. + * <p> + * Images stored in {@link HSLFSlideShow}s are represented differently than when they are standalone files. The + * exact formatting differs for each image type. + * + * @param data Original image data. If these bytes were written to a disk, a common image viewer would be able to + * render the image. + * @return Formatted image representation. + */ + protected abstract byte[] formatImageForSlideshow(byte[] data); + + /** + * @return Size of this picture when stored in the image stream inside the {@link HSLFSlideShow}. + */ + int getBseSize() { + return formattedData.length + PREAMBLE_SIZE; + } + + @Override + public final void setData(byte[] data) throws IOException { + /* + * When working with slideshow pictures, we need to be aware of 2 container units. The first is a list of + * HSLFPictureData that are the programmatic reference for working with the pictures. The second is the + * Blip Store. For the purposes of this function, you can think of the Blip Store as containing a list of + * pointers (with a small summary) to the picture in the slideshow. + * + * When updating a picture, we need to update the in-memory data structure (this instance), but we also need to + * update the stored pointer. When modifying the pointer, we also need to modify all subsequent pointers, since + * they might shift based on a change in the byte count of the underlying image. + */ + int oldSize = getBseSize(); + formattedData = formatImageForSlideshow(data); + int newSize = getBseSize(); + int changeInSize = newSize - oldSize; + byte[] newUid = getUID(); + + boolean foundBseForOldImage = false; + + // Get the BSE records & sort the list by offset, so we can proceed to shift offsets + @SuppressWarnings("unchecked") // The BStore only contains BSE records + List<EscherBSERecord> bseRecords = (List<EscherBSERecord>) (Object) bStore.getChildRecords(); + bseRecords.sort(Comparator.comparingInt(EscherBSERecord::getOffset)); + + for (EscherBSERecord bse : bseRecords) { + + if (foundBseForOldImage) { + + // The BSE for this picture was modified in a previous iteration, and we are now adjusting + // subsequent offsets. + bse.setOffset(bse.getOffset() + changeInSize); + + } else if (bse == this.bse) { // Reference equals is safe because these BSE belong to the same slideshow + + // This BSE matches the current image. Update the size and UID. + foundBseForOldImage = true; + + bse.setUid(newUid); + + // Image byte count may have changed, so update the pointer. + bse.setSize(newSize); + } + } + } + @Override public final String getContentType() { return getType().contentType; diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java index 49921dc81b..548fdff3d1 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java @@ -125,7 +125,9 @@ public class HSLFPictureShape extends HSLFSimpleShape implements PictureShape<HS LOG.atError().log("no reference to picture data found "); } else { for (HSLFPictureData pd : pict) { - if (pd.getOffset() == bse.getOffset()){ + + // Reference equals is safe because these BSE belong to the same slideshow + if (pd.bse == bse) { return pd; } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java index 908007b4c7..7066140bd6 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java @@ -794,36 +794,11 @@ public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFSh dggContainer.addChildBefore(bstore, EscherOptRecord.RECORD_ID); } - HSLFPictureData pict = HSLFPictureData.create(format); - pict.setData(data); + EscherBSERecord bse = addNewEscherBseRecord(bstore, format, data, 0); + HSLFPictureData pict = HSLFPictureData.createFromImageData(format, bstore, bse, data); int offset = _hslfSlideShow.addPicture(pict); - - EscherBSERecord bse = new EscherBSERecord(); - bse.setRecordId(EscherBSERecord.RECORD_ID); - bse.setOptions((short) (0x0002 | (format.nativeId << 4))); - bse.setSize(pict.getRawData().length + 8); - byte[] uid = HSLFPictureData.getChecksum(data); - bse.setUid(uid); - - bse.setBlipTypeMacOS((byte) format.nativeId); - bse.setBlipTypeWin32((byte) format.nativeId); - - if (format == PictureType.EMF) { - bse.setBlipTypeMacOS((byte) PictureType.PICT.nativeId); - } else if (format == PictureType.WMF) { - bse.setBlipTypeMacOS((byte) PictureType.PICT.nativeId); - } else if (format == PictureType.PICT) { - bse.setBlipTypeWin32((byte) PictureType.WMF.nativeId); - } - - bse.setRef(0); bse.setOffset(offset); - bse.setRemainingData(new byte[0]); - - bstore.addChildRecord(bse); - int count = bstore.getChildRecords().size(); - bstore.setOptions((short) ((count << 4) | 0xF)); return pict; } @@ -1273,4 +1248,31 @@ public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFSh public EncryptionInfo getEncryptionInfo() throws IOException { return getSlideShowImpl().getEncryptionInfo(); } + + static EscherBSERecord addNewEscherBseRecord(EscherContainerRecord blipStore, PictureType type, byte[] imageData, int offset) { + EscherBSERecord record = new EscherBSERecord(); + record.setRecordId(EscherBSERecord.RECORD_ID); + record.setOptions((short) (0x0002 | (type.nativeId << 4))); + record.setSize(imageData.length + HSLFPictureData.PREAMBLE_SIZE); + record.setUid(Arrays.copyOf(imageData, HSLFPictureData.CHECKSUM_SIZE)); + + record.setBlipTypeMacOS((byte) type.nativeId); + record.setBlipTypeWin32((byte) type.nativeId); + + if (type == PictureType.EMF) { + record.setBlipTypeMacOS((byte) PictureType.PICT.nativeId); + } else if (type == PictureType.WMF) { + record.setBlipTypeMacOS((byte) PictureType.PICT.nativeId); + } else if (type == PictureType.PICT) { + record.setBlipTypeWin32((byte) PictureType.WMF.nativeId); + } + + record.setOffset(offset); + + blipStore.addChildRecord(record); + int count = blipStore.getChildRecords().size(); + blipStore.setOptions((short) ((count << 4) | 0xF)); + + return record; + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java index 9c663e385f..f697e859ce 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java @@ -30,21 +30,33 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Objects; import java.util.TreeMap; +import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.poi.POIDocument; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherOptRecord; +import org.apache.poi.ddf.EscherRecord; import org.apache.poi.hpsf.PropertySet; import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException; import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.OldPowerPointFormatException; import org.apache.poi.hslf.record.CurrentUserAtom; +import org.apache.poi.hslf.record.Document; import org.apache.poi.hslf.record.DocumentEncryptionAtom; import org.apache.poi.hslf.record.ExOleObjStg; import org.apache.poi.hslf.record.PersistPtrHolder; @@ -59,6 +71,7 @@ import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.EntryUtils; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.sl.usermodel.PictureData; import org.apache.poi.sl.usermodel.PictureData.PictureType; import org.apache.poi.util.IOUtils; import org.apache.poi.util.LittleEndian; @@ -347,23 +360,26 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { * This is lazily called as and when we want to touch pictures. */ private void readPictures() throws IOException { - _pictures = new ArrayList<>(); - // if the presentation doesn't contain pictures - will use a null set instead + // if the presentation doesn't contain pictures, will use an empty collection instead if (!getDirectory().hasEntry("Pictures")) { + _pictures = new ArrayList<>(); return; } DocumentEntry entry = (DocumentEntry) getDirectory().getEntry("Pictures"); - DocumentInputStream is = getDirectory().createDocumentInputStream(entry); - byte[] pictstream = IOUtils.toByteArray(is, entry.getSize()); - is.close(); + EscherContainerRecord blipStore = getBlipStore(); + byte[] pictstream; + try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) { + pictstream = IOUtils.toByteArray(is, entry.getSize()); + } + List<PictureFactory> factories = new ArrayList<>(); try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) { int pos = 0; // An empty picture record (length 0) will take up 8 bytes - while (pos <= (pictstream.length - 8)) { + while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) { int offset = pos; decryptData.decryptPicture(pictstream, offset); @@ -388,7 +404,7 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { // (0 is allowed, but odd, since we do wind on by the header each // time, so we won't get stuck) if (imgsize < 0) { - throw new CorruptPowerPointFileException("The file contains a picture, at position " + _pictures.size() + ", which has a negatively sized data length, so we can't trust any of the picture data"); + throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data"); } // If the type (including the bonus 0xF018) is 0, skip it @@ -404,26 +420,127 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { "in others, this could indicate a corrupt file"); break; } - // Build the PictureData object from the data - try { - HSLFPictureData pict = HSLFPictureData.create(pt); - pict.setSignature(signature); - - // Copy the data, ready to pass to PictureData - byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH); - pict.setRawData(imgdata); - - pict.setOffset(offset); - pict.setIndex(_pictures.size() + 1); // index is 1-based - _pictures.add(pict); - } catch (IllegalArgumentException e) { - LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!"); - } + + // Copy the data, ready to pass to PictureData + byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH); + + factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature)); } pos += imgsize; } } + + matchPicturesAndRecords(factories, blipStore); + + List<HSLFPictureData> pictures = new ArrayList<>(); + for (PictureFactory it : factories) { + try { + HSLFPictureData pict = it.build(); + + pict.setIndex(pictures.size() + 1); // index is 1-based + pictures.add(pict); + } catch (IllegalArgumentException e) { + LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!"); + } + } + + _pictures = pictures; + } + + /** + * Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the + * Blip Store for the slideshow. + * <p> + * When reading a slideshow into memory, we have to match the records in the Blip Store with the factories + * representing picture in the pictures stream. This can be difficult, as presentations might have incorrectly + * formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood + * of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs. + * + * @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream. + * @param blipStore Blip Store of the presentation being loaded. + */ + private static void matchPicturesAndRecords(List<PictureFactory> factories, EscherContainerRecord blipStore) { + // LinkedList because we're sorting and removing. + LinkedList<PictureFactory> unmatchedFactories = new LinkedList<>(factories); + unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset)); + + // Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a + // matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple + // records share an offset. + Map<Integer, List<EscherBSERecord>> unmatchedRecords = new HashMap<>(); + for (EscherRecord child : blipStore) { + EscherBSERecord record = (EscherBSERecord) child; + unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record); + } + + // The first pass through the factories only pairs a factory with a record if we're very confident that they + // are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched + // factories and records are removed from the unmatched collections. + for (Iterator<PictureFactory> iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) { + PictureFactory factory = iterator.next(); + int physicalOffset = factory.getOffset(); + List<EscherBSERecord> recordsAtOffset = unmatchedRecords.get(physicalOffset); + + if (recordsAtOffset == null || recordsAtOffset.isEmpty()) { + // There are no records that have an offset matching the physical offset in the stream. We'll do + // more complicated and less reliable matching for this factory after all "well known" + // image <-> record pairs have been found. + LOG.atDebug().log("No records with offset {}", box(physicalOffset)); + } else if (recordsAtOffset.size() == 1) { + // Only 1 record has the same offset as the target image. Assume these are a pair. + factory.setRecord(recordsAtOffset.get(0)); + unmatchedRecords.remove(physicalOffset); + iterator.remove(); + } else { + + // Multiple records share an offset. Perform additional matching based on UID. + for (int i = 0; i < recordsAtOffset.size(); i++) { + EscherBSERecord record = recordsAtOffset.get(i); + byte[] recordUid = record.getUid(); + byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE); + if (Arrays.equals(recordUid, imageHeader)) { + factory.setRecord(record); + recordsAtOffset.remove(i); + iterator.remove(); + break; + } + } + } + } + + // At this point, any factories remaining didn't have a record with a matching offset. The second pass + // through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be + // found will get a new record. + List<EscherBSERecord> remainingRecords = unmatchedRecords.values() + .stream() + .flatMap(Collection::stream) + .collect(Collectors.toList()); + + for (PictureFactory factory : unmatchedFactories) { + + boolean matched = false; + for (int i = remainingRecords.size() - 1; i >= 0; i--) { + EscherBSERecord record = remainingRecords.get(i); + byte[] recordUid = record.getUid(); + byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE); + if (Arrays.equals(recordUid, imageHeader)) { + remainingRecords.remove(i); + factory.setRecord(record); + record.setOffset(factory.getOffset()); + matched = true; + } + } + + if (!matched) { + // Synthesize a new record + LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset)); + EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset); + factory.setRecord(record); + } + } + + LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size())); } /** @@ -756,9 +873,8 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { int offset = 0; if (_pictures.size() > 0) { HSLFPictureData prev = _pictures.get(_pictures.size() - 1); - offset = prev.getOffset() + prev.getRawData().length + 8; + offset = prev.getOffset() + prev.getBseSize(); } - img.setOffset(offset); img.setIndex(_pictures.size() + 1); // index is 1-based _pictures.add(img); return offset; @@ -825,6 +941,32 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { return _objects; } + private EscherContainerRecord getBlipStore() { + Document documentRecord = null; + for (Record record : _records) { + if (record.getRecordType() == RecordTypes.Document.typeID) { + documentRecord = (Document) record; + break; + } + } + + if (documentRecord == null) { + throw new CorruptPowerPointFileException("Document record is missing"); + } + + EscherContainerRecord blipStore; + + EscherContainerRecord dggContainer = documentRecord.getPPDrawingGroup().getDggContainer(); + blipStore = HSLFShape.getEscherChild(dggContainer, EscherContainerRecord.BSTORE_CONTAINER); + if (blipStore == null) { + blipStore = new EscherContainerRecord(); + blipStore.setRecordId(EscherContainerRecord.BSTORE_CONTAINER); + + dggContainer.addChildBefore(blipStore, EscherOptRecord.RECORD_ID); + } + return blipStore; + } + @Override public void close() throws IOException { // only close the filesystem, if we are based on the root node. @@ -903,4 +1045,55 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { return count; } } + + /** + * Assists in creating {@link HSLFPictureData} when parsing a slideshow. + * + * This class is relied upon heavily by {@link #matchPicturesAndRecords(List, EscherContainerRecord)}. + */ + static final class PictureFactory { + final byte[] imageData; + + private final EscherContainerRecord recordContainer; + private final PictureData.PictureType type; + private final int offset; + private final int signature; + private EscherBSERecord record; + + PictureFactory( + EscherContainerRecord recordContainer, + PictureData.PictureType type, + byte[] imageData, + int offset, + int signature + ) { + this.recordContainer = Objects.requireNonNull(recordContainer); + this.type = Objects.requireNonNull(type); + this.imageData = Objects.requireNonNull(imageData); + this.offset = offset; + this.signature = signature; + } + + int getOffset() { + return offset; + } + + /** + * Constructs a new {@link HSLFPictureData}. + * <p> + * The {@link EscherBSERecord} must have been set via {@link #setRecord(EscherBSERecord)} prior to invocation. + */ + HSLFPictureData build() { + Objects.requireNonNull(record, "Can't build an instance until the record has been assigned."); + return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature); + } + + /** + * Sets the {@link EscherBSERecord} with which this factory should create a {@link HSLFPictureData}. + */ + PictureFactory setRecord(EscherBSERecord bse) { + record = bse; + return this; + } + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java b/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java index 869b96e4b8..6722f00f41 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java @@ -55,6 +55,7 @@ public abstract class BaseTestPPTIterating { ENCRYPTED_FILES.add("Password_Protected-np-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-56-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-hello.ppt"); + ENCRYPTED_FILES.add("ppt_with_png_encrypted.ppt"); } protected static final Map<String,Class<? extends Throwable>> EXCLUDED = diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java index e514f78835..55a50be321 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java @@ -35,6 +35,7 @@ import javax.imageio.ImageIO; import org.apache.poi.POIDataSamples; import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.sl.usermodel.PictureData.PictureType; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -87,16 +88,19 @@ public final class TestPicture { } /** - * Picture#getEscherBSERecord threw NullPointerException if EscherContainerRecord.BSTORE_CONTAINER - * was not found. The correct behaviour is to return null. + * {@link HSLFPictureShape#getEscherBSERecord()} threw {@link NullPointerException} if + * {@link EscherContainerRecord#BSTORE_CONTAINER} was not found. The correct behaviour is to return null. */ @Test void bug46122() throws IOException { + HSLFPictureData detachedData; + try (HSLFSlideShow ppt = new HSLFSlideShow()) { + detachedData = ppt.addPicture(new byte[0], PictureType.PNG); + } try (HSLFSlideShow ppt = new HSLFSlideShow()) { HSLFSlide slide = ppt.createSlide(); - HSLFPictureData pd = HSLFPictureData.create(PictureType.PNG); - HSLFPictureShape pict = new HSLFPictureShape(pd); //index to non-existing picture data + HSLFPictureShape pict = new HSLFPictureShape(detachedData); //index to non-existing picture data pict.setAnchor(new Rectangle2D.Double(50, 50, 100, 100)); pict.setSheet(slide); HSLFPictureData data = pict.getPictureData(); diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java index fce2e1804d..40076c758f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java @@ -27,9 +27,14 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Random; import org.apache.poi.POIDataSamples; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherRecord; import org.apache.poi.hslf.HSLFTestDataSamples; import org.apache.poi.hslf.blip.DIB; import org.apache.poi.hslf.blip.EMF; @@ -37,6 +42,7 @@ import org.apache.poi.hslf.blip.JPEG; import org.apache.poi.hslf.blip.PICT; import org.apache.poi.hslf.blip.PNG; import org.apache.poi.hslf.blip.WMF; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; import org.apache.poi.sl.image.ImageHeaderEMF; import org.apache.poi.sl.image.ImageHeaderPICT; import org.apache.poi.sl.image.ImageHeaderWMF; @@ -512,9 +518,8 @@ public final class TestPictures { int streamSize = out.size(); - HSLFPictureData data = HSLFPictureData.create(PictureType.JPEG); - data.setData(new byte[100]); - int offset = hslf.addPicture(data); + HSLFPictureData data = ppt.addPicture(new byte[100], PictureType.JPEG); + int offset = data.getOffset(); assertEquals(streamSize, offset); assertEquals(3, ppt.getPictureData().size()); @@ -560,4 +565,172 @@ public final class TestPictures { assertEquals(1, picture.getIndex()); } } + + /** + * Verify that it is possible for a user to change the contents of a {@link HSLFPictureData} using + * {@link HSLFPictureData#setData(byte[])}, and that the changes are saved to the slideshow. + */ + @Test + void testEditPictureData() throws IOException { + byte[] newImage = slTests.readFile("tomcat.png"); + ByteArrayOutputStream modifiedSlideShow = new ByteArrayOutputStream(); + + // Load an existing slideshow and modify the image + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("ppt_with_png.ppt")) { + HSLFPictureData picture = ppt.getPictureData().get(0); + picture.setData(newImage); + ppt.write(modifiedSlideShow); + } + + // Load the modified slideshow and verify the image content + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(modifiedSlideShow.toByteArray()))) { + HSLFPictureData picture = ppt.getPictureData().get(0); + byte[] modifiedImageData = picture.getData(); + assertArrayEquals(newImage, modifiedImageData); + } + } + + /** + * Verify that it is possible for a user to change the contents of an encrypted {@link HSLFPictureData} using + * {@link HSLFPictureData#setData(byte[])}, and that the changes are saved to the slideshow. + */ + @Test + void testEditPictureDataEncrypted() throws IOException { + byte[] newImage = slTests.readFile("tomcat.png"); + ByteArrayOutputStream modifiedSlideShow = new ByteArrayOutputStream(); + + Biff8EncryptionKey.setCurrentUserPassword("password"); + try { + // Load an existing slideshow and modify the image + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("ppt_with_png_encrypted.ppt")) { + HSLFPictureData picture = ppt.getPictureData().get(0); + picture.setData(newImage); + ppt.write(modifiedSlideShow); + } + + // Load the modified slideshow and verify the image content + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(modifiedSlideShow.toByteArray()))) { + HSLFPictureData picture = ppt.getPictureData().get(0); + byte[] modifiedImageData = picture.getData(); + assertArrayEquals(newImage, modifiedImageData); + } + } finally { + Biff8EncryptionKey.setCurrentUserPassword(null); + } + } + + /** + * Verify that the {@link EscherBSERecord#getOffset()} values are modified for all images after the image being + * changed. + */ + @Test + void testEditPictureDataRecordOffsetsAreShifted() throws IOException { + int[] originalOffsets = {0, 12013, 15081, 34162, 59563}; + int[] modifiedOffsets = {0, 35, 3103, 22184, 47585}; + + ByteArrayOutputStream inMemory = new ByteArrayOutputStream(); + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) { + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(originalOffsets, offsets); + + HSLFPictureData imageBeingChanged = ppt.getPictureData().get(0); + // It doesn't matter that this isn't a valid image. We are just testing offsets here. + imageBeingChanged.setData(new byte[10]); + + // Verify that the in-memory representations have all been updated + offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(modifiedOffsets, offsets); + + ppt.write(inMemory); + } + + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) { + + // Verify that the persisted representations have all been updated + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(modifiedOffsets, offsets); + } + } + + /** + * Verify that the {@link EscherBSERecord#getOffset()} values are modified for all images after the image being + * changed, but assuming that the records are not stored in a sorted-by-offset fashion. + * + * We have not encountered a file that has meaningful data that is not sorted. However, we have encountered files + * that have records with an offset of 0 interspersed between meaningful records. See {@code 53446.ppt} and + * {@code alterman_security.ppt} for examples. + */ + @Test + void testEditPictureDataOutOfOrderRecords() throws IOException { + int[] modifiedOffsets = {0, 35, 3103, 22184, 47585}; + + ByteArrayOutputStream inMemory = new ByteArrayOutputStream(); + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) { + + // For this test we're going to intentionally manipulate the records into a shuffled order. + EscherContainerRecord container = ppt.getPictureData().get(0).bStore; + List<EscherRecord> children = container.getChildRecords(); + for (EscherRecord child : children) { + container.removeChildRecord(child); + } + Collections.shuffle(children); + for (EscherRecord child : children) { + container.addChildRecord(child); + } + + HSLFPictureData imageBeingChanged = ppt.getPictureData().get(0); + // It doesn't matter that this isn't a valid image. We are just testing offsets here. + imageBeingChanged.setData(new byte[10]); + + // Verify that the in-memory representations have all been updated + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + Arrays.sort(offsets); + assertArrayEquals(modifiedOffsets, offsets); + + ppt.write(inMemory); + } + + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) { + + // Verify that the persisted representations have all been updated + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + Arrays.sort(offsets); + assertArrayEquals(modifiedOffsets, offsets); + } + } + + /** + * Verify that a slideshow with records that have offsets not matching those of the pictures in the stream still + * correctly pairs the records and pictures. + */ + @Test + void testSlideshowWithIncorrectOffsets() throws IOException { + int[] originalOffsets; + int originalNumberOfRecords; + + // Create a presentation that has records with unmatched offsets, but with matched UIDs. + ByteArrayOutputStream inMemory = new ByteArrayOutputStream(); + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) { + originalOffsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + originalNumberOfRecords = ppt.getPictureData().get(0).bStore.getChildRecords().size(); + + Random random = new Random(); + for (HSLFPictureData picture : ppt.getPictureData()) { + // Bound is arbitrary and irrelevant to the test. + picture.bse.setOffset(random.nextInt(500_000)); + } + ppt.write(inMemory); + } + + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) { + + // Verify that the offsets all got fixed. + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(originalOffsets, offsets); + + // Verify that there are the same number of records as in the original slideshow. + int numberOfRecords = ppt.getPictureData().get(0).bStore.getChildRecords().size(); + assertEquals(originalNumberOfRecords, numberOfRecords); + } + } } |