Przeglądaj źródła

Bug 54332 - WMF extraction failing in Tika for older PowerPoint Files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1687398 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_13_BETA1
Andreas Beeker 9 lat temu
rodzic
commit
1b1d5835b6

+ 1
- 0
src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java Wyświetl plik

// Build the PictureData object from the data // Build the PictureData object from the data
try { try {
PictureData pict = PictureData.create(type - 0xF018); PictureData pict = PictureData.create(type - 0xF018);
pict.setSignature(signature);


// Copy the data, ready to pass to PictureData // Copy the data, ready to pass to PictureData
byte[] imgdata = new byte[imgsize]; byte[] imgdata = new byte[imgsize];

+ 7
- 4
src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java Wyświetl plik



public byte[] getData(){ public byte[] getData(){
byte[] rawdata = getRawData(); byte[] rawdata = getRawData();
byte[] imgdata = new byte[rawdata.length-17];
System.arraycopy(rawdata, 17, imgdata, 0, imgdata.length);
int prefixLen = 16*uidInstanceCount+1;
byte[] imgdata = new byte[rawdata.length-prefixLen];
System.arraycopy(rawdata, prefixLen, imgdata, 0, imgdata.length);
return imgdata; return imgdata;
} }


public void setData(byte[] data) throws IOException { public void setData(byte[] data) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] checksum = getChecksum(data);
out.write(checksum);
for (int i=0; i<uidInstanceCount; i++) {
byte[] checksum = getChecksum(data);
out.write(checksum);
}
out.write(0); out.write(0);
out.write(data); out.write(data);



+ 20
- 4
src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java Wyświetl plik

/** /**
* Size of the BITMAPFILEHEADER structure preceding the actual DIB bytes * Size of the BITMAPFILEHEADER structure preceding the actual DIB bytes
*/ */
public static final int HEADER_SIZE = 14;
private static final int HEADER_SIZE = 14;


/** /**
* @return type of this picture * @return type of this picture
} }


/** /**
* DIB signature is <code>0x7A80</code>
* DIB signature is {@code 0x7A80} or {@code 0x7A90}
* *
* @return DIB signature (<code>0x7A80</code>)
* @return DIB signature ({@code 0x7A80} or {@code 0x7A90})
*/ */
public int getSignature(){ public int getSignature(){
return 0x7A80;
return (uidInstanceCount == 1 ? 0x7A80 : 0x7A90);
} }

/**
 * Applies a DIB instance/signature value to this picture.
 * <p>
 * {@code 0x7A80} selects one UID instance, {@code 0x7A90} selects two.
 *
 * @param signature the DIB signature, either {@code 0x7A80} or {@code 0x7A90}
 * @throws IllegalArgumentException if the value is neither of the two DIB signatures
 */
public void setSignature(int signature) {
    if (signature == 0x7A80) {
        uidInstanceCount = 1;
    } else if (signature == 0x7A90) {
        uidInstanceCount = 2;
    } else {
        throw new IllegalArgumentException(signature+" is not a valid instance/signature value for DIB");
    }
}
public byte[] getData(){ public byte[] getData(){
return addBMPHeader ( super.getData() ); return addBMPHeader ( super.getData() );

+ 20
- 4
src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java Wyświetl plik

} }


/** /**
* EMF signature is <code>0x3D40</code>
* EMF signature is {@code 0x3D40} or {@code 0x3D50}
* *
* @return EMF signature (<code>0x3D40</code>)
* @return EMF signature ({@code 0x3D40} or {@code 0x3D50})
*/ */
public int getSignature(){
return 0x3D40;
public int getSignature() {
return (uidInstanceCount == 1 ? 0x3D40 : 0x3D50);
}
/**
* Sets the EMF signature - either {@code 0x3D40} or {@code 0x3D50}
*/
public void setSignature(int signature) {
switch (signature) {
case 0x3D40:
uidInstanceCount = 1;
break;
case 0x3D50:
uidInstanceCount = 2;
break;
default:
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for EMF");
}
} }
} }

+ 43
- 3
src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java Wyświetl plik

*/ */
public final class JPEG extends Bitmap { public final class JPEG extends Bitmap {


public enum ColorSpace { rgb, cymk };
private ColorSpace colorSpace = ColorSpace.rgb;
/** /**
* @return type of this picture * @return type of this picture
* @see org.apache.poi.hslf.model.Picture#JPEG * @see org.apache.poi.hslf.model.Picture#JPEG
return Picture.JPEG; return Picture.JPEG;
} }


public ColorSpace getColorSpace() {
return colorSpace;
}
public void setColorSpace(ColorSpace colorSpace) {
this.colorSpace = colorSpace;
}
/** /**
* JPEG signature is <code>0x46A0</code>
* JPEG signature is one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}
* *
* @return JPEG signature (<code>0x46A0</code>)
* @return JPEG signature ({@code 0x46A0, 0x46B0, 0x6E20, 0x6E30})
*/ */
public int getSignature(){ public int getSignature(){
return 0x46A0;
return (colorSpace == ColorSpace.rgb)
? (uidInstanceCount == 1 ? 0x46A0 : 0x46B0)
: (uidInstanceCount == 1 ? 0x6E20 : 0x6E30);
} }
/**
* Sets the JPEG signature - one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}.
* <p>
* The signature selects both the UID instance count and the color space:
* {@code 0x46A0} / {@code 0x46B0} are rgb with one / two UID instances,
* {@code 0x6E20} / {@code 0x6E30} are cymk with one / two UID instances.
*
* @param signature the JPEG instance/signature value to apply
* @throws IllegalArgumentException if the value is not one of the four JPEG signatures
*/
public void setSignature(int signature) {
switch (signature) {
case 0x46A0:
uidInstanceCount = 1;
colorSpace = ColorSpace.rgb;
break;
case 0x46B0:
uidInstanceCount = 2;
colorSpace = ColorSpace.rgb;
break;
case 0x6E20:
uidInstanceCount = 1;
colorSpace = ColorSpace.cymk;
break;
case 0x6E30:
uidInstanceCount = 2;
colorSpace = ColorSpace.cymk;
break;
default:
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for JPEG");
}
}
} }

+ 2
- 2
src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java Wyświetl plik



zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE; zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE;


compression = LittleEndian.getUnsignedByte(data, pos); pos++;
filter = LittleEndian.getUnsignedByte(data, pos); pos++;
compression = LittleEndian.getUByte(data, pos); pos++;
filter = LittleEndian.getUByte(data, pos); pos++;
} }


public void write(OutputStream out) throws IOException { public void write(OutputStream out) throws IOException {

+ 19
- 8
src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java Wyświetl plik

*/ */
public final class PICT extends Metafile { public final class PICT extends Metafile {


public PICT(){
super();
}

/** /**
* Extract compressed PICT data from a ppt * Extract compressed PICT data from a ppt
*/ */
byte[] macheader = new byte[512]; byte[] macheader = new byte[512];
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
out.write(macheader); out.write(macheader);
int pos = CHECKSUM_SIZE;
int pos = CHECKSUM_SIZE*uidInstanceCount;
byte[] pict; byte[] pict;
try { try {
pict = read(rawdata, pos); pict = read(rawdata, pos);
} }


/** /**
* PICT signature is <code>0x5430</code>
* PICT signature is {@code 0x5420} or {@code 0x5430}
* *
* @return PICT signature (<code>0x5430</code>)
* @return PICT signature ({@code 0x5420} or {@code 0x5430})
*/ */
public int getSignature(){ public int getSignature(){
return 0x5430;
return (uidInstanceCount == 1 ? 0x5420 : 0x5430);
} }


/**
 * Applies a PICT instance/signature value to this picture.
 * <p>
 * {@code 0x5420} selects one UID instance, {@code 0x5430} selects two.
 *
 * @param signature the PICT signature, either {@code 0x5420} or {@code 0x5430}
 * @throws IllegalArgumentException if the value is neither of the two PICT signatures
 */
public void setSignature(int signature) {
    if (signature == 0x5420) {
        uidInstanceCount = 1;
    } else if (signature == 0x5430) {
        uidInstanceCount = 2;
    } else {
        throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PICT");
    }
}
} }

+ 20
- 10
src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java Wyświetl plik



package org.apache.poi.hslf.blip; package org.apache.poi.hslf.blip;


import org.apache.poi.util.PngUtils;
import org.apache.poi.hslf.model.Picture; import org.apache.poi.hslf.model.Picture;
import org.apache.poi.hslf.exceptions.HSLFException;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.poi.util.PngUtils;


/** /**
* Represents a PNG picture data in a PPT file * Represents a PNG picture data in a PPT file
} }


/** /**
* PNG signature is <code>0x6E00</code>
* PNG signature is {@code 0x6E00} or {@code 0x6E10}
* *
* @return PNG signature (<code>0x6E00</code>)
* @return PNG signature ({@code 0x6E00} or {@code 0x6E10})
*/ */
public int getSignature(){ public int getSignature(){
return 0x6E00;
return (uidInstanceCount == 1 ? 0x6E00 : 0x6E10);
}
/**
* Sets the PNG signature - either {@code 0x6E00} or {@code 0x6E10}
*/
public void setSignature(int signature) {
switch (signature) {
case 0x6E00:
uidInstanceCount = 1;
break;
case 0x6E10:
uidInstanceCount = 2;
break;
default:
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PNG");
}
} }
} }

+ 22
- 5
src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java Wyświetl plik

ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
InputStream is = new ByteArrayInputStream( rawdata ); InputStream is = new ByteArrayInputStream( rawdata );
Header header = new Header(); Header header = new Header();
header.read(rawdata, CHECKSUM_SIZE);
is.skip(header.getSize() + CHECKSUM_SIZE);
header.read(rawdata, CHECKSUM_SIZE*uidInstanceCount);
is.skip(header.getSize() + CHECKSUM_SIZE*uidInstanceCount);


AldusHeader aldus = new AldusHeader(); AldusHeader aldus = new AldusHeader();
aldus.left = header.bounds.x; aldus.left = header.bounds.x;


byte[] checksum = getChecksum(data); byte[] checksum = getChecksum(data);
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
out.write(checksum);
for (int i=0; i<uidInstanceCount; i++) {
out.write(checksum);
}
header.write(out); header.write(out);
out.write(compressed); out.write(compressed);


} }


/** /**
* WMF signature is <code>0x2160</code>
* WMF signature is either {@code 0x2160} or {@code 0x2170}
*/ */
public int getSignature(){ public int getSignature(){
return 0x2160;
return (uidInstanceCount == 1 ? 0x2160 : 0x2170);
} }


/**
 * Applies a WMF instance/signature value to this picture.
 * <p>
 * {@code 0x2160} selects one UID instance, {@code 0x2170} selects two.
 *
 * @param signature the WMF signature, either {@code 0x2160} or {@code 0x2170}
 * @throws IllegalArgumentException if the value is neither of the two WMF signatures
 */
public void setSignature(int signature) {
    if (signature == 0x2160) {
        uidInstanceCount = 1;
    } else if (signature == 0x2170) {
        uidInstanceCount = 2;
    } else {
        throw new IllegalArgumentException(signature+" is not a valid instance/signature value for WMF");
    }
}


/** /**
* Aldus Placeable Metafile header - 22 byte structure before WMF data. * Aldus Placeable Metafile header - 22 byte structure before WMF data.

+ 17
- 2
src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java Wyświetl plik

* Binary data of the picture * Binary data of the picture
*/ */
private byte[] rawdata; private byte[] rawdata;
/** /**
* The offset to the picture in the stream * The offset to the picture in the stream
*/ */
protected int offset; protected int offset;

/**
* The instance type/signatures defines if one or two UID instances will be included
*/
protected int uidInstanceCount = 1;
/** /**
* Returns type of this picture. * Returns type of this picture.
* Must be one of the static constants defined in the <code>Picture</code> class. * Must be one of the static constants defined in the <code>Picture</code> class.
/** /**
* Blip signature. * Blip signature.
*/ */
protected abstract int getSignature();
public abstract int getSignature();
public abstract void setSignature(int signature);


/**
 * Returns the UID instance count currently held by this picture.
 * The instance type/signature defines whether one or two UID instances are included.
 *
 * @return the current value of {@code uidInstanceCount}
 */
protected int getUIDInstanceCount() {
    return this.uidInstanceCount;
}
protected static final ImagePainter[] painters = new ImagePainter[8]; protected static final ImagePainter[] painters = new ImagePainter[8];
static { static {
PictureData.setImagePainter(Picture.PNG, new BitmapPainter()); PictureData.setImagePainter(Picture.PNG, new BitmapPainter());

+ 28
- 9
src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java Wyświetl plik

null // EMF null // EMF
}; };


for (int i = 0; i < pictures.length; i++) {
BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictures[i].getData()));

if (pictures[i].getType() != Picture.WMF && pictures[i].getType() != Picture.EMF) {
assertNotNull(image);

int[] dimensions = expectedSizes[i];
assertEquals(dimensions[0], image.getWidth());
assertEquals(dimensions[1], image.getHeight());
int i=0;
for (PictureData pd : pictures) {
BufferedImage image = ImageIO.read(new ByteArrayInputStream(pd.getData()));
switch (pd.getType()) {
case Picture.WMF:
case Picture.EMF:
break;
default:
assertNotNull(image);
int[] dimensions = expectedSizes[i];
assertEquals(dimensions[0], image.getWidth());
assertEquals(dimensions[1], image.getHeight());
break;
} }
i++;
} }
} }
/**
 * Regression test for bug 54332: opens the two sample slide shows attached to
 * the report and checks that each one exposes exactly one picture whose
 * extracted data has the expected length.
 */
@Test
public void bug54332() throws Exception {
HSLFSlideShow hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332a.ppt")); // TIKA-1046

PictureData[] pictures = hss.getPictures();
assertEquals(1, pictures.length);
assertEquals(102352, pictures[0].getData().length);
hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332b.ppt")); // TIKA-1612
pictures = hss.getPictures();
assertEquals(1, pictures.length);
assertEquals(55830, pictures[0].getData().length);
}


@Test @Test
@Ignore("Just for visual validation - antialiasing is different on various systems") @Ignore("Just for visual validation - antialiasing is different on various systems")

BIN
test-data/slideshow/54332a.ppt Wyświetl plik


BIN
test-data/slideshow/54332b.ppt Wyświetl plik


Ładowanie…
Anuluj
Zapisz