Refactor TestPPTX2PNG code to support archives.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893451 13f79535-47bb-0310-9956-ffa450edef68

tags/REL_5_2_0
@@ -35,6 +35,7 @@ open module org.apache.poi.ooxml.schemas { | |||
exports org.apache.poi.schemas.ooxml.system.ooxml; | |||
exports org.apache.poi.schemas.vmldrawing; | |||
exports org.etsi.uri.x01903.v13; | |||
exports org.etsi.uri.x01903.v14; | |||
exports org.openxmlformats.schemas.drawingml.x2006.chart; | |||
exports org.openxmlformats.schemas.drawingml.x2006.main; | |||
exports org.openxmlformats.schemas.drawingml.x2006.picture; |
@@ -136,6 +136,7 @@ dependencies { | |||
testImplementation 'org.openjdk.jmh:jmh-core:1.32' | |||
testImplementation 'org.openjdk.jmh:jmh-generator-annprocess:1.32' | |||
testImplementation 'com.google.guava:guava:30.1.1-jre' | |||
testImplementation 'org.tukaani:xz:1.9' | |||
// prevent slf4j warnings coming from xmlsec -> slf4j-api 1.7.32 dependency | |||
// see https://logging.apache.org/log4j/2.x/log4j-slf4j-impl/ |
@@ -25,13 +25,20 @@ import static org.junit.jupiter.api.Assumptions.assumeFalse; | |||
import java.io.File; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.List; | |||
import java.util.function.Function; | |||
import java.util.Locale; | |||
import java.util.stream.Stream; | |||
import java.util.zip.ZipEntry; | |||
import java.util.zip.ZipFile; | |||
import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; | |||
import org.apache.commons.compress.archivers.sevenz.SevenZFile; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.xslf.util.PPTX2PNG; | |||
import org.junit.jupiter.api.AfterAll; | |||
import org.junit.jupiter.api.BeforeAll; | |||
import org.junit.jupiter.params.ParameterizedTest; | |||
import org.junit.jupiter.params.provider.Arguments; | |||
@@ -44,6 +51,7 @@ import org.junit.jupiter.params.provider.MethodSource; | |||
class TestPPTX2PNG { | |||
private static boolean xslfOnly; | |||
private static final POIDataSamples samples = POIDataSamples.getSlideShowInstance(); | |||
private static final File basedir = null; | |||
private static final String files = | |||
@@ -58,38 +66,55 @@ class TestPPTX2PNG { | |||
private static final String pdfFiles = | |||
"alterman_security.ppt"; | |||
private static InputStream defStdin; | |||
@BeforeAll | |||
public static void checkHslf() { | |||
public static void init() { | |||
try { | |||
Class.forName("org.apache.poi.hslf.usermodel.HSLFSlideShow"); | |||
} catch (Exception e) { | |||
xslfOnly = true; | |||
} | |||
defStdin = System.in; | |||
} | |||
public static Stream<Arguments> data() { | |||
Function<String, Stream<Arguments>> fun = (basedir == null) | |||
? (f) -> Stream.of(Arguments.of(f)) | |||
: (f) -> Stream.of(basedir.listFiles(p -> p.getName().matches(f))).map(File::getName).map(Arguments::of); | |||
@AfterAll | |||
public static void resetStdin() { | |||
System.setIn(defStdin); | |||
} | |||
return Stream.of(files.split(", ?")).flatMap(fun); | |||
public static Stream<Arguments> data() throws IOException { | |||
if (basedir != null && basedir.getName().endsWith(".zip")) { | |||
ZipFile zipFile = new ZipFile(basedir); | |||
return zipFile.stream().map(f -> Arguments.of(f.getName(), f, zipFile)); | |||
} else if (basedir != null && basedir.getName().endsWith(".7z")) { | |||
SevenZFile sevenZFile = new SevenZFile(basedir); | |||
return ((ArrayList<SevenZArchiveEntry>)sevenZFile.getEntries()).stream().filter(f -> !f.isDirectory()).map(f -> Arguments.of(f.getName(), f, sevenZFile)); | |||
} else { | |||
return Stream.of(files.split(", ?")). | |||
map(basedir == null ? samples::getFile : f -> new File(basedir, f)). | |||
map(f -> Arguments.of(f.getName(), f, f.getParentFile())); | |||
} | |||
} | |||
// use filename instead of File object to omit full pathname in test name | |||
@ParameterizedTest | |||
@ParameterizedTest(name = "{0} ({index})") | |||
@MethodSource("data") | |||
void render(String pptFile) throws Exception { | |||
assumeFalse(xslfOnly && pptFile.matches(".*\\.(ppt|emf|wmf)$"), "ignore HSLF (.ppt) / HEMF (.emf) / HWMF (.wmf) files in no-scratchpad run"); | |||
PPTX2PNG.main(getArgs(pptFile, "null")); | |||
if (svgFiles.contains(pptFile)) { | |||
PPTX2PNG.main(getArgs(pptFile, "svg")); | |||
void render(String fileName, Object fileObj, Object fileContainer) throws Exception { | |||
assumeFalse(xslfOnly && fileName.matches(".*\\.(ppt|emf|wmf)$"), "ignore HSLF (.ppt) / HEMF (.emf) / HWMF (.wmf) files in no-scratchpad run"); | |||
PPTX2PNG.main(getArgs(fileName, fileObj, fileContainer, "null")); | |||
if (svgFiles.contains(fileName)) { | |||
PPTX2PNG.main(getArgs(fileName, fileObj, fileContainer, "svg")); | |||
} | |||
if (pdfFiles.contains(fileName)) { | |||
PPTX2PNG.main(getArgs(fileName, fileObj, fileContainer, "pdf")); | |||
} | |||
if (pdfFiles.contains(pptFile)) { | |||
PPTX2PNG.main(getArgs(pptFile, "pdf")); | |||
if (System.in != defStdin) { | |||
System.in.close(); | |||
} | |||
} | |||
private String[] getArgs(String pptFile, String format) throws IOException { | |||
private String[] getArgs(String fileName, Object fileObj, Object fileContainer, String format) throws IOException { | |||
File tmpDir = new File("build/tmp/"); | |||
// fix maven build errors | |||
@@ -105,20 +130,46 @@ class TestPPTX2PNG { | |||
// "-dump", new File("build/tmp/", pptFile+".json").getCanonicalPath(), | |||
"-dump", "null", | |||
"-quiet", | |||
"-ignoreParse", | |||
// "-charset", "GBK", | |||
// "-emfHeaderBounds", | |||
// "-textAsShapes", | |||
// "-extractEmbedded", | |||
"-fixside", "long", | |||
"-scale", "800" | |||
)); | |||
if ("bug64693.pptx".equals(pptFile)) { | |||
args.addAll(asList( | |||
"-charset", "GBK" | |||
)); | |||
String lName = fileName.toLowerCase(Locale.ROOT); | |||
FileMagic inputType = null; | |||
if (lName.endsWith(".emf")) { | |||
inputType = FileMagic.EMF; | |||
} else if (lName.endsWith(".wmf")) { | |||
inputType = FileMagic.WMF; | |||
} | |||
args.add((basedir == null ? samples.getFile(pptFile) : new File(basedir, pptFile)).getAbsolutePath()); | |||
if (inputType != null) { | |||
args.add("-inputtype"); | |||
args.add(inputType.toString()); | |||
} | |||
if (fileName.endsWith("bug64693.pptx")) { | |||
args.add("-charset"); | |||
args.add("GBK"); | |||
} | |||
if (fileObj instanceof ZipEntry) { | |||
ZipEntry ze = (ZipEntry)fileObj; | |||
ZipFile zf = (ZipFile)fileContainer; | |||
System.setIn(zf.getInputStream(ze)); | |||
args.add("stdin"); | |||
} else if (fileObj instanceof SevenZArchiveEntry) { | |||
SevenZArchiveEntry ze = (SevenZArchiveEntry)fileObj; | |||
SevenZFile zf = (SevenZFile)fileContainer; | |||
System.setIn(zf.getInputStream(ze)); | |||
args.add("stdin"); | |||
} else if (fileObj instanceof File) { | |||
args.add(((File)fileObj).getAbsolutePath()); | |||
} | |||
return args.toArray(new String[0]); | |||
} |
@@ -50,6 +50,7 @@ import org.apache.poi.util.Dimension2DDouble; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndianInputStream; | |||
import org.apache.poi.util.LocaleUtil; | |||
import org.apache.poi.util.RecordFormatException; | |||
import org.apache.poi.util.Units; | |||
/** | |||
@@ -71,7 +72,12 @@ public class HemfPicture implements Iterable<HemfRecord>, GenericRecord { | |||
} | |||
public HemfHeader getHeader() { | |||
return (HemfHeader)getRecords().get(0); | |||
List<HemfRecord> r = getRecords(); | |||
if (r.isEmpty()) { | |||
throw new RecordFormatException("No records could be parsed - your .emf file is invalid"); | |||
} else { | |||
return (HemfHeader)r.get(0); | |||
} | |||
} | |||
public List<HemfRecord> getRecords() { |
@@ -57,70 +57,6 @@ public class TestHemfPicture { | |||
private static final POIDataSamples ss_samples = POIDataSamples.getSpreadSheetInstance(); | |||
private static final POIDataSamples sl_samples = POIDataSamples.getSlideShowInstance(); | |||
/* | |||
@Test | |||
@Disabled("Only for manual tests") | |||
void paintSingle() throws Exception { | |||
File fileIn = new File("tmp/emfs/govdocs1/844/844795.ppt_2.emf"); | |||
String[] args = { | |||
"-format", "png", // png,gif,jpg or null for test | |||
"-outdir", new File("build/tmp/").getCanonicalPath(), | |||
"-outfile", fileIn.getName().replaceAll("\\.[^.]+?$", ".png"), | |||
"-fixside", "long", | |||
"-scale", "800", | |||
"-ignoreParse", | |||
// "-dump", new File("build/tmp/", fileIn.getName().replaceAll("\\.[^.]+?$",".json")).getCanonicalPath(), | |||
// "-quiet", | |||
// "-extractEmbedded", | |||
fileIn.getPath() | |||
}; | |||
PPTX2PNG.main(args); | |||
} | |||
*/ | |||
/* | |||
@Test | |||
@Disabled("Only for manual tests - need to add org.tukaani:xz:1.8 for this to work") | |||
void paintMultiple() throws Exception { | |||
Pattern fileExt = Pattern.compile("(?i)^(.+/)*(.+)\\.(emf|wmf)$"); | |||
final byte[] buf = new byte[50_000_000]; | |||
try (SevenZFile sevenZFile = new SevenZFile(new File("tmp/plus_emf.7z")) | |||
) { | |||
SevenZArchiveEntry entry; | |||
while ((entry = sevenZFile.getNextEntry()) != null) { | |||
if (entry.isDirectory() || entry.getSize() == 0) continue; | |||
Matcher m = fileExt.matcher(entry.getName()); | |||
if (!m.matches()) continue; | |||
int size = sevenZFile.read(buf); | |||
ByteArrayInputStream bis = new ByteArrayInputStream(buf, 0, size); | |||
System.setIn(bis); | |||
String[] args = { | |||
"-format", "png", // png,gif,jpg or null for test | |||
"-outdir", new File("build/tmp/").getCanonicalPath(), | |||
"-outfile", m.replaceAll("$2.png"), | |||
"-fixside", "long", | |||
"-scale", "800", | |||
"-ignoreParse", | |||
"-inputtype", m.replaceAll("$3").toUpperCase(), | |||
// "-dump", new File("build/tmp/", lastName.replace(".emf",".json")).getCanonicalPath(), | |||
"-quiet", | |||
// "-extractEmbedded", | |||
"stdin" | |||
}; | |||
try { | |||
PPTX2PNG.main(args); | |||
System.out.println("Processing "+entry.getName()+" ok"); | |||
} catch (Exception e) { | |||
System.out.println("Processing "+entry.getName()+" failed"); | |||
} | |||
} | |||
} | |||
} | |||
*/ | |||
@Test | |||
void testBasicWindows() throws Exception { | |||
try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_windows.emf")) { |
@@ -27,10 +27,11 @@ import java.awt.geom.Rectangle2D; | |||
import java.awt.image.AffineTransformOp; | |||
import java.awt.image.BufferedImage; | |||
import java.awt.image.RescaleOp; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.Iterator; | |||
import java.util.stream.Stream; | |||
import java.util.stream.StreamSupport; | |||
import javax.imageio.ImageIO; | |||
import javax.imageio.ImageReadParam; | |||
@@ -39,6 +40,7 @@ import javax.imageio.ImageTypeSpecifier; | |||
import javax.imageio.stream.ImageInputStream; | |||
import javax.imageio.stream.MemoryCacheImageInputStream; | |||
import org.apache.commons.collections4.iterators.IteratorIterable; | |||
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; | |||
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; | |||
import org.apache.logging.log4j.LogManager; | |||
@@ -51,23 +53,32 @@ import org.apache.poi.util.IOUtils; | |||
**/ | |||
public class BitmapImageRenderer implements ImageRenderer { | |||
private static final Logger LOG = LogManager.getLogger(BitmapImageRenderer.class); | |||
private static final ImageLoader[] IMAGE_LOADERS = { | |||
BitmapImageRenderer::loadColored, | |||
BitmapImageRenderer::loadGrayScaled, | |||
BitmapImageRenderer::loadTruncated | |||
}; | |||
private static final String UNSUPPORTED_IMAGE_TYPE = "Unsupported Image Type"; | |||
private static final PictureType[] ALLOWED_TYPES = { | |||
PictureType.JPEG, | |||
PictureType.PNG, | |||
PictureType.BMP, | |||
PictureType.GIF | |||
}; | |||
protected BufferedImage img; | |||
private boolean doCache; | |||
private byte[] cachedImage; | |||
private String cachedContentType; | |||
private interface ImageLoader { | |||
BufferedImage load(ImageReader reader, ImageInputStream iis, ImageReadParam param) throws IOException; | |||
} | |||
@Override | |||
public boolean canRender(String contentType) { | |||
PictureType[] pts = { | |||
PictureType.JPEG, PictureType.PNG, PictureType.BMP, PictureType.GIF | |||
}; | |||
for (PictureType pt : pts) { | |||
if (pt.contentType.equalsIgnoreCase(contentType)) { | |||
return true; | |||
} | |||
} | |||
return false; | |||
return Stream.of(ALLOWED_TYPES).anyMatch(t -> t.contentType.equalsIgnoreCase(contentType)); | |||
} | |||
@Override | |||
@@ -108,110 +119,36 @@ public class BitmapImageRenderer implements ImageRenderer { | |||
IOException lastException = null; | |||
BufferedImage img = null; | |||
final InputStream bis; | |||
if (data instanceof ByteArrayInputStream) { | |||
bis = data; | |||
} else if (data instanceof UnsynchronizedByteArrayInputStream) { | |||
bis = data; | |||
} else { | |||
UnsynchronizedByteArrayOutputStream bos = new UnsynchronizedByteArrayOutputStream(0x3FFFF); | |||
IOUtils.copy(data, bos); | |||
bis = bos.toInputStream(); | |||
} | |||
// currently don't use FileCacheImageInputStream, | |||
// because of the risk of filling the file handles (see #59166) | |||
ImageInputStream iis = new MemoryCacheImageInputStream(bis); | |||
try { | |||
try (ImageInputStream iis = new MemoryCacheImageInputStream(data)) { | |||
Iterator<ImageReader> iter = ImageIO.getImageReaders(iis); | |||
while (img==null && iter.hasNext()) { | |||
lastException = null; | |||
ImageReader reader = iter.next(); | |||
ImageReadParam param = reader.getDefaultReadParam(); | |||
// 0:default mode, 1:fallback mode | |||
for (int mode=0; img==null && mode<3; mode++) { | |||
lastException = null; | |||
if (mode > 0) { | |||
bis.reset(); | |||
iis.close(); | |||
iis = new MemoryCacheImageInputStream(bis); | |||
} | |||
for (ImageLoader il : IMAGE_LOADERS) { | |||
iis.reset(); | |||
iis.mark(); | |||
try { | |||
switch (mode) { | |||
case 0: | |||
reader.setInput(iis, false, true); | |||
img = reader.read(0, param); | |||
break; | |||
case 1: { | |||
// try to load picture in gray scale mode | |||
// fallback mode for invalid image band metadata | |||
// see http://stackoverflow.com/questions/10416378 | |||
Iterator<ImageTypeSpecifier> imageTypes = reader.getImageTypes(0); | |||
while (imageTypes.hasNext()) { | |||
ImageTypeSpecifier imageTypeSpecifier = imageTypes.next(); | |||
int bufferedImageType = imageTypeSpecifier.getBufferedImageType(); | |||
if (bufferedImageType == BufferedImage.TYPE_BYTE_GRAY) { | |||
param.setDestinationType(imageTypeSpecifier); | |||
break; | |||
} | |||
} | |||
reader.setInput(iis, false, true); | |||
img = reader.read(0, param); | |||
break; | |||
} | |||
case 2: { | |||
// try to load truncated pictures by supplying a BufferedImage | |||
// and use the processed data up till the point of error | |||
reader.setInput(iis, false, true); | |||
int height = reader.getHeight(0); | |||
int width = reader.getWidth(0); | |||
Iterator<ImageTypeSpecifier> imageTypes = reader.getImageTypes(0); | |||
if (imageTypes.hasNext()) { | |||
ImageTypeSpecifier imageTypeSpecifier = imageTypes.next(); | |||
img = imageTypeSpecifier.createBufferedImage(width, height); | |||
param.setDestination(img); | |||
} else { | |||
lastException = new IOException("unable to load even a truncated version of the image."); | |||
break; | |||
} | |||
try { | |||
reader.read(0, param); | |||
} finally { | |||
if (img.getType() != BufferedImage.TYPE_INT_ARGB) { | |||
int y = findTruncatedBlackBox(img, width, height); | |||
if (y < height) { | |||
BufferedImage argbImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); | |||
Graphics2D g = argbImg.createGraphics(); | |||
g.clipRect(0, 0, width, y); | |||
g.drawImage(img, 0, 0, null); | |||
g.dispose(); | |||
img.flush(); | |||
img = argbImg; | |||
} | |||
} | |||
} | |||
break; | |||
} | |||
img = il.load(reader, iis, param); | |||
if (img != null) { | |||
break; | |||
} | |||
} catch (IOException e) { | |||
if (mode < 2) { | |||
lastException = e; | |||
lastException = e; | |||
if (UNSUPPORTED_IMAGE_TYPE.equals(e.getMessage())) { | |||
// fail early | |||
break; | |||
} | |||
} catch (RuntimeException e) { | |||
if (mode < 2) { | |||
lastException = new IOException("ImageIO runtime exception - "+(mode==0 ? "normal" : "fallback"), e); | |||
} | |||
lastException = new IOException("ImageIO runtime exception", e); | |||
} | |||
} | |||
reader.dispose(); | |||
} | |||
} finally { | |||
iis.close(); | |||
} | |||
// If you don't have an image at the end of all readers | |||
@@ -221,20 +158,77 @@ public class BitmapImageRenderer implements ImageRenderer { | |||
// multiple locations above ... | |||
throw lastException; | |||
} | |||
LOG.atWarn().log("Content-type: {} is not support. Image ignored.", contentType); | |||
LOG.atWarn().log("Content-type: {} is not supported. Image ignored.", contentType); | |||
return null; | |||
} | |||
if (img.getColorModel().hasAlpha()) { | |||
return img; | |||
} | |||
// add alpha channel | |||
if (img.getType() != BufferedImage.TYPE_INT_ARGB) { | |||
BufferedImage argbImg = new BufferedImage(img.getWidth(), img.getHeight(), BufferedImage.TYPE_INT_ARGB); | |||
Graphics g = argbImg.getGraphics(); | |||
g.drawImage(img, 0, 0, null); | |||
g.dispose(); | |||
return argbImg; | |||
BufferedImage argbImg = new BufferedImage(img.getWidth(), img.getHeight(), BufferedImage.TYPE_INT_ARGB); | |||
Graphics g = argbImg.getGraphics(); | |||
g.drawImage(img, 0, 0, null); | |||
g.dispose(); | |||
return argbImg; | |||
} | |||
private static BufferedImage loadColored(ImageReader reader, ImageInputStream iis, ImageReadParam param) throws IOException { | |||
reader.setInput(iis, false, true); | |||
return reader.read(0, param); | |||
} | |||
private static BufferedImage loadGrayScaled(ImageReader reader, ImageInputStream iis, ImageReadParam param) throws IOException { | |||
// try to load picture in gray scale mode | |||
// fallback mode for invalid image band metadata | |||
Iterable<ImageTypeSpecifier> specs = new IteratorIterable<>(reader.getImageTypes(0)); | |||
StreamSupport.stream(specs.spliterator(), false). | |||
filter(its -> its.getBufferedImageType() == BufferedImage.TYPE_BYTE_GRAY).findFirst(). | |||
ifPresent(param::setDestinationType); | |||
reader.setInput(iis, false, true); | |||
return reader.read(0, param); | |||
} | |||
private static BufferedImage loadTruncated(ImageReader reader, ImageInputStream iis, ImageReadParam param) throws IOException { | |||
// try to load truncated pictures by supplying a BufferedImage | |||
// and use the processed data up till the point of error | |||
reader.setInput(iis, false, true); | |||
int height = reader.getHeight(0); | |||
int width = reader.getWidth(0); | |||
Iterator<ImageTypeSpecifier> imageTypes = reader.getImageTypes(0); | |||
if (!imageTypes.hasNext()) { | |||
// unable to load even a truncated version of the image | |||
// implicitly throwing previous error | |||
return null; | |||
} | |||
ImageTypeSpecifier imageTypeSpecifier = imageTypes.next(); | |||
BufferedImage img = imageTypeSpecifier.createBufferedImage(width, height); | |||
param.setDestination(img); | |||
return img; | |||
try { | |||
reader.read(0, param); | |||
} catch (IOException ignored) { | |||
} | |||
if (img.getColorModel().hasAlpha()) { | |||
return img; | |||
} | |||
int y = findTruncatedBlackBox(img, width, height); | |||
if (y >= height) { | |||
return img; | |||
} | |||
BufferedImage argbImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); | |||
Graphics2D g = argbImg.createGraphics(); | |||
g.clipRect(0, 0, width, y); | |||
g.drawImage(img, 0, 0, null); | |||
g.dispose(); | |||
img.flush(); | |||
return argbImg; | |||
} | |||
private static int findTruncatedBlackBox(BufferedImage img, int width, int height) { |