From 4d619644ea0b202531119b1d28bc0e4e6b2c1ae0 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Mon, 16 Jan 2006 18:27:37 +0000 Subject: [PATCH] From Bug 38289: Yegor's support for pictures embeded in PPT files git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@369540 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hslf/HSLFSlideShow.java | 31 ++++ .../apache/poi/hslf/usermodel/Picture.java | 140 ++++++++++++++++++ .../apache/poi/hslf/usermodel/SlideShow.java | 7 + 3 files changed, 178 insertions(+) create mode 100644 src/scratchpad/src/org/apache/poi/hslf/usermodel/Picture.java diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java index 79e28bf5a8..f12896a140 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -36,6 +36,7 @@ import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.util.LittleEndian; import org.apache.poi.hslf.record.*; +import org.apache.poi.hslf.usermodel.Picture; /** * This class contains the main functionality for the Powerpoint file @@ -337,4 +338,34 @@ public class HSLFSlideShow * Fetch the Current User Atom of the document */ public CurrentUserAtom getCurrentUserAtom() { return currentUser; } + + /** + * Read pictures contained in this presentation + * + * @return array with the read pictures or null if the + * presentation doesn't contain pictures. 
+ */ + public Picture[] getPictures() throws IOException { + byte[] pictstream; + + try { + DocumentEntry entry = (DocumentEntry)filesystem.getRoot().getEntry("Pictures"); + pictstream = new byte[entry.getSize()]; + DocumentInputStream is = filesystem.createDocumentInputStream("Pictures"); + is.read(pictstream); /* NOTE(review): the count returned by read() is ignored and the stream is never closed; confirm that one call fills the whole buffer */ + } catch (FileNotFoundException e){ + //silently catch exceptions if the presentation doesn't contain pictures + return null; + } + + ArrayList p = new ArrayList(); + int pos = 0; /* offset of the current picture entry within pictstream */ + while (pos < pictstream.length) { + Picture pict = new Picture(pictstream, pos); + p.add(pict); + pos += Picture.HEADER_SIZE + pict.getSize(); /* step over the header and image bytes of this entry */ + } + + return (Picture[])p.toArray(new Picture[p.size()]); + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/Picture.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/Picture.java new file mode 100644 index 0000000000..64caff7fa7 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/Picture.java @@ -0,0 +1,140 @@ +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hslf.usermodel; + +import org.apache.poi.util.LittleEndian; + +/** + * Represents a picture in a PowerPoint document. + *

<p>
+ * The information about an image in PowerPoint document is stored in + * two places: + *
<li> EscherBSE container in the Document keeps information about image + * type, image index to refer by slides etc. + *
<li> "Pictures" OLE stream holds the actual data of the image. + *
</p>
+ *
<p>
+ * Data in the "Pictures" OLE stream is organized as follows:<br>
+ * For each image there is an entry: 25 byte header + image data. + * Image data is the exact content of the JPEG file, i.e. PowerPoint + * puts the whole jpeg file there without any modifications.<br>
+ * Header format: + *
<li> 2 byte: image type. For JPEGs it is 0x46A0, for PNG it is 0x6E00. + *
<li> 2 byte: unknown. + *
<li> 4 byte : image size + 17. Looks like shift from the end of + * header but why to add it to the image size? + *
<li> next 16 bytes. Unique identifier of this image which is used by + * EscherBSE record. + *
</p>
    + * + * @author Yegor Kozlov + */ +public class Picture { + + /** + * Picture type value for Windows Metafile (WMF) + */ + public static final int WMF = 0x2160; + + /** + * Picture type value for Macintosh PICT + */ + public static final int PICT = 0x5420; + + /** + * Picture type value for JPEG + */ + public static final int JPEG = 0x46A0; + + /** + * Picture type value for PNG + */ + public static final int PNG = 0x6E00; + + /** + * Picture type value for Windows DIB (BMP) + */ + public static final int DIB = 0x7A80; + + /** + * The size of the header, in bytes + */ + public static final int HEADER_SIZE = 25; + + /** + * Binary data of the picture (the bytes that follow the header) + */ + protected byte[] pictdata; + + /** + * Header which holds information about this picture (HEADER_SIZE bytes) + */ + protected byte[] header; + + /** + * Read a picture from "Pictures" OLE stream: the header is copied from offset and the image bytes from right after it + * + * @param pictstream the bytes to read + * @param offset the index of the first byte to read, i.e. where the header of this picture starts + */ + public Picture(byte[] pictstream, int offset){ + header = new byte[Picture.HEADER_SIZE]; + System.arraycopy(pictstream, offset, header, 0, header.length); + + int size = LittleEndian.getInt(header, 4) - 17; /* the int at header offset 4 holds the image size plus 17 */ + pictdata = new byte[size]; /* NOTE(review): no validation here; a length field below 17 or a truncated stream will make this allocation or the copy below throw */ + System.arraycopy(pictstream, offset + Picture.HEADER_SIZE, pictdata, 0, pictdata.length); + } + + /** + * @return the binary data of this picture (the internal array, not a copy) + */ + public byte[] getData(){ + return pictdata; + } + + /** + * Return image size in bytes + * + * @return the size of the picture in bytes, excluding the header + */ + public int getSize(){ + return pictdata.length; + } + + /** + * Returns the unique identifier (UID) of this picture. + * The UID is a checksum of the picture data. Its length is 16 bytes + * and it must be unique across the presentation. + * + * @return the unique identifier of this picture, as a new array copied from header bytes 8 to 23 + */ + public byte[] getUID(){ + byte[] uid = new byte[16]; + System.arraycopy(header, 8, uid, 0, uid.length); + return uid; + } + + /** + * Returns the type of this picture. Must be one of the static constants defined in this class. + * + * @return type of this picture, read with LittleEndian.getShort from header offset 0. 
+ */ + public int getType(){ + int type = LittleEndian.getShort(header, 0); + return type; + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java index 15b610ce09..23108d4783 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java @@ -391,4 +391,11 @@ public class SlideShow * found in the slideshow */ //public MetaSheet[] getMetaSheets() { return _msheets; } + + /** + * Returns all the pictures attached to the SlideShow, as returned by _hslfSlideShow.getPictures() + */ + public Picture[] getPictures() throws IOException { + return _hslfSlideShow.getPictures(); + } } -- 2.39.5