From: Avik Sengupta Date: Sat, 28 May 2005 05:36:00 +0000 (+0000) Subject: Initial Powerpoint support, by Nick Burch X-Git-Tag: BEFORE_RICHTEXT~68 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=6424e17b17138671152149cf3265e4f3bb0b9804;p=poi.git Initial Powerpoint support, by Nick Burch git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353701 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java new file mode 100644 index 0000000000..2701bb1cd9 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -0,0 +1,347 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + + +package org.apache.poi.hslf; + +import java.util.*; +import java.io.*; + +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.poifs.filesystem.POIFSDocument; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.DocumentInputStream; + +import org.apache.poi.hpsf.PropertySet; +import org.apache.poi.hpsf.PropertySetFactory; +import org.apache.poi.hpsf.MutablePropertySet; +import org.apache.poi.hpsf.SummaryInformation; +import org.apache.poi.hpsf.DocumentSummaryInformation; + +import org.apache.poi.util.LittleEndian; + +import org.apache.poi.hslf.record.*; + +/** + * This class contains the main functionality for the Powerpoint file + * "reader". It is only a very basic class for now + * + * @author Nick Burch + */ + +public class HSLFSlideShow +{ + private InputStream istream; + private POIFSFileSystem filesystem; + + // Holds metadata on our document + private SummaryInformation sInf; + private DocumentSummaryInformation dsInf; + private CurrentUserAtom currentUser; + + // Low level contents of the file + private byte[] _docstream; + + // Low level contents + private Record[] _records; + + /** + * Constructs a Powerpoint document from fileName. Parses the document + * and places all the important stuff into data structures. + * + * @param fileName The name of the file to read. + * @throws IOException if there is a problem while parsing the document. + */ + public HSLFSlideShow(String fileName) throws IOException + { + this(new FileInputStream(fileName)); + } + + /** + * Constructs a Powerpoint document from an input stream. Parses the + * document and places all the important stuff into data structures. + * + * @param inputStream the source of the data + * @throws IOException if there is a problem while parsing the document. 
+ */ + public HSLFSlideShow(InputStream inputStream) throws IOException + { + //do Ole stuff + this(new POIFSFileSystem(inputStream)); + istream = inputStream; + } + + /** + * Constructs a Powerpoint document from a POIFS Filesystem. Parses the + * document and places all the important stuff into data structures. + * + * @param filesystem the POIFS FileSystem to read from + * @throws IOException if there is a problem while parsing the document. + */ + public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException + { + this.filesystem = filesystem; + + // Go find a PowerPoint document in the stream + // Save anything useful we come across + readFIB(); + + // Look for Property Streams: + readProperties(); + } + + + /** + * Shuts things down. Closes underlying streams etc + * + * @throws IOException + */ + public void close() throws IOException + { + if(istream != null) { + istream.close(); + } + filesystem = null; + } + + + /** + * Extracts the main document stream from the POI file then hands off + * to other functions that parse other areas. 
+ * + * @throws IOException + */ + private void readFIB() throws IOException + { + // Get the main document stream + DocumentEntry docProps = + (DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document"); + + // Grab the document stream + _docstream = new byte[docProps.getSize()]; + filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream); + + // The format of records in a powerpoint file is: + // <little endian 2 byte "info"> + // <little endian 2 byte "type"> + // <little endian 4 byte "length"> + // If it has a zero length, following it will be another record + // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz> + // If it has a length, depending on its type it may have children or data + // If it has children, these will follow straight away + // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>> + // If it has data, this will come straight after, and run for the length + // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd> + // All lengths given exclude the 8 byte record header + // (Data records are known as Atoms) + + // Document should start with: + // 0F 00 E8 03 ## ## ## ## + // (type 1000 = document, info 00 0f is normal, rest is document length) + // 01 00 E9 03 28 00 00 00 + // (type 1001 = document atom, info 00 01 normal, 28 bytes long) + // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx + // 05 00 00 00 0A 00 00 00 xx xx xx + // (the contents of the document atom, not sure what it means yet) + // (records then follow) + + // When parsing a document, look to see if you know about that type + // of the current record. 
If you know it's a type that has children, + // process the record's data area looking for more records + // If you know about the type and it doesn't have children, either do + // something with the data (eg TextRun) or skip over it + // If you don't know about the type, play safe and skip over it (using + // its length to know where the next record will start) + // + // For now, this work is handled by Record.findChildRecords + + _records = Record.findChildRecords(_docstream,0,_docstream.length); + } + + + /** + * Find the properties from the filesystem, and load them + */ + public void readProperties() { + // DocumentSummaryInformation + dsInf = (DocumentSummaryInformation)getPropertySet("\005DocumentSummaryInformation"); + + // SummaryInformation + sInf = (SummaryInformation)getPropertySet("\005SummaryInformation"); + + // Current User + try { + currentUser = new CurrentUserAtom(filesystem); + } catch(IOException ie) { + System.err.println("Error finding Current User Atom:\n" + ie); + currentUser = new CurrentUserAtom(); + } + } + + + /** + * For a given named property entry, either return it or null if + * it wasn't found + */ + public PropertySet getPropertySet(String setName) { + DocumentInputStream dis; + try { + // Find the entry, and get an input stream for it + dis = filesystem.createDocumentInputStream(setName); + } catch(IOException ie) { + // Oh well, doesn't exist + System.err.println("Error getting property set with name " + setName + "\n" + ie); + return null; + } + + try { + // Create the Property Set + PropertySet set = PropertySetFactory.create(dis); + return set; + } catch(IOException ie) { + // Must be corrupt or something like that + System.err.println("Error creating property set with name " + setName + "\n" + ie); + } catch(org.apache.poi.hpsf.HPSFException he) { + // Oh well, doesn't exist + System.err.println("Error creating property set with name " + setName + "\n" + he); + } + return null; + } + + + /** + * Writes out the slideshow 
file that is represented by an instance of + * this class + * @param out The OutputStream to write to. + * @throws IOException If there is an unexpected IOException from the passed + * in OutputStream + */ + public void write(OutputStream out) throws IOException { + // Get a new Filesystem to write into + POIFSFileSystem outFS = new POIFSFileSystem(); + + // Write out the Property Streams + if(sInf != null) { + writePropertySet("\005SummaryInformation",sInf,outFS); + } + if(dsInf != null) { + writePropertySet("\005DocumentSummaryInformation",dsInf,outFS); + } + + // Need to take special care of PersistPtrHolder and UserEditAtoms + // Store where they used to be, and where they are now + Hashtable persistPtrHolderPos = new Hashtable(); + Hashtable userEditAtomsPos = new Hashtable(); + int lastUserEditAtomPos = -1; + + // Write ourselves out + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for(int i=0; i<_records.length; i++) { + // If it's a special record, record where it was and now is + if(_records[i] instanceof PersistPtrHolder) { + // Update position + PersistPtrHolder pph = (PersistPtrHolder)_records[i]; + int oldPos = pph.getLastOnDiskOffset(); + int newPos = baos.size(); + pph.setLastOnDiskOffet(newPos); + persistPtrHolderPos.put(new Integer(oldPos),new Integer(newPos)); + } + if(_records[i] instanceof UserEditAtom) { + // Update position + UserEditAtom uea = (UserEditAtom)_records[i]; + int oldPos = uea.getLastOnDiskOffset(); + int newPos = baos.size(); + lastUserEditAtomPos = newPos; + uea.setLastOnDiskOffet(newPos); + userEditAtomsPos.put(new Integer(oldPos),new Integer(newPos)); + + // Update internal positions + if(uea.getLastUserEditAtomOffset() != 0) { + Integer ueNewPos = (Integer)userEditAtomsPos.get( new Integer( uea.getLastUserEditAtomOffset() ) ); + uea.setLastUserEditAtomOffset(ueNewPos.intValue()); + } + if(uea.getPersistPointersOffset() != 0) { + Integer ppNewPos = (Integer)persistPtrHolderPos.get( new Integer( 
uea.getPersistPointersOffset() ) ); + uea.setPersistPointersOffset(ppNewPos.intValue()); + } + } + + // Finally, write out + _records[i].writeOut(baos); + } + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + outFS.createDocument(bais,"PowerPoint Document"); + + // Update and write out the Current User atom + if(lastUserEditAtomPos != -1) { + currentUser.setCurrentEditOffset(lastUserEditAtomPos); + } + currentUser.writeToFS(outFS); + + // Send the POIFSFileSystem object out + outFS.writeFilesystem(out); + } + + + /** + * Writes out a given PropertySet + */ + private void writePropertySet(String name, PropertySet set, POIFSFileSystem fs) throws IOException { + try { + MutablePropertySet mSet = new MutablePropertySet(set); + ByteArrayOutputStream bOut = new ByteArrayOutputStream(); + mSet.write(bOut); + byte[] data = bOut.toByteArray(); + ByteArrayInputStream bIn = new ByteArrayInputStream(data); + fs.createDocument(bIn,name); + System.out.println("Wrote property set " + name + " of size " + data.length); + } catch(org.apache.poi.hpsf.WritingNotSupportedException wnse) { + System.err.println("Couldn't write property set with name " + name + " as not supported by HPSF yet"); + } + } + + + /* ******************* fetching methods follow ********************* */ + + + /** + * Returns an array of all the records found in the slideshow + */ + public Record[] getRecords() { return _records; } + + /** + * Returns an array of the bytes of the file. Only correct after a + * call to open or write - at all other times might be wrong! 
+ */ + public byte[] getUnderlyingBytes() { return _docstream; } + + /** + * Fetch the Document Summary Information of the document + */ + public DocumentSummaryInformation getDocumentSummaryInformation() { return dsInf; } + + /** + * Fetch the Summary Information of the document + */ + public SummaryInformation getSummaryInformation() { return sInf; } + + /** + * Fetch the Current User Atom of the document + */ + public CurrentUserAtom getCurrentUserAtom() { return currentUser; } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java b/src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java new file mode 100644 index 0000000000..680e63edb8 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java @@ -0,0 +1,86 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.dev; + +import org.apache.poi.hslf.*; +import org.apache.poi.hslf.model.*; +import org.apache.poi.hslf.record.*; +import org.apache.poi.hslf.usermodel.*; + +import java.io.*; + +/** + * Uses record level code to locate PPDrawing entries. + * Having found them, it sees if they have DDF Textbox records, and if so, + * searches those for text. 
Prints out any text it finds + */ +public class PPDrawingTextListing { + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Need to give a filename"); + System.exit(1); + } + + HSLFSlideShow ss = new HSLFSlideShow(args[0]); + + // Find PPDrawings at any second level position + Record[] records = ss.getRecords(); + for(int i=0; i"); + return; + } + + String filename = args[0]; + + SlideShowRecordDumper foo = new SlideShowRecordDumper(filename); + + foo.printDump(); + foo.close(); + } + + + /** + * Constructs a Powerpoint dump from fileName. Parses the document + * and dumps out the contents + * + * @param fileName The name of the file to read. + * @throws IOException if there is a problem while parsing the document. + */ + public SlideShowRecordDumper(String fileName) throws IOException + { + doc = new HSLFSlideShow(fileName); + } + + /** + * Shuts things down. Closes underlying streams etc + * + * @throws IOException + */ + public void close() throws IOException + { + if(doc != null) { + doc.close(); + } + doc = null; + } + + + public void printDump() throws IOException { + // Prints out the records in the tree + walkTree(0,0,doc.getRecords()); + } + + public String makeHex(int number, int padding) { + String hex = Integer.toHexString(number).toUpperCase(); + while(hex.length() < padding) { + hex = "0" + hex; + } + return hex; + } + + public String reverseHex(String s) { + StringBuffer ret = new StringBuffer(); + + // Get to a multiple of two + if((s.length() / 2) * 2 != s.length()) { s = "0" + s; } + + // Break up into blocks + char[] c = s.toCharArray(); + for(int i=c.length; i>0; i-=2) { + ret.append(c[i-2]); + ret.append(c[i-1]); + if(i != 2) { ret.append(' '); } + } + return ret.toString(); + } + + public int getDiskLen(Record r) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + r.writeOut(baos); + byte[] b = baos.toByteArray(); + return b.length; + } + + + public void 
walkTree(int depth, int pos, Record[] records) throws IOException { + int indent = depth; + String ind = ""; + for(int i=0; i " + rHexType + " )"); + System.out.println(ind + " Len is " + (len-8) + " (" + makeHex((len-8),8) + "), on disk len is " + len ); + System.out.println(); + + // If it has children, show them + if(r.getChildRecords() != null) { + walkTree((depth+3),pos+8,r.getChildRecords()); + } + + // Wind on the position marker + pos += len; + } + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java b/src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java new file mode 100644 index 0000000000..5fff3fa03b --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java @@ -0,0 +1,95 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.dev; + +import org.apache.poi.hslf.*; +import org.apache.poi.hslf.record.*; + +import java.io.*; + +/** + * Uses record level code to locate UserEditAtom records, and other + * persistence related atoms. 
Tries to match them together, to help + * illuminate quite what all the offsets mean + */ +public class UserEditAndPersistListing { + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Need to give a filename"); + System.exit(1); + } + + HSLFSlideShow ss = new HSLFSlideShow(args[0]); + System.out.println(""); + + // Find any persist ones first + Record[] records = ss.getRecords(); + int pos = 0; + for(int i=0; i"); + System.exit(1); + } + + boolean notes = false; + String file; + if(args.length > 1) { + notes = true; + file = args[1]; + } else { + file = args[0]; + } + + PowerPointExtractor ppe = new PowerPointExtractor(file); + System.out.println(ppe.getText(true,notes)); + ppe.close(); + } + + /** + * Creates a PowerPointExtractor + * @param fileName + */ + public PowerPointExtractor(String fileName) throws IOException { + _hslfshow = new HSLFSlideShow(fileName); + _show = new SlideShow(_hslfshow); + _slides = _show.getSlides(); + _notes = _show.getNotes(); + } + + /** + * Creates a PowerPointExtractor + * @param iStream + */ + public PowerPointExtractor(InputStream iStream) throws IOException { + _hslfshow = new HSLFSlideShow(iStream); + _show = new SlideShow(_hslfshow); + _slides = _show.getSlides(); + _notes = _show.getNotes(); + } + + /** + * Creates a PowerPointExtractor + * @param fs + */ + public PowerPointExtractor(POIFSFileSystem fs) throws IOException { + _hslfshow = new HSLFSlideShow(fs); + _show = new SlideShow(_hslfshow); + _slides = _show.getSlides(); + _notes = _show.getNotes(); + } + + + /** + * Shuts down the underlying streams + */ + public void close() throws IOException { + _hslfshow.close(); + _hslfshow = null; + _show = null; + _slides = null; + _notes = null; + } + + + /** + * Fetches all the slide text from the slideshow, but not the notes + */ + public String getText() { + return getText(true,false); + } + + /** + * Fetches all the notes text from the slideshow, but not the slide text + 
*/ + public String getNotes() { + return getText(false,true); + } + + /** + * Fetches text from the slideshow, be it slide text or note text + * @param getSlideText fetch slide text + * @param getNoteText fetch note text + */ + public String getText(boolean getSlideText, boolean getNoteText) { + StringBuffer ret = new StringBuffer(); + + if(getSlideText) { + for(int i=0; i<_slides.length; i++) { + Slide slide = _slides[i]; + TextRun[] runs = slide.getTextRuns(); + for(int j=0; j 0) { + for(int j=0; j= start+len) { + byte[] textBytes = new byte[len]; + System.arraycopy(_contents,start,textBytes,0,len); + lastEditUser = StringUtil.getFromUnicodeLE(textBytes); + } else { + // Fake from the 8 bit version + byte[] textBytes = new byte[(int)usernameLen]; + System.arraycopy(_contents,28,textBytes,0,(int)usernameLen); + lastEditUser = StringUtil.getFromCompressedUnicode(textBytes,0,(int)usernameLen); + } + } + + + /** + * Writes ourselves back out + */ + public void writeOut(OutputStream out) throws IOException { + // Decide on the size + // 8 = atom header + // 20 = up to name + // 4 = revision + // 3 * len = ascii + unicode + int size = 8 + 20 + 4 + (3 * lastEditUser.length()); + _contents = new byte[size]; + + // First we have a 8 byte atom header + System.arraycopy(atomHeader,0,_contents,0,4); + // Size is 20+user len + revision len(4) + int atomSize = 20+4+lastEditUser.length(); + LittleEndian.putInt(_contents,4,atomSize); + + // Now we have the size of the details, which is 20 + LittleEndian.putInt(_contents,8,20); + + // Now the ppt magic number (4 bytes) + System.arraycopy(magicNumber,0,_contents,12,4); + + // Now the current edit offset + LittleEndian.putInt(_contents,16,(int)currentEditOffset); + + // Now the file versions, 2+2+1+1 + LittleEndian.putShort(_contents,20,(short)docFinalVersionA); + LittleEndian.putShort(_contents,22,(short)docFinalVersionB); + _contents[24] = docMajorNo; + _contents[25] = docMinorNo; + + // 2 bytes blank + _contents[26] = 0; + 
_contents[27] = 0; + + // username in bytes in us ascii + byte[] asciiUN = new byte[lastEditUser.length()]; + StringUtil.putCompressedUnicode(lastEditUser,asciiUN,0); + System.arraycopy(asciiUN,0,_contents,28,asciiUN.length); + + // 4 byte release version + LittleEndian.putInt(_contents,28+asciiUN.length,(int)releaseVersion); + + // username in unicode + byte [] ucUN = new byte[lastEditUser.length()*2]; + StringUtil.putUnicodeLE(lastEditUser,ucUN,0); + System.arraycopy(ucUN,0,_contents,28+asciiUN.length+4,ucUN.length); + + // Write out + out.write(_contents); + } + + /** + * Writes ourselves back out to a filesystem + */ + public void writeToFS(POIFSFileSystem fs) throws IOException { + // Grab contents + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + writeOut(baos); + ByteArrayInputStream bais = + new ByteArrayInputStream(baos.toByteArray()); + + // Write out + fs.createDocument(bais,"Current User"); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java b/src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java new file mode 100644 index 0000000000..5a30fae6f4 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java @@ -0,0 +1,70 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; + +/** + * If we come across a record we know has children of (potential) + * interest, but where the record itself is boring, we create one + * of these. It allows us to get at the children, but not much else + * + * @author Nick Burch + */ + +public class DummyRecordWithChildren extends RecordContainer +{ + private Record[] _children; + private byte[] _header; + private long _type; + + /** + * Create a new holder for a boring record with children + */ + protected DummyRecordWithChildren(byte[] source, int start, int len) { + // Just grab the header, not the whole contents + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + _type = LittleEndian.getUShort(_header,2); + + // Find our children + _children = Record.findChildRecords(source,start+8,len-8); + } + + /** + * Return the value we were given at creation + */ + public long getRecordType() { return _type; } + + /** + * Return any children + */ + public Record[] getChildRecords() { return _children; } + + /** + * Write the contents of the record back, so it can be written + * to disk + */ + public void writeOut(OutputStream out) throws IOException { + writeOut(_header[0],_header[1],_type,_children,out); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java b/src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java new file mode 100644 index 0000000000..04d388d255 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java @@ -0,0 +1,90 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file 
except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.ddf.*; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; + +/** + * A wrapper around a DDF (Escher) EscherTextbox Record. Causes the DDF + * Record to be accessible as if it were a HSLF record. + * Note: when asked to write out, will simply put any child records correctly + * into the Escher layer. A call to the escher layer to write out (by the + * parent PPDrawing) will do the actual write out + * + * @author Nick Burch + */ + +public class EscherTextboxWrapper extends RecordContainer +{ + private EscherTextboxRecord _escherRecord; + private Record[] _children; + private long _type; + + /** + * Returns the underlying DDF Escher Record + */ + public EscherTextboxRecord getEscherRecord() { return _escherRecord; } + + /** + * Creates the wrapper for the given DDF Escher Record and children + */ + protected EscherTextboxWrapper(EscherTextboxRecord textbox) { + _escherRecord = textbox; + _type = (long)_escherRecord.getRecordId(); + + // Find the child records in the escher data + byte[] data = _escherRecord.getData(); + _children = Record.findChildRecords(data,0,data.length); + } + + + /** + * Return the type of the escher record (normally in the 0xFnnn range) + */ + public long getRecordType() { return _type; } + + /** + * Return any children + */ + public Record[] getChildRecords() { return _children; } + + /** + * Stores the data for the 
child records back into the Escher layer. + * Doesn't actually do the writing out, that's left to the Escher + * layer to do. Must be called before writeOut/serialize is called + * on the underlying Escher object! + */ + public void writeOut(OutputStream out) throws IOException { + // Write out our children, and stuff them into the Escher layer + + // Grab the children's data + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for(int i=0; i<_children.length; i++) { + _children[i].writeOut(baos); + } + byte[] data = baos.toByteArray(); + + // Save in the escher layer + _escherRecord.setData(data); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/Notes.java b/src/scratchpad/src/org/apache/poi/hslf/record/Notes.java new file mode 100644 index 0000000000..86e692b36a --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/Notes.java @@ -0,0 +1,95 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; + +/** + * Master container for Notes. 
There is one of these for every page of + * notes, and they have certain specific children + * + * @author Nick Burch + */ + +public class Notes extends RecordContainer +{ + private Record[] _children; + private byte[] _header; + private static long _type = 1008l; + + // Links to our more interesting children + private NotesAtom notesAtom; + private PPDrawing ppDrawing; + + /** + * Returns the NotesAtom of this Notes + */ + public NotesAtom getNotesAtom() { return notesAtom; } + /** + * Returns the PPDrawing of this Notes, which has all the + * interesting data in it + */ + public PPDrawing getPPDrawing() { return ppDrawing; } + + + /** + * Set things up, and find our more interesting children + */ + protected Notes(byte[] source, int start, int len) { + // Grab the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Find our children + _children = Record.findChildRecords(source,start+8,len-8); + + // Find the interesting ones in there + for(int i=0; i<_children.length; i++) { + if(_children[i] instanceof NotesAtom) { + notesAtom = (NotesAtom)_children[i]; + //System.out.println("Found notes for sheet " + notesAtom.getSlideID()); + } + if(_children[i] instanceof PPDrawing) { + ppDrawing = (PPDrawing)_children[i]; + } + } + } + + + /** + * We are of type 1008 + */ + public long getRecordType() { return _type; } + + /** + * Return any children + */ + public Record[] getChildRecords() { return _children; } + + /** + * Write the contents of the record back, so it can be written + * to disk + */ + public void writeOut(OutputStream out) throws IOException { + writeOut(_header[0],_header[1],_type,_children,out); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java b/src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java new file mode 100644 index 0000000000..dba56256e4 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java @@ -0,0 +1,120 @@ + +/* 
==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; + +/** + * A Notes Atom (type 1009). Holds information on the parent Notes, such + * as what slide it is tied to + * + * @author Nick Burch + */ + +public class NotesAtom extends RecordAtom +{ + private byte[] _header; + private static long _type = 1009l; + + private int slideID; + private boolean followMasterObjects; + private boolean followMasterScheme; + private boolean followMasterBackground; + private byte[] reserved; + + + public int getSlideID() { return slideID; } + public void setSlideID(int id) { slideID = id; } + + public boolean getFollowMasterObjects() { return followMasterObjects; } + public boolean getFollowMasterScheme() { return followMasterScheme; } + public boolean getFollowMasterBackground() { return followMasterBackground; } + public void setFollowMasterObjects(boolean flag) { followMasterObjects = flag; } + public void setFollowMasterScheme(boolean flag) { followMasterScheme = flag; } + public void setFollowMasterBackground(boolean flag) { followMasterBackground = flag; } + + + /* *************** record code follows ********************** */ + + /** + * For the Notes Atom + */ + 
protected NotesAtom(byte[] source, int start, int len) { + // Sanity Checking + if(len < 8) { len = 8; } + + // Get the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Get the slide ID + slideID = (int)LittleEndian.getInt(source,start+8); + + // Grok the flags, stored as bits + int flags = LittleEndian.getUShort(source,start+12); + if((flags&4) == 4) { + followMasterBackground = true; + } else { + followMasterBackground = false; + } + if((flags&2) == 2) { + followMasterScheme = true; + } else { + followMasterScheme = false; + } + if((flags&1) == 1) { + followMasterObjects = true; + } else { + followMasterObjects = false; + } + + // There might be 2 more bytes, which are a reserved field + reserved = new byte[len-14]; + System.arraycopy(source,start+14,reserved,0,reserved.length); + } + + /** + * We are of type 1009 + */ + public long getRecordType() { return _type; } + + /** + * Write the contents of the record back, so it can be written + * to disk + */ + public void writeOut(OutputStream out) throws IOException { + // Header + out.write(_header); + + // Slide ID + writeLittleEndian(slideID,out); + + // Flags + short flags = 0; + if(followMasterObjects) { flags += 1; } + if(followMasterScheme) { flags += 2; } + if(followMasterBackground) { flags += 4; } + writeLittleEndian(flags,out); + + // Reserved fields + out.write(reserved); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java new file mode 100644 index 0000000000..e19bc0a994 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java @@ -0,0 +1,191 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; + +import org.apache.poi.ddf.*; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; +import java.util.Vector; + +/** + * These are actually wrappers onto Escher drawings. Make use of + * the DDF classes to do useful things with them. + * For now, creates a tree of the Escher records, and then creates any + * PowerPoint (hslf) records found within the EscherTextboxRecord + * (msofbtClientTextbox) records. + * Also provides easy access to the EscherTextboxRecords, so that their + * text may be extracted and used in Sheets + * + * @author Nick Burch + */ + +// For now, pretending to be an atom. 
Might not always be, but that +// would require a wrapping class +public class PPDrawing extends RecordAtom +{ + private byte[] _header; + private long _type; + + private EscherRecord[] childRecords; + private EscherTextboxWrapper[] textboxWrappers; + + + /** + * Get access to the underlying Escher Records + */ + public EscherRecord[] getEscherRecords() { return childRecords; } + + /** + * Get access to the atoms inside Textboxes + */ + public EscherTextboxWrapper[] getTextboxWrappers() { return textboxWrappers; } + + + /* ******************** record stuff follows ********************** */ + + /** + * Sets everything up, groks the escher etc + */ + protected PPDrawing(byte[] source, int start, int len) { + // Get the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Get the type + _type = LittleEndian.getUShort(_header,2); + + // Get the contents for now + byte[] contents = new byte[len]; + System.arraycopy(source,start,contents,0,len); + + + // Build up a tree of Escher records contained within + DefaultEscherRecordFactory erf = new DefaultEscherRecordFactory(); + Vector escherChildren = new Vector(); + findEscherChildren(erf,contents,8,len-8,escherChildren); + + childRecords = new EscherRecord[escherChildren.size()]; + for(int i=0; i= 8) { + findEscherChildren(erf, source, startPos, lenToGo, found); + } + } + + /** + * Look for EscherTextboxRecords + */ + private void findEscherTextboxRecord(EscherRecord[] toSearch, Vector found) { + for(int i=0; i i) { + Vector thisSetsV = sortedSetsV[i]; + sets = new SlideAtomsSet[thisSetsV.size()]; + for(int j=0; j two notes, plus the notes on the slide master + assertEquals(3, notes.length); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java new file mode 100644 index 0000000000..e4d6590be4 --- /dev/null +++ 
b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java @@ -0,0 +1,61 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.usermodel; + + +import junit.framework.TestCase; +import org.apache.poi.hslf.*; +import org.apache.poi.hslf.model.*; + +/** + * Tests that SlideShow returns MetaSheets which have the right text in them + * + * @author Nick Burch (nick at torchbox dot com) + */ +public class TestNotesText extends TestCase { + // SlideShow primed on the test data + private SlideShow ss; + + public TestNotesText() throws Exception { + String dirname = System.getProperty("HSLF.testdata.path"); + String filename = dirname + "/basic_test_ppt_file.ppt"; + HSLFSlideShow hss = new HSLFSlideShow(filename); + ss = new SlideShow(hss); + } + + public void testNotesOne() throws Exception { + Notes notes = ss.getNotes()[1]; + + String[] expectText = new String[] {"These are the notes for page 1"}; + assertEquals(expectText.length, notes.getTextRuns().length); + for(int i=0; i