--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf;
+
+import java.util.*;
+import java.io.*;
+
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSDocument;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.PropertySetFactory;
+import org.apache.poi.hpsf.MutablePropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+
+import org.apache.poi.util.LittleEndian;
+
+import org.apache.poi.hslf.record.*;
+
+/**
+ * This class contains the main functionality for the Powerpoint file
+ * "reader". It is only a very basic class for now
+ *
+ * @author Nick Burch
+ */
+
+public class HSLFSlideShow
+{
+ private InputStream istream;
+ private POIFSFileSystem filesystem;
+
+ // Holds metadata on our document
+ private SummaryInformation sInf;
+ private DocumentSummaryInformation dsInf;
+ private CurrentUserAtom currentUser;
+
+ // Low level contents of the file
+ private byte[] _docstream;
+
+ // Low level contents
+ private Record[] _records;
+
+ /**
+ * Constructs a Powerpoint document from fileName. Parses the document
+ * and places all the important stuff into data structures.
+ *
+ * @param fileName The name of the file to read.
+ * @throws IOException if there is a problem while parsing the document.
+ */
+ public HSLFSlideShow(String fileName) throws IOException
+ {
+ this(new FileInputStream(fileName));
+ }
+
+ /**
+ * Constructs a Powerpoint document from an input stream. Parses the
+ * document and places all the important stuff into data structures.
+ *
+ * @param inputStream the source of the data
+ * @throws IOException if there is a problem while parsing the document.
+ */
+ public HSLFSlideShow(InputStream inputStream) throws IOException
+ {
+ //do Ole stuff
+ this(new POIFSFileSystem(inputStream));
+ istream = inputStream;
+ }
+
+ /**
+ * Constructs a Powerpoint document from a POIFS Filesystem. Parses the
+ * document and places all the important stuff into data structures.
+ *
+ * @param filesystem the POIFS FileSystem to read from
+ * @throws IOException if there is a problem while parsing the document.
+ */
+ public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
+ {
+ this.filesystem = filesystem;
+
+ // Go find a PowerPoint document in the stream
+ // Save anything useful we come across
+ readFIB();
+
+ // Look for Property Streams:
+ readProperties();
+ }
+
+
+ /**
+ * Shuts things down. Closes underlying streams etc
+ *
+ * @throws IOException
+ */
+ public void close() throws IOException
+ {
+ if(istream != null) {
+ istream.close();
+ }
+ filesystem = null;
+ }
+
+
+ /**
+ * Extracts the main document stream from the POI file then hands off
+ * to other functions that parse other areas.
+ *
+ * @throws IOException
+ */
+ private void readFIB() throws IOException
+ {
+ // Get the main document stream
+ DocumentEntry docProps =
+ (DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document");
+
+ // Grab the document stream
+ _docstream = new byte[docProps.getSize()];
+ filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream);
+
+ // The format of records in a powerpoint file are:
+ // <little endian 2 byte "info">
+ // <little endian 2 byte "type">
+ // <little endian 4 byte "length">
+ // If it has a zero length, following it will be another record
+ // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
+ // If it has a length, depending on its type it may have children or data
+ // If it has children, these will follow straight away
+ // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
+ // If it has data, this will come straigh after, and run for the length
+ // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
+ // All lengths given exclude the 8 byte record header
+ // (Data records are known as Atoms)
+
+ // Document should start with:
+ // 0F 00 E8 03 ## ## ## ##
+ // (type 1000 = document, info 00 0f is normal, rest is document length)
+ // 01 00 E9 03 28 00 00 00
+ // (type 1001 = document atom, info 00 01 normal, 28 bytes long)
+ // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
+ // 05 00 00 00 0A 00 00 00 xx xx xx
+ // (the contents of the document atom, not sure what it means yet)
+ // (records then follow)
+
+ // When parsing a document, look to see if you know about that type
+ // of the current record. If you know it's a type that has children,
+ // process the record's data area looking for more records
+ // If you know about the type and it doesn't have children, either do
+ // something with the data (eg TextRun) or skip over it
+ // If you don't know about the type, play safe and skip over it (using
+ // its length to know where the next record will start)
+ //
+ // For now, this work is handled by Record.findChildRecords
+
+ _records = Record.findChildRecords(_docstream,0,_docstream.length);
+ }
+
+
+ /**
+ * Find the properties from the filesystem, and load them
+ */
+ public void readProperties() {
+ // DocumentSummaryInformation
+ dsInf = (DocumentSummaryInformation)getPropertySet("\005DocumentSummaryInformation");
+
+ // SummaryInformation
+ sInf = (SummaryInformation)getPropertySet("\005SummaryInformation");
+
+ // Current User
+ try {
+ currentUser = new CurrentUserAtom(filesystem);
+ } catch(IOException ie) {
+ System.err.println("Error finding Current User Atom:\n" + ie);
+ currentUser = new CurrentUserAtom();
+ }
+ }
+
+
+ /**
+ * For a given named property entry, either return it or null if
+ * if it wasn't found
+ */
+ public PropertySet getPropertySet(String setName) {
+ DocumentInputStream dis;
+ try {
+ // Find the entry, and get an input stream for it
+ dis = filesystem.createDocumentInputStream(setName);
+ } catch(IOException ie) {
+ // Oh well, doesn't exist
+ System.err.println("Error getting property set with name " + setName + "\n" + ie);
+ return null;
+ }
+
+ try {
+ // Create the Property Set
+ PropertySet set = PropertySetFactory.create(dis);
+ return set;
+ } catch(IOException ie) {
+ // Must be corrupt or something like that
+ System.err.println("Error creating property set with name " + setName + "\n" + ie);
+ } catch(org.apache.poi.hpsf.HPSFException he) {
+ // Oh well, doesn't exist
+ System.err.println("Error creating property set with name " + setName + "\n" + he);
+ }
+ return null;
+ }
+
+
+ /**
+ * Writes out the slideshow file the is represented by an instance of
+ * this class
+ * @param out The OutputStream to write to.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in OutputStream
+ */
+ public void write(OutputStream out) throws IOException {
+ // Get a new Filesystem to write into
+ POIFSFileSystem outFS = new POIFSFileSystem();
+
+ // Write out the Property Streams
+ if(sInf != null) {
+ writePropertySet("\005SummaryInformation",sInf,outFS);
+ }
+ if(dsInf != null) {
+ writePropertySet("\005DocumentSummaryInformation",dsInf,outFS);
+ }
+
+ // Need to take special care of PersistPtrHolder and UserEditAtoms
+ // Store where they used to be, and where they are now
+ Hashtable persistPtrHolderPos = new Hashtable();
+ Hashtable userEditAtomsPos = new Hashtable();
+ int lastUserEditAtomPos = -1;
+
+ // Write ourselves out
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ for(int i=0; i<_records.length; i++) {
+ // If it's a special record, record where it was and now is
+ if(_records[i] instanceof PersistPtrHolder) {
+ // Update position
+ PersistPtrHolder pph = (PersistPtrHolder)_records[i];
+ int oldPos = pph.getLastOnDiskOffset();
+ int newPos = baos.size();
+ pph.setLastOnDiskOffet(newPos);
+ persistPtrHolderPos.put(new Integer(oldPos),new Integer(newPos));
+ }
+ if(_records[i] instanceof UserEditAtom) {
+ // Update position
+ UserEditAtom uea = (UserEditAtom)_records[i];
+ int oldPos = uea.getLastOnDiskOffset();
+ int newPos = baos.size();
+ lastUserEditAtomPos = newPos;
+ uea.setLastOnDiskOffet(newPos);
+ userEditAtomsPos.put(new Integer(oldPos),new Integer(newPos));
+
+ // Update internal positions
+ if(uea.getLastUserEditAtomOffset() != 0) {
+ Integer ueNewPos = (Integer)userEditAtomsPos.get( new Integer( uea.getLastUserEditAtomOffset() ) );
+ uea.setLastUserEditAtomOffset(ueNewPos.intValue());
+ }
+ if(uea.getPersistPointersOffset() != 0) {
+ Integer ppNewPos = (Integer)persistPtrHolderPos.get( new Integer( uea.getPersistPointersOffset() ) );
+ uea.setPersistPointersOffset(ppNewPos.intValue());
+ }
+ }
+
+ // Finally, write out
+ _records[i].writeOut(baos);
+ }
+ ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+ outFS.createDocument(bais,"PowerPoint Document");
+
+ // Update and write out the Current User atom
+ if(lastUserEditAtomPos != -1) {
+ currentUser.setCurrentEditOffset(lastUserEditAtomPos);
+ }
+ currentUser.writeToFS(outFS);
+
+ // Send the POIFSFileSystem object out
+ outFS.writeFilesystem(out);
+ }
+
+
+ /**
+ * Writes out a given ProperySet
+ */
+ private void writePropertySet(String name, PropertySet set, POIFSFileSystem fs) throws IOException {
+ try {
+ MutablePropertySet mSet = new MutablePropertySet(set);
+ ByteArrayOutputStream bOut = new ByteArrayOutputStream();
+ mSet.write(bOut);
+ byte[] data = bOut.toByteArray();
+ ByteArrayInputStream bIn = new ByteArrayInputStream(data);
+ fs.createDocument(bIn,name);
+ System.out.println("Wrote property set " + name + " of size " + data.length);
+ } catch(org.apache.poi.hpsf.WritingNotSupportedException wnse) {
+ System.err.println("Couldn't write property set with name " + name + " as not supported by HPSF yet");
+ }
+ }
+
+
+ /* ******************* fetching methods follow ********************* */
+
+
+ /**
+ * Returns an array of all the records found in the slideshow
+ */
+ public Record[] getRecords() { return _records; }
+
+ /**
+ * Returns an array of the bytes of the file. Only correct after a
+ * call to open or write - at all other times might be wrong!
+ */
+ public byte[] getUnderlyingBytes() { return _docstream; }
+
+ /**
+ * Fetch the Document Summary Information of the document
+ */
+ public DocumentSummaryInformation getDocumentSummaryInformation() { return dsInf; }
+
+ /**
+ * Fetch the Summary Information of the document
+ */
+ public SummaryInformation getSummaryInformation() { return sInf; }
+
+ /**
+ * Fetch the Current User Atom of the document
+ */
+ public CurrentUserAtom getCurrentUserAtom() { return currentUser; }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.dev;
+
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+import org.apache.poi.hslf.record.*;
+import org.apache.poi.hslf.usermodel.*;
+
+import java.io.*;
+
+/**
+ * Uses record level code to locate PPDrawing entries.
+ * Having found them, it sees if they have DDF Textbox records, and if so,
+ *  searches those for text. Prints out any text it finds.
+ * Takes a single argument, the name of the file to process.
+ */
+public class PPDrawingTextListing {
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Need to give a filename");
+            System.exit(1);
+        }
+
+        HSLFSlideShow ss = new HSLFSlideShow(args[0]);
+        try {
+            listDrawingText(ss.getRecords());
+        } finally {
+            // Release the file we opened, even if the listing blew up
+            ss.close();
+        }
+    }
+
+    /**
+     * Finds PPDrawings at any second level position, and prints the
+     *  text held by their textbox wrappers.
+     */
+    private static void listDrawingText(Record[] records) {
+        for(int i=0; i<records.length; i++) {
+            Record[] children = records[i].getChildRecords();
+            if(children == null) { continue; }
+
+            for(int j=0; j<children.length; j++) {
+                if(! (children[j] instanceof PPDrawing)) { continue; }
+                System.out.println("Found PPDrawing at " + j + " in top level record " + i + " (" + records[i].getRecordType() + ")" );
+
+                // Look for EscherTextboxWrapper's
+                PPDrawing ppd = (PPDrawing)children[j];
+                EscherTextboxWrapper[] wrappers = ppd.getTextboxWrappers();
+                System.out.println(" Has " + wrappers.length + " textbox wrappers within");
+
+                // Loop over the wrappers, showing what they contain
+                for(int k=0; k<wrappers.length; k++) {
+                    Record[] pptatoms = wrappers[k].getChildRecords();
+                    System.out.println(" " + k + " has " + pptatoms.length + " PPT atoms within");
+
+                    // Loop over the records, printing the text
+                    for(int l=0; l<pptatoms.length; l++) {
+                        String text = textOf(pptatoms[l]);
+                        if(text != null) {
+                            // Text is stored with \r as the line break
+                            text = text.replace('\r','\n');
+                            System.out.println(" ''" + text + "''");
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the text of a TextBytesAtom or TextCharsAtom, or null
+     *  for any other kind of record.
+     */
+    private static String textOf(Record record) {
+        String text = null;
+        if(record instanceof TextBytesAtom) {
+            text = ((TextBytesAtom)record).getText();
+        }
+        if(record instanceof TextCharsAtom) {
+            text = ((TextCharsAtom)record).getText();
+        }
+        return text;
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.dev;
+
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+import org.apache.poi.hslf.record.*;
+import org.apache.poi.hslf.usermodel.*;
+
+import java.io.*;
+
+/**
+ * Uses record level code to locate SlideListWithText entries.
+ * Having found them, it sees if they have any text, and prints out
+ *  what it finds.
+ * Takes a single argument, the name of the file to process.
+ */
+public class SLWTTextListing {
+    /** Record type of the top level Document record */
+    private static final long DOCUMENT_RECORD_TYPE = 1000L;
+
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Need to give a filename");
+            System.exit(1);
+        }
+
+        HSLFSlideShow ss = new HSLFSlideShow(args[0]);
+        try {
+            // Find the documents, and then their SLWT
+            Record[] records = ss.getRecords();
+            for(int i=0; i<records.length; i++) {
+                if(records[i].getRecordType() == DOCUMENT_RECORD_TYPE) {
+                    listDocument(i, records[i]);
+                }
+            }
+        } finally {
+            // Release the file we opened, even if the listing blew up
+            ss.close();
+        }
+    }
+
+    /**
+     * Prints details of every SlideListWithText that is a direct
+     *  child of the given document record.
+     */
+    private static void listDocument(int docIndex, Record docRecord) {
+        Record[] docChildren = docRecord.getChildRecords();
+        if(docChildren == null) { return; }
+
+        for(int j=0; j<docChildren.length; j++) {
+            if(! (docChildren[j] instanceof SlideListWithText)) { continue; }
+            System.out.println("Found SLWT in document at " + docIndex);
+            System.out.println(" Has " + docChildren[j].getChildRecords().length + " children");
+
+            // Grab the SlideAtomSet's, which contain
+            //  a SlidePersistAtom and then a bunch of text
+            //  + related records
+            SlideListWithText slwt = (SlideListWithText)docChildren[j];
+            SlideListWithText.SlideAtomsSet[] thisSets = slwt.getSlideAtomsSets();
+            System.out.println(" Has " + thisSets.length + " AtomSets in it");
+
+            // Loop over the sets, showing what they contain
+            for(int k=0; k<thisSets.length; k++) {
+                SlidePersistAtom spa = thisSets[k].getSlidePersistAtom();
+                System.out.println(" " + k + " has slide id " + spa.getSlideIdentifier() );
+                System.out.println(" " + k + " has ref id " + spa.getRefID() );
+
+                // Loop over the records, printing the text
+                Record[] slwtc = thisSets[k].getSlideRecords();
+                for(int l=0; l<slwtc.length; l++) {
+                    String text = textOf(slwtc[l]);
+                    if(text != null) {
+                        // Text is stored with \r as the line break
+                        text = text.replace('\r','\n');
+                        System.out.println(" ''" + text + "''");
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the text of a TextBytesAtom or TextCharsAtom, or null
+     *  for any other kind of record.
+     */
+    private static String textOf(Record record) {
+        String text = null;
+        if(record instanceof TextBytesAtom) {
+            text = ((TextBytesAtom)record).getText();
+        }
+        if(record instanceof TextCharsAtom) {
+            text = ((TextCharsAtom)record).getText();
+        }
+        return text;
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.dev;
+
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.record.*;
+
+import java.io.*;
+
+/**
+ * Uses record level code to locate Notes and Slide records.
+ * Having found them, it asks their SlideAtom or NotesAtom entries
+ *  what they are all about. Useful for checking the matching between
+ *  Slides, Master Slides and Notes.
+ * Takes a single argument, the name of the file to process.
+ */
+public class SlideAndNotesAtomListing {
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Need to give a filename");
+            System.exit(1);
+        }
+
+        HSLFSlideShow ss = new HSLFSlideShow(args[0]);
+        try {
+            System.out.println("");
+
+            // Find either Slides or Notes, at the top level
+            Record[] records = ss.getRecords();
+            for(int i=0; i<records.length; i++) {
+                Record r = records[i];
+
+                // When we find them, print out their IDs
+                if(r instanceof Slide) {
+                    SlideAtom sa = ((Slide)r).getSlideAtom();
+                    System.out.println("Found Slide at " + i);
+                    System.out.println(" Slide's master ID is " + sa.getMasterID());
+                    System.out.println(" Slide's notes ID is " + sa.getNotesID());
+                    System.out.println("");
+                }
+                if(r instanceof Notes) {
+                    NotesAtom na = ((Notes)r).getNotesAtom();
+                    System.out.println("Found Notes at " + i);
+                    System.out.println(" Notes ID is " + na.getSlideID());
+                    System.out.println("");
+                }
+            }
+        } finally {
+            // Release the file we opened, even if the listing blew up
+            ss.close();
+        }
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.dev;
+
+import java.util.*;
+import java.io.*;
+
+import org.apache.poi.ddf.*;
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.record.*;
+
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * This class provides a way to view the contents of a powerpoint file.
+ * It will use the recored layer to grok the contents of the file, and
+ * will print out what it finds.
+ *
+ * @author Nick Burch
+ */
+
+public class SlideShowRecordDumper
+{
+ private HSLFSlideShow doc;
+
+ /**
+ * right now this function takes one parameter: a ppt file, and outputs
+ * a dump of what it contains
+ */
+ public static void main(String args[]) throws IOException
+ {
+ if(args.length == 0) {
+ System.err.println("Useage: SlideShowDumper <filename>");
+ return;
+ }
+
+ String filename = args[0];
+
+ SlideShowRecordDumper foo = new SlideShowRecordDumper(filename);
+
+ foo.printDump();
+ foo.close();
+ }
+
+
+ /**
+ * Constructs a Powerpoint dump from fileName. Parses the document
+ * and dumps out the contents
+ *
+ * @param fileName The name of the file to read.
+ * @throws IOException if there is a problem while parsing the document.
+ */
+ public SlideShowRecordDumper(String fileName) throws IOException
+ {
+ doc = new HSLFSlideShow(fileName);
+ }
+
+ /**
+ * Shuts things down. Closes underlying streams etc
+ *
+ * @throws IOException
+ */
+ public void close() throws IOException
+ {
+ if(doc != null) {
+ doc.close();
+ }
+ doc = null;
+ }
+
+
+ public void printDump() throws IOException {
+ // Prints out the records in the tree
+ walkTree(0,0,doc.getRecords());
+ }
+
+ public String makeHex(int number, int padding) {
+ String hex = Integer.toHexString(number).toUpperCase();
+ while(hex.length() < padding) {
+ hex = "0" + hex;
+ }
+ return hex;
+ }
+
+ public String reverseHex(String s) {
+ StringBuffer ret = new StringBuffer();
+
+ // Get to a multiple of two
+ if((s.length() / 2) * 2 != s.length()) { s = "0" + s; }
+
+ // Break up into blocks
+ char[] c = s.toCharArray();
+ for(int i=c.length; i>0; i-=2) {
+ ret.append(c[i-2]);
+ ret.append(c[i-1]);
+ if(i != 2) { ret.append(' '); }
+ }
+ return ret.toString();
+ }
+
+ public int getDiskLen(Record r) throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ r.writeOut(baos);
+ byte[] b = baos.toByteArray();
+ return b.length;
+ }
+
+
+ public void walkTree(int depth, int pos, Record[] records) throws IOException {
+ int indent = depth;
+ String ind = "";
+ for(int i=0; i<indent; i++) { ind += " "; }
+
+ for(int i=0; i<records.length; i++) {
+ Record r = records[i];
+
+ // Figure out how big it is
+ int len = getDiskLen(r);
+
+ // Grab the type as hex
+ String hexType = makeHex((int)r.getRecordType(),4);
+ String rHexType = reverseHex(hexType);
+
+ // Grab the hslf.record type
+ Class c = r.getClass();
+ String cname = c.toString();
+ if(cname.startsWith("class ")) {
+ cname = cname.substring(6);
+ }
+ if(cname.startsWith("org.apache.poi.hslf.record.")) {
+ cname = cname.substring(27);
+ }
+
+ // Display the record
+ System.out.println(ind + "At position " + pos + " (" + makeHex(pos,6) + "):");
+ System.out.println(ind + " Record is of type " + cname);
+ System.out.println(ind + " Type is " + r.getRecordType() + " (" + hexType + " -> " + rHexType + " )");
+ System.out.println(ind + " Len is " + (len-8) + " (" + makeHex((len-8),8) + "), on disk len is " + len );
+ System.out.println();
+
+ // If it has children, show them
+ if(r.getChildRecords() != null) {
+ walkTree((depth+3),pos+8,r.getChildRecords());
+ }
+
+ // Wind on the position marker
+ pos += len;
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.dev;
+
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.record.*;
+
+import java.io.*;
+
+/**
+ * Uses record level code to locate UserEditAtom records, and other
+ *  persistence related atoms. Tries to match them together, to help
+ *  illuminate quite what all the offsets mean.
+ * Takes a single argument, the name of the file to process.
+ */
+public class UserEditAndPersistListing {
+    /** Record type of a PersistPtrFullBlock */
+    private static final long PERSIST_PTR_FULL_BLOCK = 6001L;
+    /** Record type of a PersistPtrIncrementalBlock */
+    private static final long PERSIST_PTR_INCREMENTAL_BLOCK = 6002L;
+
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Need to give a filename");
+            System.exit(1);
+        }
+
+        HSLFSlideShow ss = new HSLFSlideShow(args[0]);
+        try {
+            System.out.println("");
+
+            // Find any persist ones first
+            Record[] records = ss.getRecords();
+            int pos = 0;
+            for(int i=0; i<records.length; i++) {
+                Record r = records[i];
+
+                if(r.getRecordType() == PERSIST_PTR_FULL_BLOCK) {
+                    System.out.println("Found PersistPtrFullBlock at " + pos + " (" + Integer.toHexString(pos) + ")");
+                }
+                if(r.getRecordType() == PERSIST_PTR_INCREMENTAL_BLOCK) {
+                    System.out.println("Found PersistPtrIncrementalBlock at " + pos + " (" + Integer.toHexString(pos) + ")");
+                }
+
+                // Increase the position by the on disk size
+                pos += onDiskSize(r);
+            }
+
+            System.out.println("");
+
+            // Now look for UserEditAtoms
+            pos = 0;
+            for(int i=0; i<records.length; i++) {
+                Record r = records[i];
+
+                if(r instanceof UserEditAtom) {
+                    UserEditAtom uea = (UserEditAtom)r;
+                    System.out.println("Found UserEditAtom at " + pos + " (" + Integer.toHexString(pos) + ")");
+                    System.out.println(" lastUserEditAtomOffset = " + uea.getLastUserEditAtomOffset() );
+                    System.out.println(" persistPointersOffset = " + uea.getPersistPointersOffset() );
+                    System.out.println(" docPersistRef = " + uea.getDocPersistRef() );
+                    System.out.println(" maxPersistWritten = " + uea.getMaxPersistWritten() );
+                }
+
+                // Increase the position by the on disk size
+                pos += onDiskSize(r);
+            }
+
+            System.out.println("");
+
+
+            // Query the CurrentUserAtom
+            CurrentUserAtom cua = ss.getCurrentUserAtom();
+            System.out.println("Checking Current User Atom");
+            System.out.println(" Thinks the CurrentEditOffset is " + cua.getCurrentEditOffset());
+
+            System.out.println("");
+        } finally {
+            // Release the file we opened, even if the listing blew up
+            ss.close();
+        }
+    }
+
+    /**
+     * Works out how many bytes a record takes up when written out,
+     *  by writing it to an in-memory buffer.
+     */
+    private static int onDiskSize(Record r) throws IOException {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        r.writeOut(baos);
+        return baos.size();
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.exceptions;
+
+/**
+ * This exception is thrown when we try to create a record, and the
+ * underlying data just doesn't match up
+ *
+ * @author Nick Burch
+ */
+
+public class InvalidRecordFormatException extends Exception
+{
+ public InvalidRecordFormatException(String s) {
+ super(s);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.extractor;
+
+import java.io.*;
+import java.util.HashSet;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+import org.apache.poi.hslf.usermodel.*;
+
+/**
+ * This class can be used to extract text from a PowerPoint file.
+ * Can optionally also get the notes from one.
+ *
+ * @author Nick Burch
+ */
+
+public class PowerPointExtractor
+{
+    private HSLFSlideShow _hslfshow;
+    private SlideShow _show;
+    private Slide[] _slides;
+    private Notes[] _notes;
+
+    /**
+     * Basic extractor. Returns all the text, and optionally all the notes.
+     * With one argument, that is the file and only slide text is printed.
+     * With more than one, the second is the file and notes are printed
+     *  too. The first argument is not inspected, so anything given there
+     *  acts as the -notes flag.
+     */
+    public static void main(String args[]) throws IOException
+    {
+        if(args.length < 1) {
+            System.err.println("Useage:");
+            System.err.println("\tPowerPointExtractor [-notes] <file>");
+            System.exit(1);
+        }
+
+        boolean notes = false;
+        String file;
+        if(args.length > 1) {
+            notes = true;
+            file = args[1];
+        } else {
+            file = args[0];
+        }
+
+        PowerPointExtractor ppe = new PowerPointExtractor(file);
+        try {
+            System.out.println(ppe.getText(true,notes));
+        } finally {
+            // Release the file even if the extraction failed
+            ppe.close();
+        }
+    }
+
+    /**
+     * Creates a PowerPointExtractor, reading from the named file.
+     * If the slideshow can't be built, the file is closed again
+     *  before the exception is passed on.
+     *
+     * @param fileName the name of the file to extract text from
+     * @throws IOException if the file can't be read or parsed
+     */
+    public PowerPointExtractor(String fileName) throws IOException {
+        HSLFSlideShow opened = new HSLFSlideShow(fileName);
+        boolean ready = false;
+        try {
+            init(opened);
+            ready = true;
+        } finally {
+            // We caused the file to be opened, so nobody else can
+            //  release it if things go wrong
+            if(!ready) {
+                opened.close();
+            }
+        }
+    }
+
+    /**
+     * Creates a PowerPointExtractor, reading from the given stream.
+     * The stream is closed by {@link #close()}.
+     *
+     * @param iStream the stream to extract text from
+     * @throws IOException if the stream can't be read or parsed
+     */
+    public PowerPointExtractor(InputStream iStream) throws IOException {
+        init(new HSLFSlideShow(iStream));
+    }
+
+    /**
+     * Creates a PowerPointExtractor, reading from the given filesystem
+     *
+     * @param fs the POIFS filesystem to extract text from
+     * @throws IOException if the filesystem can't be read or parsed
+     */
+    public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
+        init(new HSLFSlideShow(fs));
+    }
+
+    /**
+     * Shared constructor logic: builds the usermodel SlideShow on top
+     *  of the low level one, and fetches its slides and notes.
+     */
+    private void init(HSLFSlideShow hslfshow) throws IOException {
+        _hslfshow = hslfshow;
+        _show = new SlideShow(_hslfshow);
+        _slides = _show.getSlides();
+        _notes = _show.getNotes();
+    }
+
+
+    /**
+     * Shuts down the underlying streams. Once closed, no more text
+     *  can be fetched. Calling this a second time does nothing.
+     */
+    public void close() throws IOException {
+        if(_hslfshow != null) {
+            _hslfshow.close();
+        }
+        _hslfshow = null;
+        _show = null;
+        _slides = null;
+        _notes = null;
+    }
+
+
+    /**
+     * Fetches all the slide text from the slideshow, but not the notes
+     */
+    public String getText() {
+        return getText(true,false);
+    }
+
+    /**
+     * Fetches all the notes text from the slideshow, but not the slide text
+     */
+    public String getNotes() {
+        return getText(false,true);
+    }
+
+    /**
+     * Fetches text from the slideshow, be it slide text or note text.
+     * Each run of text ends up on a line of its own. If both kinds are
+     *  asked for, the slide text comes first.
+     *
+     * @param getSlideText fetch slide text
+     * @param getNoteText fetch note text
+     * @return the requested text, which may be empty
+     */
+    public String getText(boolean getSlideText, boolean getNoteText) {
+        StringBuffer ret = new StringBuffer();
+
+        if(getSlideText) {
+            for(int i=0; i<_slides.length; i++) {
+                appendRuns(ret, _slides[i].getTextRuns());
+            }
+            if(getNoteText) {
+                ret.append(" ");
+            }
+        }
+
+        if(getNoteText) {
+            // Not currently using _notes, as that can have the notes of
+            //  master sheets in. Grab Slide list, then work from there,
+            //  but ensure no duplicates
+            HashSet seenNotes = new HashSet();
+            for(int i=0; i<_slides.length; i++) {
+                Notes notes = _slides[i].getNotesSheet();
+                if(notes == null) { continue; }
+
+                // add() reports if this is the first time we've seen it
+                Integer id = new Integer(notes.getSheetNumber());
+                if(! seenNotes.add(id)) { continue; }
+
+                appendRuns(ret, notes.getTextRuns());
+            }
+        }
+
+        return ret.toString();
+    }
+
+    /**
+     * Adds the text of each of the runs to the buffer, making sure
+     *  that each one ends with a newline. Copes with there being
+     *  no runs at all.
+     */
+    private static void appendRuns(StringBuffer ret, TextRun[] runs) {
+        if(runs == null) { return; }
+
+        for(int j=0; j<runs.length; j++) {
+            String text = runs[j].getText();
+            ret.append(text);
+            if(! text.endsWith("\n")) {
+                ret.append("\n");
+            }
+        }
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.model;
+
+import java.util.*;
+
+import org.apache.poi.hslf.record.*;
+import org.apache.poi.hslf.record.SlideListWithText.*;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * This class represents a slide's notes in a PowerPoint Document. It
+ * allows access to the text within, and the layout. For now, it only
+ * does the text side of things though
+ *
+ * @author Nick Burch
+ */
+
+public class Notes extends Sheet
+{
+
+ private int _sheetNo;
+ private org.apache.poi.hslf.record.Notes _notes;
+ private TextRun[] _runs;
+
+ /**
+ * Constructs a Notes Sheet from the given Notes record.
+ * Initialises TextRuns, to provide easier access to the text
+ *
+ * @param notes the Notes record to read from
+ */
+ public Notes (org.apache.poi.hslf.record.Notes notes) {
+ _notes = notes;
+
+ // Grab the sheet number, via the NotesAtom
+ _sheetNo = _notes.getNotesAtom().getSlideID();
+
+ // Now, build up TextRuns from pairs of TextHeaderAtom and
+ // one of TextBytesAtom or TextCharsAtom, found inside
+ // EscherTextboxWrapper's in the PPDrawing
+ _runs = findTextRuns(_notes.getPPDrawing());
+ }
+
+
+ // Accesser methods follow
+
+ /**
+ * Returns an array of all the TextRuns found
+ */
+ public TextRun[] getTextRuns() { return _runs; }
+
+ /**
+ * Returns the sheet number
+ */
+ public int getSheetNumber() { return _sheetNo; }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.model;
+
+import org.apache.poi.hslf.record.*;
+import java.util.Vector;
+
+/**
+ * This class defines the common format of "Sheets" in a powerpoint
+ * document. Such sheets could be Slides, Notes, Master etc
+ *
+ * @author Nick Burch
+ */
+
+public abstract class Sheet
+{
+    /**
+     * Returns an array of all the TextRuns in the sheet.
+     */
+    public abstract TextRun[] getTextRuns();
+
+    /**
+     * Returns the sheet number
+     */
+    public abstract int getSheetNumber();
+
+    /**
+     * For a given PPDrawing, grab all the TextRuns held in the child
+     *  records of its EscherTextboxWrappers
+     *
+     * @param ppdrawing the PPDrawing to search through
+     * @return the TextRuns found, in the order they were come across
+     */
+    public static TextRun[] findTextRuns(PPDrawing ppdrawing) {
+        Vector runsV = new Vector();
+        EscherTextboxWrapper[] wrappers = ppdrawing.getTextboxWrappers();
+        for(int i=0; i<wrappers.length; i++) {
+            findTextRuns(wrappers[i].getChildRecords(),runsV);
+        }
+
+        // Build an array, more useful than a vector
+        TextRun[] runs = new TextRun[runsV.size()];
+        for(int i=0; i<runs.length; i++) {
+            runs[i] = (TextRun)runsV.get(i);
+        }
+        return runs;
+    }
+
+    /**
+     * Scans through the supplied record array, looking for
+     *  a TextHeaderAtom followed by one of a TextBytesAtom or
+     *  a TextCharsAtom. Builds up TextRuns from these.
+     * A TextHeaderAtom followed by a record of type 4010 is quietly
+     *  skipped, and one followed by anything else is reported on
+     *  System.err. Neither adds anything to the vector.
+     *
+     * @param records the records to build from
+     * @param found vector to add any found to
+     */
+    protected static void findTextRuns(Record[] records, Vector found) {
+        // Stop one short of the end, since a TextHeaderAtom in the
+        //  last slot has no following record to pair up with
+        for(int i=0; i<(records.length-1); i++) {
+            if(! (records[i] instanceof TextHeaderAtom)) {
+                continue;
+            }
+            TextHeaderAtom tha = (TextHeaderAtom)records[i];
+            Record next = records[i+1];
+
+            TextRun trun = null;
+            if(next instanceof TextCharsAtom) {
+                trun = new TextRun(tha,(TextCharsAtom)next);
+            } else if(next instanceof TextBytesAtom) {
+                trun = new TextRun(tha,(TextBytesAtom)next);
+            } else if(next.getRecordType() == 4010L) {
+                // Safe to ignore, there is no text to build a run from
+            } else {
+                System.err.println("Found a TextHeaderAtom not followed by a TextBytesAtom or TextCharsAtom: Followed by " + next.getRecordType());
+            }
+
+            // Only ever hand back real runs (never a null placeholder),
+            //  and only step over the following record when it was
+            //  used up as the text atom of a run
+            if(trun != null) {
+                found.add(trun);
+                i++;
+            }
+        }
+    }
+
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.model;
+
+import java.util.*;
+
+import org.apache.poi.hslf.record.*;
+import org.apache.poi.hslf.record.SlideListWithText.*;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * This class represents a slide in a PowerPoint Document. It allows
+ * access to the text within, and the layout. For now, it only does
+ * the text side of things though
+ *
+ * @author Nick Burch
+ */
+
+public class Slide extends Sheet
+{
+
+    /** Sheet number. Not read from the record yet, so always -1 */
+    private int _sheetNo;
+    private org.apache.poi.hslf.record.Slide _slide;
+    private SlideAtomsSet[] _atomSet;
+    private TextRun[] _runs;
+    private TextRun[] _otherRuns; // Any from the PPDrawing, shouldn't really be any though
+    private Notes _notes;
+
+    /**
+     * Constructs a Slide from the Slide record, and the SlideAtomsSets
+     *  for the text that isn't embedded in the PPDrawing.
+     * Initialises TextRuns, to provide easier access to the text
+     *
+     * @param slide the Slide record we're based on
+     * @param notes the Notes sheet to hand back from getNotesSheet(), may be null
+     * @param atomSet the SlideAtomsSets to get the text from
+     */
+    public Slide(org.apache.poi.hslf.record.Slide slide, Notes notes, SlideAtomsSet[] atomSet) {
+        _slide = slide;
+        _notes = notes;
+        _atomSet = atomSet;
+
+        // The sheet number isn't fetched from the SlideAtom yet
+        _sheetNo = -1;
+
+        // Grab the TextRuns from the PPDrawing
+        _otherRuns = findTextRuns(_slide.getPPDrawing());
+
+        // Keep only one SlideAtomsSet per ref ID. When the same ID turns
+        //  up again, the later set takes over the slot of the earlier one
+        Map slotForRefId = new HashMap();
+        List keptSets = new ArrayList();
+        for(int i=0; i<_atomSet.length; i++) {
+            SlideAtomsSet candidate = _atomSet[i];
+            Integer refId = new Integer(candidate.getSlidePersistAtom().getRefID());
+
+            Integer slot = (Integer)slotForRefId.get(refId);
+            if(slot == null) {
+                // First sighting, goes on the end
+                slotForRefId.put(refId,new Integer(keptSets.size()));
+                keptSets.add(candidate);
+            } else {
+                // Seen before, replace the old one in place
+                keptSets.set(slot.intValue(),candidate);
+            }
+        }
+
+        // For the text coming in from the SlideAtomsSets:
+        // Build up TextRuns from pairs of TextHeaderAtom and
+        //  one of TextBytesAtom or TextCharsAtom
+        Vector runSets = new Vector();
+        for(Iterator it = keptSets.iterator(); it.hasNext(); ) {
+            SlideAtomsSet kept = (SlideAtomsSet)it.next();
+            findTextRuns(kept.getSlideRecords(),runSets);
+        }
+
+        // Callers get an array, which is more useful than a vector
+        int runCount = runSets.size();
+        _runs = new TextRun[runCount];
+        for(int r=0; r<runCount; r++) {
+            _runs[r] = (TextRun)runSets.get(r);
+        }
+    }
+
+
+    // Accessor methods follow
+
+    /**
+     * Returns an array of all the TextRuns found
+     */
+    public TextRun[] getTextRuns() {
+        return _runs;
+    }
+
+    /**
+     * Returns the sheet number
+     */
+    public int getSheetNumber() {
+        return _sheetNo;
+    }
+
+    /**
+     * Returns the Notes Sheet for this slide, or null if there isn't one
+     */
+    public Notes getNotesSheet() {
+        return _notes;
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.model;
+
+import org.apache.poi.hslf.record.*;
+import org.apache.poi.util.StringUtil;
+
+/**
+ * This class represents a run of text in a powerpoint document. That
+ * run could be text on a sheet, or text in a note.
+ * It is only a very basic class for now
+ *
+ * @author Nick Burch
+ */
+
+public class TextRun
+{
+    private TextHeaderAtom _headerAtom;
+    /** Holds the text when the run is 8 bit, unused otherwise */
+    private TextBytesAtom _byteAtom;
+    /** Holds the text when the run is unicode, unused otherwise */
+    private TextCharsAtom _charAtom;
+    /** Which of the two text atoms we were built from */
+    private boolean _isUnicode;
+
+    /**
+     * Constructs a Text Run from a Unicode text block
+     *
+     * @param tha the TextHeaderAtom that defines what's what
+     * @param tca the TextCharsAtom containing the text
+     */
+    public TextRun(TextHeaderAtom tha, TextCharsAtom tca) {
+        _isUnicode = true;
+        _headerAtom = tha;
+        _charAtom = tca;
+    }
+
+    /**
+     * Constructs a Text Run from an Ascii text block
+     *
+     * @param tha the TextHeaderAtom that defines what's what
+     * @param tba the TextBytesAtom containing the text
+     */
+    public TextRun(TextHeaderAtom tha, TextBytesAtom tba) {
+        _isUnicode = false;
+        _headerAtom = tha;
+        _byteAtom = tba;
+    }
+
+
+    // Accessor methods follow
+
+    /**
+     * Returns the text content of the run, which has been made safe
+     *  for printing and other use.
+     */
+    public String getText() {
+        // PowerPoint seems to store files with \r as the line break.
+        // This messes things up on everything but a Mac, so every
+        //  \r is translated to \n
+        return getRawText().replace('\r','\n');
+    }
+
+    /**
+     * Returns the raw text content of the run, exactly as the
+     *  underlying text atom reports it. No changes have been applied
+     *  to it, so it is unlikely to print out nicely.
+     */
+    public String getRawText() {
+        return _isUnicode ? _charAtom.getText() : _byteAtom.getText();
+    }
+
+    /**
+     * Changes the text. Chances are, this won't work just yet, because
+     *  we also need to update some other bits of the powerpoint file
+     *  to match the change in the Text Atom, especially byte offsets.
+     * A warning is printed to System.err if the new text differs in
+     *  length from the current text.
+     *
+     * @param s the new text
+     * @throws RuntimeException if the run is held in an 8 bit atom and
+     *          the new text contains multibyte characters
+     */
+    public void setText(String s) {
+        // If size changed, warn
+        int oldLength = getText().length();
+        if(s.length() != oldLength) {
+            System.err.println("Warning: Your powerpoint file is probably no longer readable by powerpoint, as the text run has changed size!");
+        }
+
+        // The unicode atom can take any text as it is
+        if(_isUnicode) {
+            _charAtom.setText(s);
+            return;
+        }
+
+        // Otherwise it has to fit into an 8 bit atom
+        if(StringUtil.hasMultibyte(s)) {
+            throw new RuntimeException("Setting of unicode text is currently only possible for Text Runs that are Unicode in the file, sorry. For now, please convert that text to us-ascii and re-try it");
+        }
+        byte[] eightBitText = new byte[s.length()];
+        StringUtil.putCompressedUnicode(s,eightBitText,0);
+        _byteAtom.setText(eightBitText);
+    }
+
+    /**
+     * Returns the type of the text, from the TextHeaderAtom.
+     * Possible values can be seen from TextHeaderAtom
+     * @see org.apache.poi.hslf.record.TextHeaderAtom
+     */
+    public int getRunType() {
+        return _headerAtom.getTextType();
+    }
+
+    /**
+     * Changes the type of the text. Values should be taken
+     *  from TextHeaderAtom. No checking is done to ensure you
+     *  set this to a valid value!
+     * @see org.apache.poi.hslf.record.TextHeaderAtom
+     */
+    public void setRunType(int type) {
+        _headerAtom.setTextType(type);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.record;
+
+import java.io.*;
+import org.apache.poi.poifs.filesystem.*;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
+
+
+/**
+ * This is a special kind of Atom, because it doesn't live inside the
+ * PowerPoint document. Instead, it lives in a separate stream in the
+ * document. As such, it has to be treated specially
+ *
+ * @author Nick Burch
+ */
+
+public class CurrentUserAtom
+{
+    /** Standard Atom header */
+    public static final byte[] atomHeader = new byte[] { 0, 0, -10, 15 };
+    /** The Powerpoint magic number */
+    public static final byte[] magicNumber = new byte[] { 95, -64, -111, -29 };
+    /** The Powerpoint 97 version, major and minor numbers */
+    public static final byte[] ppt97FileVer = new byte[] { 8, 00, -13, 03, 03, 00 };
+
+    /**
+     * The version, major and minor numbers.
+     * Note that docFinalVersionA is read from offset 20, which is the
+     *  same two bytes that init() uses as the length of the username
+     */
+    private int docFinalVersionA;
+    private int docFinalVersionB;
+    private byte docMajorNo;
+    private byte docMinorNo;
+
+    /** The Offset into the file for the current edit */
+    private long currentEditOffset;
+    /** The Username of the last person to edit the file */
+    private String lastEditUser;
+    /** The document release version */
+    private long releaseVersion;
+
+    /** Only correct after reading in or writing out */
+    private byte[] _contents;
+
+
+    /* ********************* getter/setter follows *********************** */
+
+    public int getDocFinalVersionA() { return docFinalVersionA; }
+    public int getDocFinalVersionB() { return docFinalVersionB; }
+    public byte getDocMajorNo() { return docMajorNo; }
+    public byte getDocMinorNo() { return docMinorNo; }
+
+    public long getReleaseVersion() { return releaseVersion; }
+    public void setReleaseVersion(long rv) { releaseVersion = rv; }
+
+    /** Points to the UserEditAtom */
+    public long getCurrentEditOffset() { return currentEditOffset; }
+    public void setCurrentEditOffset(long id ) { currentEditOffset = id; }
+
+    public String getLastEditUsername() { return lastEditUser; }
+    public void setLastEditUsername(String u) { lastEditUser = u; }
+
+
+    /* ********************* real code follows *************************** */
+
+    /**
+     * Create a new Current User Atom. Not supported yet, so this
+     *  always throws
+     *
+     * @throws RuntimeException always
+     */
+    public CurrentUserAtom() {
+        _contents = new byte[0];
+        throw new RuntimeException("Creation support for Current User Atom not complete");
+    }
+
+    /**
+     * Find the "Current User" stream in the filesystem, and create
+     *  from that
+     *
+     * @param fs the filesystem to read the "Current User" stream from
+     * @throws IOException if the stream can't be read, or ends before
+     *          the size reported by its directory entry is reached
+     */
+    public CurrentUserAtom(POIFSFileSystem fs) throws IOException {
+        // Decide how big it is
+        DocumentEntry docProps =
+            (DocumentEntry)fs.getRoot().getEntry("Current User");
+        _contents = new byte[docProps.getSize()];
+
+        // Grab the contents. A single read() call is allowed to hand
+        //  back fewer bytes than asked for, so keep going until the
+        //  buffer is full, and always release the stream afterwards
+        InputStream in = fs.createDocumentInputStream("Current User");
+        try {
+            int filled = 0;
+            while(filled < _contents.length) {
+                int got = in.read(_contents,filled,_contents.length-filled);
+                if(got < 0) {
+                    throw new IOException("Current User stream ended after " + filled + " of " + _contents.length + " bytes");
+                }
+                filled += got;
+            }
+        } finally {
+            in.close();
+        }
+
+        // Set everything up
+        init();
+    }
+
+    /**
+     * Create things from the bytes
+     *
+     * @param b the full contents of the atom, starting with its header
+     */
+    public CurrentUserAtom(byte[] b) {
+        _contents = b;
+        init();
+    }
+
+    /**
+     * Actually do the creation from a block of bytes.
+     * Reads the edit offset (offset 16), the version fields
+     *  (offsets 20-25), the 8 bit username (from offset 28), the
+     *  release version which follows it, and then the unicode
+     *  username if the contents are long enough to hold one
+     */
+    private void init() {
+        // Grab the edit offset
+        currentEditOffset = LittleEndian.getUInt(_contents,16);
+
+        // Grab the versions
+        docFinalVersionA = LittleEndian.getUShort(_contents,20);
+        docFinalVersionB = LittleEndian.getUShort(_contents,22);
+        docMajorNo = _contents[24];
+        docMinorNo = _contents[25];
+
+        // Get the username length, in characters
+        int usernameLen = LittleEndian.getUShort(_contents,20);
+
+        // Use this to grab the revision, which sits straight after
+        //  the 8 bit username
+        releaseVersion = LittleEndian.getUInt(_contents,28+usernameLen);
+
+        // Grab the unicode username, if stored
+        int start = 28+usernameLen+4;
+        int len = 2*usernameLen;
+
+        if(_contents.length >= start+len) {
+            byte[] textBytes = new byte[len];
+            System.arraycopy(_contents,start,textBytes,0,len);
+            lastEditUser = StringUtil.getFromUnicodeLE(textBytes);
+        } else {
+            // Fake from the 8 bit version
+            byte[] textBytes = new byte[usernameLen];
+            System.arraycopy(_contents,28,textBytes,0,usernameLen);
+            lastEditUser = StringUtil.getFromCompressedUnicode(textBytes,0,usernameLen);
+        }
+    }
+
+
+    /**
+     * Writes ourselves back out.
+     * The two bytes at offset 20 are written as the length of the
+     *  current username, as that is what init() reads them as. For
+     *  an unchanged username this is the value that was read in.
+     *
+     * @param out the stream to write the atom to
+     * @throws IOException if the write fails
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        int nameLen = lastEditUser.length();
+
+        // Decide on the size
+        //  8 = atom header
+        //  20 = up to name
+        //  4 = revision
+        //  3 * len = ascii + unicode
+        int size = 8 + 20 + 4 + (3 * nameLen);
+        _contents = new byte[size];
+
+        // First we have a 8 byte atom header
+        System.arraycopy(atomHeader,0,_contents,0,4);
+        // Size is 20+user len + revision len(4)
+        int atomSize = 20+4+nameLen;
+        LittleEndian.putInt(_contents,4,atomSize);
+
+        // Now we have the size of the details, which is 20
+        LittleEndian.putInt(_contents,8,20);
+
+        // Now the ppt magic number (4 bytes)
+        System.arraycopy(magicNumber,0,_contents,12,4);
+
+        // Now the current edit offset
+        LittleEndian.putInt(_contents,16,(int)currentEditOffset);
+
+        // Now the username length (2 bytes). This has to track the
+        //  username being written, otherwise a changed username gives
+        //  a record that init() can no longer find its way around
+        LittleEndian.putShort(_contents,20,(short)nameLen);
+
+        // Now the rest of the file versions, 2+1+1
+        LittleEndian.putShort(_contents,22,(short)docFinalVersionB);
+        _contents[24] = docMajorNo;
+        _contents[25] = docMinorNo;
+
+        // 2 bytes blank
+        _contents[26] = 0;
+        _contents[27] = 0;
+
+        // username in bytes in us ascii
+        byte[] asciiUN = new byte[nameLen];
+        StringUtil.putCompressedUnicode(lastEditUser,asciiUN,0);
+        System.arraycopy(asciiUN,0,_contents,28,asciiUN.length);
+
+        // 4 byte release version
+        LittleEndian.putInt(_contents,28+asciiUN.length,(int)releaseVersion);
+
+        // username in unicode
+        byte [] ucUN = new byte[nameLen*2];
+        StringUtil.putUnicodeLE(lastEditUser,ucUN,0);
+        System.arraycopy(ucUN,0,_contents,28+asciiUN.length+4,ucUN.length);
+
+        // Write out
+        out.write(_contents);
+    }
+
+    /**
+     * Writes ourselves back out to a filesystem, as a document
+     *  called "Current User"
+     *
+     * @param fs the filesystem to create the document in
+     * @throws IOException if the write fails
+     */
+    public void writeToFS(POIFSFileSystem fs) throws IOException {
+        // Grab contents
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        writeOut(baos);
+        ByteArrayInputStream bais =
+            new ByteArrayInputStream(baos.toByteArray());
+
+        // Write out
+        fs.createDocument(bais,"Current User");
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * If we come across a record we know has children of (potential)
+ * interest, but where the record itself is boring, we create one
+ * of these. It allows us to get at the children, but not much else
+ *
+ * @author Nick Burch
+ */
+
+public class DummyRecordWithChildren extends RecordContainer
+{
+ private Record[] _children;
+ private byte[] _header;
+ private long _type;
+
+ /**
+ * Create a new holder for a boring record with children
+ */
+ protected DummyRecordWithChildren(byte[] source, int start, int len) {
+ // Just grab the header, not the whole contents
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+ _type = LittleEndian.getUShort(_header,2);
+
+ // Find our children
+ _children = Record.findChildRecords(source,start+8,len-8);
+ }
+
+ /**
+ * Return the value we were given at creation
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Return any children
+ */
+ public Record[] getChildRecords() { return _children; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ writeOut(_header[0],_header[1],_type,_children,out);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.ddf.*;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * A wrapper around a DDF (Escher) EscherTextbox Record. Causes the DDF
+ * Record to be accessible as if it were a HSLF record.
+ * Note: when asked to write out, will simply put any child records correctly
+ * into the Escher layer. A call to the escher layer to write out (by the
+ * parent PPDrawing) will do the actual write out
+ *
+ * @author Nick Burch
+ */
+
+public class EscherTextboxWrapper extends RecordContainer
+{
+    /** The DDF record being wrapped */
+    private EscherTextboxRecord _escherRecord;
+    /** The HSLF records found in the DDF record's data */
+    private Record[] _children;
+    /** The record ID of the DDF record */
+    private long _type;
+
+    /**
+     * Returns the underlying DDF Escher Record
+     */
+    public EscherTextboxRecord getEscherRecord() {
+        return _escherRecord;
+    }
+
+    /**
+     * Creates the wrapper for the given DDF Escher Record and children
+     *
+     * @param textbox the DDF record to wrap
+     */
+    protected EscherTextboxWrapper(EscherTextboxRecord textbox) {
+        _escherRecord = textbox;
+        _type = (long)textbox.getRecordId();
+
+        // The whole of the escher data is searched for child records
+        byte[] escherData = textbox.getData();
+        _children = Record.findChildRecords(escherData,0,escherData.length);
+    }
+
+
+    /**
+     * Return the type of the escher record (normally in the 0xFnnn range)
+     */
+    public long getRecordType() {
+        return _type;
+    }
+
+    /**
+     * Return any children
+     */
+    public Record[] getChildRecords() {
+        return _children;
+    }
+
+    /**
+     * Stores the data for the child records back into the Escher layer.
+     * Doesn't actually do the writing out, that's left to the Escher
+     *  layer to do, so nothing is written to the supplied stream.
+     * Must be called before writeOut/serialize is called on the
+     *  underlying Escher object!
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        // Collect what each child writes, one after another
+        ByteArrayOutputStream childData = new ByteArrayOutputStream();
+        for(int c=0; c<_children.length; c++) {
+            Record child = _children[c];
+            child.writeOut(childData);
+        }
+
+        // Save in the escher layer
+        _escherRecord.setData(childData.toByteArray());
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Master container for Notes. There is one of these for every page of
+ * notes, and they have certain specific children
+ *
+ * @author Nick Burch
+ */
+
+public class Notes extends RecordContainer
+{
+ private Record[] _children;
+ private byte[] _header;
+ private static long _type = 1008l;
+
+ // Links to our more interesting children
+ private NotesAtom notesAtom;
+ private PPDrawing ppDrawing;
+
+ /**
+ * Returns the NotesAtom of this Notes
+ */
+ public NotesAtom getNotesAtom() { return notesAtom; }
+ /**
+ * Returns the PPDrawing of this Notes, which has all the
+ * interesting data in it
+ */
+ public PPDrawing getPPDrawing() { return ppDrawing; }
+
+
+ /**
+ * Set things up, and find our more interesting children
+ */
+ protected Notes(byte[] source, int start, int len) {
+ // Grab the header
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+
+ // Find our children
+ _children = Record.findChildRecords(source,start+8,len-8);
+
+ // Find the interesting ones in there
+ for(int i=0; i<_children.length; i++) {
+ if(_children[i] instanceof NotesAtom) {
+ notesAtom = (NotesAtom)_children[i];
+ //System.out.println("Found notes for sheet " + notesAtom.getSlideID());
+ }
+ if(_children[i] instanceof PPDrawing) {
+ ppDrawing = (PPDrawing)_children[i];
+ }
+ }
+ }
+
+
+ /**
+ * We are of type 1008
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Return any children
+ */
+ public Record[] getChildRecords() { return _children; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ writeOut(_header[0],_header[1],_type,_children,out);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A Notes Atom (type 1009). Holds information on the parent Notes, such
+ * as what slide it is tied to
+ *
+ * @author Nick Burch
+ */
+
+public class NotesAtom extends RecordAtom
+{
+    /**
+     * Fewest bytes the constructor reads: 8 byte header, 4 byte
+     *  slide ID and 2 bytes of flags
+     */
+    private static final int MIN_RECORD_LENGTH = 14;
+
+    private byte[] _header;
+    private static long _type = 1009l;
+
+    private int slideID;
+    private boolean followMasterObjects;
+    private boolean followMasterScheme;
+    private boolean followMasterBackground;
+    /** Whatever bytes follow the flags, kept so they can be written back */
+    private byte[] reserved;
+
+
+    public int getSlideID() { return slideID; }
+    public void setSlideID(int id) { slideID = id; }
+
+    public boolean getFollowMasterObjects() { return followMasterObjects; }
+    public boolean getFollowMasterScheme() { return followMasterScheme; }
+    public boolean getFollowMasterBackground() { return followMasterBackground; }
+    public void setFollowMasterObjects(boolean flag) { followMasterObjects = flag; }
+    public void setFollowMasterScheme(boolean flag) { followMasterScheme = flag; }
+    public void setFollowMasterBackground(boolean flag) { followMasterBackground = flag; }
+
+
+    /* *************** record code follows ********************** */
+
+    /**
+     * For the Notes Atom
+     *
+     * @param source the bytes holding the record
+     * @param start where in source the record's header starts
+     * @param len length of the record, including the 8 byte header.
+     *         Anything below 14 is treated as 14
+     */
+    protected NotesAtom(byte[] source, int start, int len) {
+        // Sanity Checking. The header, slide ID and flags are always
+        //  read, so never work with less than that. A smaller length
+        //  would otherwise ask for a negative sized reserved array
+        if(len < MIN_RECORD_LENGTH) { len = MIN_RECORD_LENGTH; }
+
+        // Get the header
+        _header = new byte[8];
+        System.arraycopy(source,start,_header,0,8);
+
+        // Get the slide ID
+        slideID = (int)LittleEndian.getInt(source,start+8);
+
+        // Grok the flags, stored as bits
+        int flags = LittleEndian.getUShort(source,start+12);
+        followMasterBackground = ((flags&4) == 4);
+        followMasterScheme = ((flags&2) == 2);
+        followMasterObjects = ((flags&1) == 1);
+
+        // There might be 2 more bytes, which are a reserved field
+        reserved = new byte[len-MIN_RECORD_LENGTH];
+        System.arraycopy(source,start+MIN_RECORD_LENGTH,reserved,0,reserved.length);
+    }
+
+    /**
+     * We are of type 1009
+     */
+    public long getRecordType() { return _type; }
+
+    /**
+     * Write the contents of the record back, so it can be written
+     *  to disk. The header and reserved bytes go out as they were
+     *  read, the slide ID and flags from their current values
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        // Header
+        out.write(_header);
+
+        // Slide ID
+        writeLittleEndian(slideID,out);
+
+        // Flags, using the same bits they were read from
+        short flags = 0;
+        if(followMasterObjects) { flags += 1; }
+        if(followMasterScheme) { flags += 2; }
+        if(followMasterBackground) { flags += 4; }
+        writeLittleEndian(flags,out);
+
+        // Reserved fields
+        out.write(reserved);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+
+import org.apache.poi.ddf.*;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.Vector;
+
+/**
+ * These are actually wrappers onto Escher drawings. Make use of
+ * the DDF classes to do useful things with them.
+ * For now, creates a tree of the Escher records, and then creates any
+ * PowerPoint (hslf) records found within the EscherTextboxRecord
+ * (msofbtClientTextbox) records.
+ * Also provides easy access to the EscherTextboxRecords, so that their
+ * text may be extracted and used in Sheets
+ *
+ * @author Nick Burch
+ */
+
+// For now, pretending to be an atom. Might not always be, but that
+// would require a wrapping class
+public class PPDrawing extends RecordAtom
+{
+    /** The 8 byte record header, copied from the source data */
+    private byte[] _header;
+    /** Record type, read as an unsigned short from offset 2 of the header */
+    private long _type;
+
+    /** The top level Escher records found after the header */
+    private EscherRecord[] childRecords;
+    /** One wrapper for every EscherTextboxRecord found in the Escher tree */
+    private EscherTextboxWrapper[] textboxWrappers;
+
+
+    /**
+     * Get access to the underlying Escher Records
+     *
+     * @return the top level Escher records parsed from this record
+     */
+    public EscherRecord[] getEscherRecords() { return childRecords; }
+
+    /**
+     * Get access to the atoms inside Textboxes
+     *
+     * @return a wrapper for each EscherTextboxRecord that was found
+     */
+    public EscherTextboxWrapper[] getTextboxWrappers() { return textboxWrappers; }
+
+
+    /* ******************** record stuff follows ********************** */
+
+    /**
+     * Sets everything up: keeps the header, parses the Escher records
+     * that follow it, and wraps every EscherTextboxRecord in the tree.
+     *
+     * @param source the byte array holding the record
+     * @param start offset in source of the record header
+     * @param len length of the record, header included
+     */
+    protected PPDrawing(byte[] source, int start, int len) {
+        // Get the header
+        _header = new byte[8];
+        System.arraycopy(source,start,_header,0,8);
+
+        // Get the type
+        _type = LittleEndian.getUShort(_header,2);
+
+        // Work on a private copy of the whole record
+        byte[] contents = new byte[len];
+        System.arraycopy(source,start,contents,0,len);
+
+        // Build up the list of Escher records contained within,
+        // starting just past our own 8 byte header
+        DefaultEscherRecordFactory erf = new DefaultEscherRecordFactory();
+        Vector escherChildren = new Vector();
+        findEscherChildren(erf,contents,8,len-8,escherChildren);
+        childRecords = (EscherRecord[])escherChildren.toArray(
+            new EscherRecord[escherChildren.size()]);
+
+        // Find any EscherTextboxRecords, and wrap them up
+        Vector textboxes = new Vector();
+        findEscherTextboxRecord(childRecords, textboxes);
+        textboxWrappers = (EscherTextboxWrapper[])textboxes.toArray(
+            new EscherTextboxWrapper[textboxes.size()]);
+    }
+
+    /**
+     * Walks along the sibling Escher records starting at startPos,
+     * adding each one to found.
+     * At least one record is always read. The walk carries on while 8
+     * or more bytes remain, and stops early if a record reports a size
+     * that would not move the cursor forward, since continuing from the
+     * same position could never terminate.
+     *
+     * @param erf factory used to create the Escher records
+     * @param source the data holding the Escher records
+     * @param startPos offset in source of the first record
+     * @param lenToGo number of bytes available from startPos
+     * @param found receives every record that is read
+     */
+    private void findEscherChildren(DefaultEscherRecordFactory erf, byte[] source, int startPos, int lenToGo, Vector found) {
+        do {
+            // Find the record, fill it in, and save it
+            EscherRecord r = erf.createRecord(source,startPos);
+            r.fillFields( source, startPos, erf );
+            found.add(r);
+
+            // Wind on
+            int size = r.getRecordSize();
+            if(size < 8) {
+                System.err.println("Hit short DDF record at " + startPos + " - " + size);
+            }
+            if(size <= 0) {
+                // No forward progress is possible, so give up on the rest
+                break;
+            }
+            startPos += size;
+            lenToGo -= size;
+        } while(lenToGo >= 8);
+    }
+
+    /**
+     * Look for EscherTextboxRecords, descending into container
+     * records. A wrapper is added to found for each one, in the order
+     * they are met.
+     *
+     * @param toSearch the records to inspect
+     * @param found receives an EscherTextboxWrapper per textbox record
+     */
+    private void findEscherTextboxRecord(EscherRecord[] toSearch, Vector found) {
+        for(int i=0; i<toSearch.length; i++) {
+            EscherRecord candidate = toSearch[i];
+
+            if(candidate instanceof EscherTextboxRecord) {
+                found.add(new EscherTextboxWrapper((EscherTextboxRecord)candidate));
+                continue;
+            }
+
+            // If it has children, walk them
+            if(candidate.isContainerRecord()) {
+                List childrenL = candidate.getChildRecords();
+                EscherRecord[] children = (EscherRecord[])childrenL.toArray(
+                    new EscherRecord[childrenL.size()]);
+                findEscherTextboxRecord(children,found);
+            }
+        }
+    }
+
+    /**
+     * We are type 1036
+     *
+     * @return the type read from the record header
+     */
+    public long getRecordType() { return _type; }
+
+    /**
+     * We're pretending to be an atom, so return null
+     *
+     * @return null, always
+     */
+    public Record[] getChildRecords() { return null; }
+
+    /**
+     * Write the contents of the record back, so it can be written
+     * to disk.
+     * Walks the escher layer to get the contents, and rewrites the
+     * length field of the header to match what is serialised.
+     *
+     * @param out the stream to write the record to
+     * @throws IOException if the stream rejects a write
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        // Ensure the escher layer reflects the text changes
+        // (the wrappers are invoked with a null stream)
+        for(int i=0; i<textboxWrappers.length; i++) {
+            textboxWrappers[i].writeOut(null);
+        }
+
+        // Find the new size of the escher children
+        int newSize = 0;
+        for(int i=0; i<childRecords.length; i++) {
+            newSize += childRecords[i].getRecordSize();
+        }
+
+        // Update the size (header bytes 5-8)
+        LittleEndian.putInt(_header,4,newSize);
+
+        // Write out our header
+        out.write(_header);
+
+        // Now grab the children's data, each one serialised
+        // directly after the previous one
+        byte[] b = new byte[newSize];
+        int done = 0;
+        for(int i=0; i<childRecords.length; i++) {
+            done += childRecords[i].serialize( done, b );
+        }
+
+        // Finally, write out the children
+        out.write(b);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * General holder for PersistPtrFullBlock and PersistPtrIncrementalBlock
+ * records. We need to handle them specially, since we have to go around
+ * updating UserEditAtoms if they shuffle about on disk
+ *
+ * @author Nick Burch
+ */
+
+public class PersistPtrHolder extends PositionDependentRecordAtom
+{
+    /** Raw copy of the whole record, header included */
+    private byte[] _contents;
+    /** Record type, read as an unsigned short from offset 2 */
+    private long _type;
+
+    /**
+     * Create a new holder for a PersistPtr record. The record is kept
+     * as an opaque block of bytes, and the position it was read from
+     * is remembered as its last on-disk offset.
+     *
+     * @param source the byte array holding the record
+     * @param start offset in source of the record header
+     * @param len length of the record, header included
+     */
+    protected PersistPtrHolder(byte[] source, int start, int len) {
+        // The length passed in counts the header too; always take
+        // at least 4 bytes so the type field can be read
+        int size = (len < 4) ? 4 : len;
+
+        // Treat as an atom, grab and hold everything
+        byte[] raw = new byte[size];
+        System.arraycopy(source,start,raw,0,size);
+
+        _contents = raw;
+        _type = LittleEndian.getUShort(raw,2);
+
+        // Store where we were found on disk
+        myLastOnDiskOffset = start;
+    }
+
+    /**
+     * Return the type that was read from the record at creation
+     *
+     * @return the record type
+     */
+    public long getRecordType() {
+        return _type;
+    }
+
+    /**
+     * Write the stored bytes of the record back out unchanged, so it
+     * can be written to disk
+     *
+     * @param out the stream to write the record to
+     * @throws IOException if the stream rejects the write
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        out.write(_contents);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+/**
+ * A special (and dangerous) kind of Record Atom that cares about where
+ * it lives on the disk, or who has other Atoms that care about where
+ * this is on the disk.
+ *
+ * @author Nick Burch
+ */
+
+public abstract class PositionDependentRecordAtom extends RecordAtom
+{
+    /** Our location on the disk, as of the last write out */
+    protected int myLastOnDiskOffset;
+
+    /**
+     * Fetch our location on the disk, as of the last write out
+     *
+     * @return the stored offset
+     */
+    public int getLastOnDiskOffset() { return myLastOnDiskOffset; }
+
+    /**
+     * Update the Record's idea of where on disk it lives, after a write out.
+     * Use with care...
+     * (The method name is misspelt; it is kept so existing callers keep
+     * working. New code can use {@link #setLastOnDiskOffset(int)}.)
+     *
+     * @param offset the new on-disk offset of this record
+     */
+    public void setLastOnDiskOffet(int offset) {
+        myLastOnDiskOffset = offset;
+    }
+
+    /**
+     * Correctly spelt equivalent of {@link #setLastOnDiskOffet(int)}.
+     * Delegates to it, so a subclass overriding the original method
+     * still sees every update.
+     *
+     * @param offset the new on-disk offset of this record
+     */
+    public void setLastOnDiskOffset(int offset) {
+        setLastOnDiskOffet(offset);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.record;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.Vector;
+import org.apache.poi.util.LittleEndian;
+
+
+/**
+ * This abstract class represents a record in the PowerPoint document.
+ * Record classes should extend with RecordContainer or RecordAtom, which
+ * extend this in turn.
+ *
+ * @author Nick Burch
+ */
+
+public abstract class Record
+{
+    /**
+     * Is this record type an Atom record (only has data),
+     * or is it a non-Atom record (has other records)?
+     *
+     * @return true for an atom, false for a container
+     */
+    public abstract boolean isAnAtom();
+
+    /**
+     * Returns the type (held as a little endian in bytes 3 and 4)
+     * that this class handles
+     *
+     * @return the record type
+     */
+    public abstract long getRecordType();
+
+    /**
+     * Fetch all the child records of this record
+     * If this record is an atom, will return null
+     * If this record is a non-atom, but has no children, will return
+     * an empty array
+     *
+     * @return the children, or null for an atom
+     */
+    public abstract Record[] getChildRecords();
+
+    /**
+     * Have the contents printed out into an OutputStream, used when
+     * writing a file back out to disk
+     * (Normally, atom classes will keep their bytes around, but
+     * non atom classes will just request the bytes from their
+     * children, then chuck on their header and return)
+     *
+     * @param o the stream to write the record to
+     * @throws IOException if the stream rejects a write
+     */
+    public abstract void writeOut(OutputStream o) throws IOException;
+
+    /**
+     * When writing out, write out a signed int (32bit) in Little Endian format
+     *
+     * @param i the value to write
+     * @param o the stream to write the 4 bytes to
+     * @throws IOException if the stream rejects the write
+     */
+    public static void writeLittleEndian(int i,OutputStream o) throws IOException {
+        byte[] bi = new byte[4];
+        LittleEndian.putInt(bi,i);
+        o.write(bi);
+    }
+    /**
+     * When writing out, write out a signed short (16bit) in Little Endian format
+     *
+     * @param s the value to write
+     * @param o the stream to write the 2 bytes to
+     * @throws IOException if the stream rejects the write
+     */
+    public static void writeLittleEndian(short s,OutputStream o) throws IOException {
+        byte[] bs = new byte[2];
+        LittleEndian.putShort(bs,s);
+        o.write(bs);
+    }
+
+    /**
+     * Default method for finding child records of a given record.
+     * Steps through the data one record at a time: the type is read
+     * from offset 2 of each header and the length from offset 4, and a
+     * Record is created for each header found.
+     *
+     * @param b the data holding the child records
+     * @param start offset in b of the first child's header
+     * @param len number of bytes of child data
+     * @return the records found, in the order they occur (never null)
+     */
+    public static Record[] findChildRecords(byte[] b, int start, int len) {
+        Vector children = new Vector(5);
+
+        // Jump our little way along, creating records as we go.
+        // A record is only attempted while a whole 8 byte header fits.
+        int pos = start;
+        while(pos <= (start+len-8)) {
+            long type = LittleEndian.getUShort(b,pos+2);
+            long rlen = LittleEndian.getUInt(b,pos+4);
+
+            // Sanity check the length: anything that does not fit in
+            // a positive int is treated as an empty body
+            int rleni = (int)rlen;
+            if(rleni < 0) { rleni = 0; }
+
+            Record r = createRecordForType(type,b,pos,8+rleni);
+            children.add(r);
+
+            // Step over the header and exactly the body length handed to
+            // the record. Stepping by the raw unsigned length could leave
+            // pos unchanged for a corrupt length and never terminate.
+            pos += 8 + rleni;
+        }
+
+        // Turn the vector into an array, and return
+        return (Record[])children.toArray(new Record[children.size()]);
+    }
+
+    /**
+     * For a given type (little endian bytes 3 and 4 in record header),
+     * byte array, start position and length:
+     * will return a Record object that will handle that record
+     *
+     * Remember that while PPT stores the record lengths as 8 bytes short
+     * (not including the size of the header), this code assumes you're
+     * passing in corrected lengths
+     *
+     * @param type the record type from the header
+     * @param b the data holding the record
+     * @param start offset in b of the record header
+     * @param len length of the record, header included
+     * @return a record of the class registered for the type, or an
+     *         UnknownRecordPlaceholder when the type is not recognised
+     */
+    protected static Record createRecordForType(long type, byte[] b, int start, int len) {
+        // Default is to use UnknownRecordPlaceholder
+        // When you create classes for new Records, add them here
+        switch((int)type) {
+            // Document
+            case 1000:
+                return new DummyRecordWithChildren(b,start,len);
+
+            // "Slide"
+            case 1006:
+                return new Slide(b,start,len);
+
+            // "SlideAtom"
+            case 1007:
+                return new SlideAtom(b,start,len);
+
+            // "Notes"
+            case 1008:
+                return new Notes(b,start,len);
+
+            // "NotesAtom" (Details on Notes sheets)
+            case 1009:
+                return new NotesAtom(b,start,len);
+
+            // "SlidePersistAtom" (Details on text for a sheet)
+            case 1011:
+                return new SlidePersistAtom(b,start,len);
+
+            // MainMaster (MetaSheet lives inside the PPDrawing inside this)
+            case 1016:
+                return new DummyRecordWithChildren(b,start,len);
+
+            // PPDrawing (MetaSheet lives inside this)
+            case 1036:
+                return new PPDrawing(b,start,len);
+
+            // TextHeaderAtom (Holds details on following text)
+            case 3999:
+                return new TextHeaderAtom(b,start,len);
+
+            // TextCharsAtom (Text in Unicode format)
+            case 4000:
+                return new TextCharsAtom(b,start,len);
+
+            // TextByteAtom (Text in ascii format)
+            case 4008:
+                return new TextBytesAtom(b,start,len);
+
+            // SlideListWithText (Many Sheets live inside here)
+            case 4080:
+                return new SlideListWithText(b,start,len);
+
+            // UserEditAtom (Holds pointers, last viewed etc)
+            case 4085:
+                return new UserEditAtom(b,start,len);
+
+            // PersistPtrFullBlock and PersistPtrIncrementalBlock
+            // (Don't know what they hold, but do care about where they live)
+            case 6001:
+            case 6002:
+                return new PersistPtrHolder(b,start,len);
+
+            default:
+                return new UnknownRecordPlaceholder(b,start,len);
+        }
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+/**
+ * Abstract class which all atom records will extend.
+ *
+ * @author Nick Burch
+ */
+
+public abstract class RecordAtom extends Record
+{
+    /**
+     * Every subclass of this class is an atom.
+     *
+     * @return true, always
+     */
+    public boolean isAnAtom() {
+        return true;
+    }
+
+    /**
+     * Atoms hold no child records, so there is nothing to hand back.
+     *
+     * @return null, always
+     */
+    public Record[] getChildRecords() {
+        return null;
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.hslf.util.MutableByteArrayOutputStream;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Abstract class which all container records will extend. Provides
+ * helpful methods for writing child records out to disk
+ *
+ * @author Nick Burch
+ */
+
+public abstract class RecordContainer extends Record
+{
+    /**
+     * Containers are never atoms.
+     *
+     * @return false, always
+     */
+    public boolean isAnAtom() {
+        return false;
+    }
+
+    /**
+     * Write out our header, and our children. The length field of the
+     * header is first written as zero, and filled in once the children
+     * have been written and their total size is known.
+     * @param headerA the first byte of the header
+     * @param headerB the second byte of the header
+     * @param type the record type
+     * @param children our child records
+     * @param out the stream to write to
+     * @throws IOException if a child or the stream fails to write
+     */
+    public void writeOut(byte headerA, byte headerB, long type, Record[] children, OutputStream out) throws IOException {
+        // Assemble the 8 byte header, with the size left as zero for now
+        byte[] typeBytes = new byte[2];
+        LittleEndian.putShort(typeBytes,(short)type);
+        byte[] header = new byte[] {
+            headerA, headerB, typeBytes[0], typeBytes[1], 0, 0, 0, 0
+        };
+
+        if(out instanceof MutableByteArrayOutputStream) {
+            // A mutable stream lets us patch the size in place afterwards
+            MutableByteArrayOutputStream mout = (MutableByteArrayOutputStream)out;
+
+            // Remember where our header starts
+            int headerPos = mout.getBytesWritten();
+            mout.write(header);
+
+            for(int i=0; i<children.length; i++) {
+                children[i].writeOut(mout);
+            }
+
+            // The stored size excludes the 8 byte header
+            int length = mout.getBytesWritten() - headerPos - 8;
+            byte[] size = new byte[4];
+            LittleEndian.putInt(size,0,length);
+            mout.overwrite(size, headerPos+4);
+            return;
+        }
+
+        // Slower route: buffer everything, so the size can be fixed
+        // up before any of it reaches the real stream
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        baos.write(header);
+
+        for(int i=0; i<children.length; i++) {
+            children[i].writeOut(baos);
+        }
+
+        // Grab the bytes back, and set the size (less the 8 byte header)
+        byte[] toWrite = baos.toByteArray();
+        LittleEndian.putInt(toWrite,4,(toWrite.length-8));
+
+        // Write out the bytes
+        out.write(toWrite);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Master container for Slides. There is one of these for every slide,
+ * and they have certain specific children
+ *
+ * @author Nick Burch
+ */
+
+public class Slide extends RecordContainer
+{
+    /** All the child records, in the order they were found */
+    private Record[] _children;
+    /** The 8 byte record header, copied from the source data */
+    private byte[] _header;
+    private static long _type = 1006L;
+
+    // Links to our more interesting children
+    private SlideAtom slideAtom;
+    private PPDrawing ppDrawing;
+
+    /**
+     * Returns the SlideAtom of this Slide
+     *
+     * @return the SlideAtom child, or null if none was found
+     */
+    public SlideAtom getSlideAtom() {
+        return slideAtom;
+    }
+
+    /**
+     * Returns the PPDrawing of this Slide, which has all the
+     * interesting data in it
+     *
+     * @return the PPDrawing child, or null if none was found
+     */
+    public PPDrawing getPPDrawing() {
+        return ppDrawing;
+    }
+
+
+    /**
+     * Set things up, and find our more interesting children.
+     * Should several children of the same interesting kind be present,
+     * the last one found is the one that is kept.
+     *
+     * @param source the byte array holding the record
+     * @param start offset in source of the record header
+     * @param len length of the record, header included
+     */
+    protected Slide(byte[] source, int start, int len) {
+        // Grab the header
+        _header = new byte[8];
+        System.arraycopy(source,start,_header,0,8);
+
+        // Everything after the header is child records
+        _children = Record.findChildRecords(source,start+8,len-8);
+
+        // Pick out the ones we hand out directly
+        for(int i=0; i<_children.length; i++) {
+            Record child = _children[i];
+            if(child instanceof SlideAtom) {
+                slideAtom = (SlideAtom)child;
+            }
+            if(child instanceof PPDrawing) {
+                ppDrawing = (PPDrawing)child;
+            }
+        }
+    }
+
+
+    /**
+     * We are of type 1006
+     *
+     * @return the record type
+     */
+    public long getRecordType() {
+        return _type;
+    }
+
+    /**
+     * Return any children
+     *
+     * @return the child records found at creation
+     */
+    public Record[] getChildRecords() {
+        return _children;
+    }
+
+    /**
+     * Write the contents of the record back, so it can be written
+     * to disk. The first two header bytes are reused as read; the
+     * rest of the header and the children are written by the
+     * container helper.
+     *
+     * @param out the stream to write the record to
+     * @throws IOException if a child or the stream fails to write
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        writeOut(_header[0],_header[1],_type,_children,out);
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A Slide Atom (type 1007). Holds information on the parent Slide, what
+ * Master Slide it uses, what Notes is attached to it, that sort of thing.
+ * It also has a SSlideLayoutAtom embedded in it, but without the Atom header
+ *
+ * @author Nick Burch
+ */
+
+public class SlideAtom extends RecordAtom
+{
+    /** The 8 byte record header, copied from the source data */
+    private byte[] _header;
+    private static long _type = 1007L;
+    public static final int MASTER_SLIDE_ID = 0;
+    public static final int USES_MASTER_SLIDE_ID = -2147483648;
+
+    private int masterID;
+    private int notesID;
+
+    private boolean followMasterObjects;
+    private boolean followMasterScheme;
+    private boolean followMasterBackground;
+    private SSlideLayoutAtom layoutAtom;
+    /** Any bytes that followed the flags when the record was read */
+    private byte[] reserved;
+
+
+    /** Get the ID of the master slide used. 0 if this is a master slide, otherwise -2147483648 */
+    public int getMasterID() { return masterID; }
+    /** Get the ID of the notes for this slide. 0 if doesn't have one */
+    public int getNotesID() { return notesID; }
+    /** Get the embedded SSlideLayoutAtom */
+    public SSlideLayoutAtom getSSlideLayoutAtom() { return layoutAtom; }
+
+    public boolean getFollowMasterObjects() { return followMasterObjects; }
+    public boolean getFollowMasterScheme() { return followMasterScheme; }
+    public boolean getFollowMasterBackground() { return followMasterBackground; }
+    public void setFollowMasterObjects(boolean flag) { followMasterObjects = flag; }
+    public void setFollowMasterScheme(boolean flag) { followMasterScheme = flag; }
+    public void setFollowMasterBackground(boolean flag) { followMasterBackground = flag; }
+
+
+    /* *************** record code follows ********************** */
+
+    /**
+     * For the Slide Atom. Layout of the data, relative to start:
+     * 8 bytes of header, 12 bytes of SSlideLayoutAtom, 4 bytes of
+     * master ID, 4 bytes of notes ID, 2 bytes of flags, and then
+     * whatever else there is.
+     *
+     * @param source the byte array holding the record
+     * @param start offset in source of the record header
+     * @param len length of the record, header included
+     */
+    protected SlideAtom(byte[] source, int start, int len) {
+        // Sanity Checking
+        if(len < 30) { len = 30; }
+
+        // Get the header
+        _header = new byte[8];
+        System.arraycopy(source,start,_header,0,8);
+
+        // Grab the 12 bytes that is "SSlideLayoutAtom"
+        byte[] SSlideLayoutAtomData = new byte[12];
+        System.arraycopy(source,start+8,SSlideLayoutAtomData,0,12);
+        // Use them to build up the SSlideLayoutAtom
+        layoutAtom = new SSlideLayoutAtom(SSlideLayoutAtomData);
+
+        // Get the IDs of the master and notes
+        masterID = LittleEndian.getInt(source,start+12+8);
+        notesID = LittleEndian.getInt(source,start+16+8);
+
+        // Grok the flags, stored as bits
+        int flags = LittleEndian.getUShort(source,start+20+8);
+        followMasterBackground = (flags&4) == 4;
+        followMasterScheme = (flags&2) == 2;
+        followMasterObjects = (flags&1) == 1;
+
+        // If there's any other bits of data, keep them about
+        // 8 bytes header + 20 bytes to flags + 2 bytes flags = 30 bytes
+        reserved = new byte[len-30];
+        System.arraycopy(source,start+30,reserved,0,reserved.length);
+    }
+
+    /**
+     * We are of type 1007
+     *
+     * @return the record type
+     */
+    public long getRecordType() { return _type; }
+
+    /**
+     * Write the contents of the record back, so it can be written
+     * to disk
+     *
+     * @param out the stream to write the record to
+     * @throws IOException if the stream rejects a write
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        // Header
+        out.write(_header);
+
+        // SSlideLayoutAtom stuff
+        layoutAtom.writeOut(out);
+
+        // IDs
+        writeLittleEndian(masterID,out);
+        writeLittleEndian(notesID,out);
+
+        // Flags
+        short flags = 0;
+        if(followMasterObjects) { flags += 1; }
+        if(followMasterScheme) { flags += 2; }
+        if(followMasterBackground) { flags += 4; }
+        writeLittleEndian(flags,out);
+
+        // Reserved data
+        out.write(reserved);
+    }
+
+
+    /**
+     * Holds the geometry of the Slide, and the ID of the placeholders
+     * on the slide.
+     * (Embedded inside SlideAtom is a SSlideLayoutAtom, without the
+     * usual record header. Since it's a fixed size and tied to
+     * the SlideAtom, we'll hold it here.)
+     */
+    public class SSlideLayoutAtom {
+        // The different kinds of geometry
+        public static final int TITLE_SLIDE = 0;
+        public static final int TITLE_BODY_SLIDE = 1;
+        public static final int TITLE_MASTER_SLIDE = 2;
+        public static final int MASTER_SLIDE = 3;
+        public static final int MASTER_NOTES = 4;
+        public static final int NOTES_TITLE_BODY = 5;
+        public static final int HANDOUT = 6; // Only header, footer and date placeholders
+        public static final int TITLE_ONLY = 7;
+        public static final int TITLE_2_COLUMN_BODY = 8;
+        public static final int TITLE_2_ROW_BODY = 9;
+        public static final int TITLE_2_COLUNM_RIGHT_2_ROW_BODY = 10;
+        public static final int TITLE_2_COLUNM_LEFT_2_ROW_BODY = 11;
+        public static final int TITLE_2_ROW_BOTTOM_2_COLUMN_BODY = 12;
+        public static final int TITLE_2_ROW_TOP_2_COLUMN_BODY = 13;
+        public static final int FOUR_OBJECTS = 14;
+        public static final int BIG_OBJECT = 15;
+        public static final int BLANK_SLIDE = 16;
+        public static final int VERTICAL_TITLE_BODY_LEFT = 17;
+        // Was 17, clashing with VERTICAL_TITLE_BODY_LEFT; the vertical
+        // two-rows layout is the next value in the sequence
+        public static final int VERTICAL_TITLE_2_ROW_BODY_LEFT = 18;
+
+        /** What geometry type we are */
+        private int geometry;
+        /** What placeholder IDs we have */
+        private byte[] placeholderIDs;
+
+        /** Retrieve the geometry type */
+        public int getGeometryType() { return geometry; }
+
+        /**
+         * Create a new Embedded SSlideLayoutAtom, from 12 bytes of data:
+         * a 4 byte geometry type followed by 8 placeholder ID bytes
+         *
+         * @param data exactly 12 bytes of layout data
+         * @throws RuntimeException if data is not 12 bytes long
+         */
+        public SSlideLayoutAtom(byte[] data) {
+            if(data.length != 12) {
+                throw new RuntimeException("SSlideLayoutAtom created with byte array not 12 bytes long - was " + data.length + " bytes in size");
+            }
+
+            // Grab out our data
+            geometry = LittleEndian.getInt(data,0);
+            placeholderIDs = new byte[8];
+            System.arraycopy(data,4,placeholderIDs,0,8);
+        }
+
+        /**
+         * Write the contents of the record back, so it can be written
+         * to disk. Skips the record header
+         *
+         * @param out the stream to write the 12 bytes to
+         * @throws IOException if the stream rejects a write
+         */
+        public void writeOut(OutputStream out) throws IOException {
+            // Write the geometry
+            writeLittleEndian(geometry,out);
+            // Write the placeholder IDs
+            out.write(placeholderIDs);
+        }
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.hslf.model.Sheet;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Vector;
+
+/**
+ * These are tricky beasts. They contain the text of potentially
+ * many (normal) slides. They are made up of several sets of
+ * - SlidePersistAtom
+ * - TextHeaderAtom
+ * - TextBytesAtom / TextCharsAtom
+ * - StyleTextPropAtom (optional)
+ * - TextSpecInfoAtom (optional)
+ * - InteractiveInfo (optional)
+ * - TxInteractiveInfoAtom (optional)
+ * and then the next SlidePersistAtom.
+ *
+ * Eventually, Slides will find the blocks that interest them from all
+ * the SlideListWithText entries, and refer to them
+ *
+ * For now, we scan through looking for interesting bits, then creating
+ * the helpful Sheet from model for them
+ *
+ * @author Nick Burch
+ */
+
+// Handled as a container: the child records are parsed and made available
+public class SlideListWithText extends RecordContainer
+{
+    /** All the child records, in the order they were found */
+    private Record[] _children;
+    /** The 8 byte record header, copied from the source data */
+    private byte[] _header;
+    private static long _type = 4080;
+
+    /** The children, grouped by their leading SlidePersistAtom */
+    private SlideAtomsSet[] slideAtomsSets;
+
+    /**
+     * Create a new holder for slide records. The children are parsed,
+     * then grouped: each SlidePersistAtom together with the records
+     * that follow it, up to the next SlidePersistAtom, forms one set.
+     * A SlidePersistAtom with nothing following it gets no set.
+     *
+     * @param source the byte array holding the record
+     * @param start offset in source of the record header
+     * @param len length of the record, header included
+     */
+    protected SlideListWithText(byte[] source, int start, int len) {
+        // Grab the header
+        _header = new byte[8];
+        System.arraycopy(source,start,_header,0,8);
+
+        // Find our children
+        _children = Record.findChildRecords(source,start+8,len-8);
+
+        // Group our children together into SlideAtomsSets
+        // That way, model layer code can just grab the sets to use,
+        // without having to try to match the children together
+        Vector sets = new Vector();
+        int pos = 0;
+        while(pos < _children.length) {
+            if(!(_children[pos] instanceof SlidePersistAtom)) {
+                pos++;
+                continue;
+            }
+
+            // Find where the next SlidePersistAtom is
+            int next = pos+1;
+            while(next < _children.length && !(_children[next] instanceof SlidePersistAtom)) {
+                next++;
+            }
+
+            // Only build a set when something sits between the two
+            int count = next - pos - 1;
+            if(count > 0) {
+                Record[] following = new Record[count];
+                System.arraycopy(_children,pos+1,following,0,count);
+                sets.add(new SlideAtomsSet((SlidePersistAtom)_children[pos],following));
+            }
+
+            // Wind on to the next SlidePersistAtom (or the end)
+            pos = next;
+        }
+
+        // Turn the vector into an array
+        slideAtomsSets = (SlideAtomsSet[])sets.toArray(new SlideAtomsSet[sets.size()]);
+    }
+
+
+    /**
+     * Get access to the SlideAtomsSets of the children of this record
+     *
+     * @return the sets built at creation
+     */
+    public SlideAtomsSet[] getSlideAtomsSets() {
+        return slideAtomsSets;
+    }
+
+    /**
+     * Return the record type handled by this class
+     *
+     * @return the record type
+     */
+    public long getRecordType() {
+        return _type;
+    }
+
+    /**
+     * Return all the child records found at creation
+     *
+     * @return the child records
+     */
+    public Record[] getChildRecords() {
+        return _children;
+    }
+
+    /**
+     * Write the contents of the record back, so it can be written
+     * to disk
+     *
+     * @param out the stream to write the record to
+     * @throws IOException if a child or the stream fails to write
+     */
+    public void writeOut(OutputStream out) throws IOException {
+        writeOut(_header[0],_header[1],_type,_children,out);
+    }
+
+
+    /**
+     * Inner class to wrap up a matching set of records that hold the
+     * text for a given sheet. Contains the leading SlidePersistAtom,
+     * and all of the records until the next SlidePersistAtom. This
+     * includes sets of TextHeaderAtom and TextBytesAtom/TextCharsAtom,
+     * along with some others.
+     */
+    public class SlideAtomsSet {
+        private SlidePersistAtom slidePersistAtom;
+        private Record[] slideRecords;
+
+        /** Get the SlidePersistAtom, which gives details on the Slide this text is associated with */
+        public SlidePersistAtom getSlidePersistAtom() {
+            return slidePersistAtom;
+        }
+        /** Get the Text related records for this slide */
+        public Record[] getSlideRecords() {
+            return slideRecords;
+        }
+
+        /** Create one to hold the Records for one Slide's text */
+        public SlideAtomsSet(SlidePersistAtom s, Record[] r) {
+            slidePersistAtom = s;
+            slideRecords = r;
+        }
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A SlidePersist Atom (type 1011). Holds information on the text of a
+ * given slide, which are stored in the same SlideListWithText
+ *
+ * @author Nick Burch
+ */
+
+public class SlidePersistAtom extends RecordAtom
+{
+ private byte[] _header;
+ private static long _type = 1011l;
+
+ /** Slide reference ID. A machine readable "page id" */
+ private int refID;
+ private boolean hasShapesOtherThanPlaceholders;
+ /** Number of placeholder texts that will follow in the SlideListWithText */
+ private int numPlaceholderTexts;
+ /** Less useful identifier */
+ private int slideIdentifier;
+ /** Reserved fields. Who knows what they do */
+ private byte[] reservedFields;
+
+ public int getRefID() { return refID; }
+ public int getSlideIdentifier() { return slideIdentifier; }
+ public int getNumPlaceholderTexts() { return numPlaceholderTexts; }
+ public boolean getHasShapesOtherThanPlaceholders() { return hasShapesOtherThanPlaceholders; }
+
+ /* *************** record code follows ********************** */
+
+ /**
+ * For the SlidePersist Atom
+ */
+ protected SlidePersistAtom(byte[] source, int start, int len) {
+ // Sanity Checking
+ if(len < 8) { len = 8; }
+
+ // Get the header
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+
+ // Grab the reference ID
+ refID = (int)LittleEndian.getInt(source,start+8);
+
+ // Next up is a set of flags, but only bit 3 is used!
+ int flags = (int)LittleEndian.getInt(source,start+12);
+ if(flags == 4) {
+ hasShapesOtherThanPlaceholders = true;
+ } else {
+ hasShapesOtherThanPlaceholders = false;
+ }
+
+ // Now the number of Placeholder Texts
+ numPlaceholderTexts = (int)LittleEndian.getInt(source,start+16);
+
+ // Last useful one is the unique slide identifier
+ slideIdentifier = (int)LittleEndian.getInt(source,start+20);
+
+ // Finally you have typically 4 or 8 bytes of reserved fields,
+ // all zero running from 24 bytes in to the end
+ reservedFields = new byte[len-24];
+ System.arraycopy(source,start+24,reservedFields,0,reservedFields.length);
+ }
+
+ /**
+ * We are of type 1011
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ // Header - size or type unchanged
+ out.write(_header);
+
+ // Compute the flags part - only bit 3 is used
+ int flags = 0;
+ if(hasShapesOtherThanPlaceholders) {
+ flags = 4;
+ }
+
+ // Write out our fields
+ writeLittleEndian(refID,out);
+ writeLittleEndian(flags,out);
+ writeLittleEndian(numPlaceholderTexts,out);
+ writeLittleEndian(slideIdentifier,out);
+ out.write(reservedFields);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A TextBytesAtom (type 4008). Holds text in ascii form (unknown
+ * code page, for now assumed to be the default of
+ * org.apache.poi.util.StringUtil, which is the Excel default).
+ * The trailing return character is always stripped from this
+ *
+ * @author Nick Burch
+ */
+
+public class TextBytesAtom extends RecordAtom
+{
+ private byte[] _header;
+ private static long _type = 4008l;
+
+ /** The bytes that make up the text */
+ private byte[] _text;
+
+ /** Grabs the text. Uses the default codepage */
+ public String getText() {
+ return StringUtil.getFromCompressedUnicode(_text,0,_text.length);
+ }
+
+ /** Updates the text in the Atom. Must be 8 bit ascii */
+ public void setText(byte[] b) {
+ // Set the text
+ _text = b;
+
+ // Update the size (header bytes 5-8)
+ LittleEndian.putInt(_header,4,_text.length);
+ }
+
+ /* *************** record code follows ********************** */
+
+ /**
+ * For the TextBytes Atom
+ */
+ protected TextBytesAtom(byte[] source, int start, int len) {
+ // Sanity Checking
+ if(len < 8) { len = 8; }
+
+ // Get the header
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+
+ // Grab the text
+ _text = new byte[len-8];
+ System.arraycopy(source,start+8,_text,0,len-8);
+ }
+
+ /**
+ * We are of type 4008
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ // Header - size or type unchanged
+ out.write(_header);
+
+ // Write out our text
+ out.write(_text);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+
+/**
+ * A TextCharsAtom (type 4000). Holds text in byte swapped unicode form.
+ * The trailing return character is always stripped from this
+ *
+ * @author Nick Burch
+ */
+
+public class TextCharsAtom extends RecordAtom
+{
+ private byte[] _header;
+ private static long _type = 4000l;
+
+ /** The bytes that make up the text */
+ private byte[] _text;
+
+ /** Grabs the text. */
+ public String getText() {
+ return StringUtil.getFromUnicodeLE(_text);
+ }
+
+ /** Updates the text in the Atom. */
+ public void setText(String text) {
+ // Convert to little endian unicode
+ _text = new byte[text.length()*2];
+ StringUtil.putUnicodeLE(text,_text,0);
+
+ // Update the size (header bytes 5-8)
+ LittleEndian.putInt(_header,4,_text.length);
+ }
+
+ /* *************** record code follows ********************** */
+
+ /**
+ * For the TextChars Atom
+ */
+ protected TextCharsAtom(byte[] source, int start, int len) {
+ // Sanity Checking
+ if(len < 8) { len = 8; }
+
+ // Get the header
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+
+ // Grab the text
+ _text = new byte[len-8];
+ System.arraycopy(source,start+8,_text,0,len-8);
+ }
+
+ /**
+ * We are of type 4000
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ // Header - size or type unchanged
+ out.write(_header);
+
+ // Write out our text
+ out.write(_text);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A TextHeaderAtom (type 3999). Holds information on what kind of
+ * text is contained in the TextBytesAtom / TextCharsAtom that follows
+ * straight after
+ *
+ * @author Nick Burch
+ */
+
+public class TextHeaderAtom extends RecordAtom
+{
+ private byte[] _header;
+ private static long _type = 3999l;
+
+ public static final int TITLE_TYPE = 0;
+ public static final int BODY_TYPE = 1;
+ public static final int NOTES_TYPE = 2;
+ public static final int OTHER_TYPE = 4;
+ public static final int CENTRE_BODY_TYPE = 5;
+ public static final int CENTER_TITLE_TYPE = 6;
+ public static final int HALF_BODY_TYPE = 7;
+ public static final int QUARTER_BODY_TYPE = 8;
+
+ /** The kind of text it is */
+ private int textType;
+
+ public int getTextType() { return textType; }
+ public void setTextType(int type) { textType = type; }
+
+ /* *************** record code follows ********************** */
+
+ /**
+ * For the TextHeader Atom
+ */
+ protected TextHeaderAtom(byte[] source, int start, int len) {
+ // Sanity Checking - we're always 12 bytes long
+ if(len < 12) {
+ len = 12;
+ if(source.length - start < 12) {
+ throw new RuntimeException("Not enough data to form a TextHeaderAtom (always 12 bytes long) - found " + (source.length - start));
+ }
+ }
+
+ // Get the header
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+
+ // Grab the type
+ textType = (int)LittleEndian.getInt(source,start+8);
+ }
+
+ /**
+ * We are of type 3999
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ // Header - size or type unchanged
+ out.write(_header);
+
+ // Write out our type
+ writeLittleEndian(textType,out);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * If we come across a record we don't know about, we create one of
+ * these. It allows us to keep track of what it contains, so we can
+ * write it back out to disk unchanged
+ *
+ * @author Nick Burch
+ */
+
+public class UnknownRecordPlaceholder extends RecordAtom
+{
+ private byte[] _contents;
+ private long _type;
+
+ /**
+ * Create a new holder for a record we don't grok
+ */
+ protected UnknownRecordPlaceholder(byte[] source, int start, int len) {
+ // Sanity Checking - including whole header, so treat
+ // length as based of 0, not 8 (including header size based)
+ if(len < 0) { len = 0; }
+
+ // Treat as an atom, grab and hold everything
+ _contents = new byte[len];
+ System.arraycopy(source,start,_contents,0,len);
+ _type = LittleEndian.getUShort(_contents,2);
+ }
+
+ /**
+ * Return the value we were given at creation
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ out.write(_contents);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.record;
+
+import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A UserEdit Atom (type 4085). Holds information which bits of the file
+ * were last used by powerpoint, the version of powerpoint last used etc.
+ *
+ * ** WARNING ** stores byte offsets from the start of the PPT stream to
+ * other records! If you change the size of any elements before one of
+ * these, you'll need to update the offsets!
+ *
+ * @author Nick Burch
+ */
+
+public class UserEditAtom extends PositionDependentRecordAtom
+{
+ public static final int LAST_VIEW_NONE = 0;
+ public static final int LAST_VIEW_SLIDE_VIEW = 1;
+ public static final int LAST_VIEW_OUTLINE_VIEW = 2;
+ public static final int LAST_VIEW_NOTES = 3;
+
+ private byte[] _header;
+ private static long _type = 4085l;
+ private byte[] reserved;
+
+ private int lastViewedSlideID;
+ private int pptVersion;
+ private int lastUserEditAtomOffset;
+ private int persistPointersOffset;
+ private int docPersistRef;
+ private int maxPersistWritten;
+ private short lastViewType;
+
+ // Somewhat user facing getters
+ public int getLastViewedSlideID() { return lastViewedSlideID; }
+ public short getLastViewType() { return lastViewType; }
+
+ // Scary internal getters
+ public int getLastUserEditAtomOffset() { return lastUserEditAtomOffset; }
+ public int getPersistPointersOffset() { return persistPointersOffset; }
+ public int getDocPersistRef() { return docPersistRef; }
+ public int getMaxPersistWritten() { return maxPersistWritten; }
+
+ // More scary internal setters
+ public void setLastUserEditAtomOffset(int offset) { lastUserEditAtomOffset = offset; }
+ public void setPersistPointersOffset(int offset) { persistPointersOffset = offset; }
+
+ /* *************** record code follows ********************** */
+
+ /**
+ * For the UserEdit Atom
+ */
+ protected UserEditAtom(byte[] source, int start, int len) {
+ // Sanity Checking
+ if(len < 34) { len = 34; }
+
+ // Store where we currently live on disk
+ myLastOnDiskOffset = start;
+
+ // Get the header
+ _header = new byte[8];
+ System.arraycopy(source,start,_header,0,8);
+
+ // Get the last viewed slide ID
+ lastViewedSlideID = (int)LittleEndian.getInt(source,start+0+8);
+
+ // Get the PPT version
+ pptVersion = (int)LittleEndian.getInt(source,start+4+8);
+
+ // Get the offset to the previous incremental save's UserEditAtom
+ // This will be the byte offset on disk where the previous one
+ // starts, or 0 if this is the first one
+ lastUserEditAtomOffset = (int)LittleEndian.getInt(source,start+8+8);
+
+ // Get the offset to the persist pointers
+ // This will be the byte offset on disk where the preceding
+ // PersistPtrFullBlock or PersistPtrIncrementalBlock starts
+ persistPointersOffset = (int)LittleEndian.getInt(source,start+12+8);
+
+ // Get the persist reference for the document persist object
+ // Normally seems to be 1
+ docPersistRef = (int)LittleEndian.getInt(source,start+16+8);
+
+ // Maximum number of persist objects written
+ maxPersistWritten = (int)LittleEndian.getInt(source,start+20+8);
+
+ // Last view type
+ lastViewType = (short)LittleEndian.getShort(source,start+24+8);
+
+ // There might be a few more bytes, which are a reserved field
+ reserved = new byte[len-26-8];
+ System.arraycopy(source,start+26+8,reserved,0,reserved.length);
+ }
+
+ /**
+ * We are of type 4085
+ */
+ public long getRecordType() { return _type; }
+
+ /**
+ * Write the contents of the record back, so it can be written
+ * to disk
+ */
+ public void writeOut(OutputStream out) throws IOException {
+ // Header
+ out.write(_header);
+
+ // Write out the values
+ writeLittleEndian(lastViewedSlideID,out);
+ writeLittleEndian(pptVersion,out);
+ writeLittleEndian(lastUserEditAtomOffset,out);
+ writeLittleEndian(persistPointersOffset,out);
+ writeLittleEndian(docPersistRef,out);
+ writeLittleEndian(maxPersistWritten,out);
+ writeLittleEndian(lastViewType,out);
+
+ // Reserved fields
+ out.write(reserved);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.usermodel;
+
+import java.util.*;
+import java.io.*;
+
+import org.apache.poi.util.LittleEndian;
+
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+import org.apache.poi.hslf.record.Record;
+import org.apache.poi.hslf.record.SlideAtom;
+import org.apache.poi.hslf.record.SlideListWithText;
+import org.apache.poi.hslf.record.SlideListWithText.*;
+
+/**
+ * This class is a friendly wrapper on top of the more scary HSLFSlideShow.
+ *
+ * TODO:
+ * - figure out how to match notes to their correct sheet
+ * (will involve understanding DocSlideList and DocNotesList)
+ * - handle Slide creation cleaner
+ *
+ * @author Nick Burch
+ */
+
+public class SlideShow
+{
+ // What we're based on
+ private HSLFSlideShow _hslfSlideShow;
+
+ // Low level contents, as taken from HSLFSlideShow
+ private Record[] _records;
+
+ // Friendly objects for people to deal with
+ private Slide[] _slides;
+ private Notes[] _notes;
+ // private MetaSheets[] _msheets;
+
+ /**
+ * right now this function takes one parameter: a ppt file, and outputs
+ * the text it can find for it
+ */
+ public static void main(String args[]) throws IOException
+ {
+ HSLFSlideShow basefoo = new HSLFSlideShow(args[0]);
+ SlideShow foo = new SlideShow(basefoo);
+
+ Slide[] slides = foo.getSlides();
+ for(int i=0; i<slides.length; i++) {
+ Slide slide = slides[i];
+ System.out.println("*Slide " + slide.getSheetNumber() + ":");
+ TextRun[] runs = slide.getTextRuns();
+ for(int j=0; j<runs.length; j++) {
+ TextRun run = runs[j];
+ System.out.println(" * Text run " + run.getRunType());
+ System.out.println("\n" + run.getText() + "\n");
+ }
+ }
+ }
+
+ /**
+ * Constructs a Powerpoint document from the underlying
+ * HSLFSlideShow object. Finds the model stuff from this
+ *
+ * @param hslfSlideShow the HSLFSlideShow to base on
+ */
+ public SlideShow(HSLFSlideShow hslfSlideShow) throws IOException
+ {
+ // Get useful things from our base slideshow
+ _hslfSlideShow = hslfSlideShow;
+ _records = _hslfSlideShow.getRecords();
+ byte[] _docstream = _hslfSlideShow.getUnderlyingBytes();
+
+
+ // For holding the Slide Records
+ Vector slidesV = new Vector(10);
+ // For holding the Notes Records
+ Vector notesV = new Vector(10);
+ // For holding the Meta Sheet Records
+ Vector metaSheetsV = new Vector(10);
+ // For holding Document Records
+ Vector documentsV = new Vector(10);
+ // For holding SlideListWithText Records
+ Vector slwtV = new Vector(10);
+
+ // Look for Notes, Slides and Documents
+ for(int i=0; i<_records.length; i++) {
+ if(_records[i] instanceof org.apache.poi.hslf.record.Notes) {
+ notesV.add(_records[i]);
+ }
+ if(_records[i] instanceof org.apache.poi.hslf.record.Slide) {
+ slidesV.add(_records[i]);
+ }
+ if(_records[i].getRecordType() == 1000l) {
+ documentsV.add(_records[i]);
+ }
+ }
+
+
+ // Also look for SlideListWithTexts in Documents
+ //
+ // Need to get the SlideAtomsSets for all of these. Then, query the
+ // SlidePersistAtom, and group stuff together between SLWT blocks
+ // based on the refID/slideID. Finally, build up a list of all the
+ // SlideAtomsSets for a given refID / slideID, and pass them on to
+ // the Slide when creating
+ //
+ // If a notes sheet exists, can normally match the Notes sheet ID
+ // to the slide ID in the SlidePersistAtom. Since there isn't always,
+ // and we can't find the ID in the slide, just order on the slide ID,
+ // and hand off to the Slides in turn.
+ // (Based on output from dev.SLWTTextListing and dev.SlideAndNotesAtomListing)
+ //
+ // There is often duplicate text, especially for the first few
+ // Slides. Currently, it's up to the Slide model code to detect
+ // and ignore those
+
+ for(int i=0; i<documentsV.size(); i++) {
+ Record docRecord = (Record)documentsV.get(i);
+ Record[] docChildren = docRecord.getChildRecords();
+ for(int j=0; j<docChildren.length; j++) {
+ if(docChildren[j] instanceof SlideListWithText) {
+ //System.out.println("Found SLWT in document " + i);
+ //System.out.println(" Has " + docChildren[j].getChildRecords().length + " children");
+ slwtV.add(docChildren[j]);
+ }
+ }
+ }
+
+ // For now, grab out all the sets of Atoms in the SlideListWithText's
+ // Only store those which aren't empty
+ Vector setsV = new Vector();
+ for(int i=0; i<slwtV.size(); i++) {
+ SlideListWithText slwt = (SlideListWithText)slwtV.get(i);
+ SlideAtomsSet[] thisSets = slwt.getSlideAtomsSets();
+ for(int j=0; j<thisSets.length; j++) {
+ setsV.add(thisSets[j]);
+ }
+ }
+
+
+ // Now, sort the SlideAtomSets together into groups for the same slide ID,
+ // and order them by the slide ID
+
+ // Find the unique IDs
+ HashSet uniqueSlideIDs = new HashSet();
+ for(int i=0; i<setsV.size(); i++) {
+ SlideAtomsSet thisSet = (SlideAtomsSet)setsV.get(i);
+ int id = thisSet.getSlidePersistAtom().getSlideIdentifier();
+ Integer idI = new Integer(id);
+ if(! uniqueSlideIDs.contains(idI) ) {
+ uniqueSlideIDs.add(idI);
+ }
+ }
+ int[] slideIDs = new int[uniqueSlideIDs.size()];
+ int pos = 0;
+ for(Iterator getIDs = uniqueSlideIDs.iterator(); getIDs.hasNext(); pos++) {
+ Integer id = (Integer)getIDs.next();
+ slideIDs[pos] = id.intValue();
+ }
+ // Sort
+ Arrays.sort(slideIDs);
+ // Group
+ Vector[] sortedSetsV = new Vector[slideIDs.length];
+ for(int i=0; i<setsV.size(); i++) {
+ SlideAtomsSet thisSet = (SlideAtomsSet)setsV.get(i);
+ int id = thisSet.getSlidePersistAtom().getSlideIdentifier();
+ int arrayPos = -1;
+ for(int j=0; j<slideIDs.length; j++) {
+ if(slideIDs[j] == id) { arrayPos = j; }
+ }
+ if(sortedSetsV[arrayPos] == null) { sortedSetsV[arrayPos] = new Vector(); }
+ sortedSetsV[arrayPos].add(thisSet);
+ }
+
+
+ // ******************* Do the real model layer creation ****************
+
+
+ // Create our Notes
+ // (Need to create first, as passed to the Slides)
+ _notes = new Notes[notesV.size()];
+ for(int i=0; i<_notes.length; i++) {
+ _notes[i] = new Notes((org.apache.poi.hslf.record.Notes)notesV.get(i));
+ }
+
+
+ // Create our Slides
+ _slides = new Slide[slidesV.size()];
+ for(int i=0; i<_slides.length; i++) {
+ // Grab the slide Record
+ org.apache.poi.hslf.record.Slide slideRecord = (org.apache.poi.hslf.record.Slide)slidesV.get(i);
+
+ // Do they have a Notes?
+ Notes thisNotes = null;
+ // Find their SlideAtom, and use this to check for a Notes
+ Record[] slideRecordChildren = slideRecord.getChildRecords();
+ for(int j=0; j<slideRecordChildren.length; j++) {
+ if(slideRecordChildren[j] instanceof SlideAtom) {
+ SlideAtom sa = (SlideAtom)slideRecordChildren[j];
+ int notesID = sa.getNotesID();
+ if(notesID != 0) {
+ for(int k=0; k<_notes.length; k++) {
+ if(_notes[k].getSheetNumber() == notesID) {
+ thisNotes = _notes[k];
+ }
+ }
+ }
+ }
+ }
+
+ // Grab the (hopefully) corresponding block of Atoms
+ SlideAtomsSet[] sets;
+ if(sortedSetsV.length > i) {
+ Vector thisSetsV = sortedSetsV[i];
+ sets = new SlideAtomsSet[thisSetsV.size()];
+ for(int j=0; j<sets.length; j++) {
+ sets[j] = (SlideAtomsSet)thisSetsV.get(j);
+ }
+ //System.out.println("For slide " + i + ", found " + sets.length + " Sets of text");
+ } else {
+ // Didn't find enough SlideAtomSets to give any to this sheet
+ sets = new SlideAtomsSet[0];
+ }
+
+ // Create the Slide model layer
+ _slides[i] = new Slide(slideRecord,thisNotes,sets);
+ }
+
+ }
+
+
+ /**
+ * Writes out the slideshow file the is represented by an instance of
+ * this class
+ * @param out The OutputStream to write to.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in OutputStream
+ */
+ public void write(OutputStream out) throws IOException {
+ _hslfSlideShow.write(out);
+ }
+
+
+ // Accesser methods follow
+
+ /**
+ * Returns an array of all the normal Slides found in the slideshow
+ */
+ public Slide[] getSlides() { return _slides; }
+
+ /**
+ * Returns an array of all the normal Notes found in the slideshow
+ */
+ public Notes[] getNotes() { return _notes; }
+
+ /**
+ * Returns an array of all the meta Sheets (master sheets etc)
+ * found in the slideshow
+ */
+ //public MetaSheet[] getMetaSheets() { return _msheets; }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hslf.util;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * This class doesn't work yet, but is here to show the idea of a
+ * ByteArrayOutputStream where you can track how many bytes you've
+ * already written, and go back and write over a previous part of the stream
+ *
+ * @author Nick Burch
+ */
+
+public class MutableByteArrayOutputStream extends ByteArrayOutputStream
+{
+ /** Return how many bytes we've stuffed in so far */
+ public int getBytesWritten() { return -1; }
+
+ /** Write some bytes to the array */
+ public void write(byte[] b) {}
+ public void write(int b) {}
+
+ /** Write some bytes to an earlier bit of the array */
+ public void overwrite(byte[] b, int startPos) {}
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf;
+
+
+import junit.framework.TestCase;
+import java.io.*;
+import org.apache.poi.poifs.filesystem.*;
+
+/**
+ * Tests that HSLFSlideShow writes the powerpoint bit of data back out
+ * correctly. Currently, that means being the same as what it read in
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestReWrite extends TestCase {
+ // HSLFSlideShow primed on the test data
+ private HSLFSlideShow ss;
+ // POIFS primed on the test data
+ private POIFSFileSystem pfs;
+
+ public TestReWrite() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ FileInputStream fis = new FileInputStream(filename);
+ pfs = new POIFSFileSystem(fis);
+ ss = new HSLFSlideShow(pfs);
+ }
+
+ public void testWritesOutTheSame() throws Exception {
+ // Write out to a byte array
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ss.write(baos);
+
+ // Build an input stream of it
+ ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+
+ // Use POIFS to query that lot
+ POIFSFileSystem npfs = new POIFSFileSystem(bais);
+
+ // Check that the "PowerPoint Document" sections have the same size
+ DocumentEntry oProps = (DocumentEntry)pfs.getRoot().getEntry("PowerPoint Document");
+ DocumentEntry nProps = (DocumentEntry)npfs.getRoot().getEntry("PowerPoint Document");
+ assertEquals(oProps.getSize(),nProps.getSize());
+
+ // Check that they contain the same data
+ byte[] _oData = new byte[oProps.getSize()];
+ byte[] _nData = new byte[nProps.getSize()];
+ pfs.createDocumentInputStream("PowerPoint Document").read(_oData);
+ npfs.createDocumentInputStream("PowerPoint Document").read(_nData);
+ for(int i=0; i<_oData.length; i++) {
+ System.out.println(i + "\t" + Integer.toHexString(i));
+ assertEquals(_oData[i], _nData[i]);
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf;
+
+
+import junit.framework.TestCase;
+import java.io.*;
+import java.util.*;
+import org.apache.poi.hslf.record.*;
+import org.apache.poi.poifs.filesystem.*;
+
+/**
+ * Tests that HSLFSlideShow writes the powerpoint bit of data back out
+ * in a sane manner - i.e. records end up in the right place
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestReWriteSanity extends TestCase {
+ // HSLFSlideShow primed on the test data
+ private HSLFSlideShow ss;
+ // POIFS primed on the test data
+ private POIFSFileSystem pfs;
+
+ public TestReWriteSanity() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ FileInputStream fis = new FileInputStream(filename);
+ pfs = new POIFSFileSystem(fis);
+ ss = new HSLFSlideShow(pfs);
+ }
+
+ public void testUserEditAtomsRight() throws Exception {
+ // Write out to a byte array
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ss.write(baos);
+
+ // Build an input stream of it
+ ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+
+ // Create a new one from that
+ HSLFSlideShow wss = new HSLFSlideShow(bais);
+
+ // Find the location of the PersistPtrIncrementalBlocks and
+ // UserEditAtoms
+ Record[] r = wss.getRecords();
+ Hashtable pp = new Hashtable();
+ Hashtable ue = new Hashtable();
+ ue.put(new Integer(0),new Integer(0)); // Will show 0 if first
+ int pos = 0;
+ int lastUEPos = -1;
+
+ for(int i=0; i<r.length; i++) {
+ if(r[i] instanceof PersistPtrHolder) {
+ pp.put(new Integer(pos), r[i]);
+ }
+ if(r[i] instanceof UserEditAtom) {
+ ue.put(new Integer(pos), r[i]);
+ lastUEPos = pos;
+ }
+
+ ByteArrayOutputStream bc = new ByteArrayOutputStream();
+ r[i].writeOut(bc);
+ pos += bc.size();
+ }
+
+ // Check that the UserEditAtom's point to right stuff
+ for(int i=0; i<r.length; i++) {
+ if(r[i] instanceof UserEditAtom) {
+ UserEditAtom uea = (UserEditAtom)r[i];
+ int luPos = uea.getLastUserEditAtomOffset();
+ int ppPos = uea.getPersistPointersOffset();
+
+ assertTrue(pp.containsKey(new Integer(ppPos)));
+ assertTrue(ue.containsKey(new Integer(luPos)));
+ }
+ }
+
+ // Check that the CurrentUserAtom points to the right UserEditAtom
+ CurrentUserAtom cua = wss.getCurrentUserAtom();
+ int listedUEPos = (int)cua.getCurrentEditOffset();
+ assertEquals(lastUEPos,listedUEPos);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf;
+
+
+import junit.framework.TestCase;
+import org.apache.poi.hslf.record.*;
+
+/**
+ * Tests that HSLFSlideShow returns the right numbers of key records when
+ * it parses the test file
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestRecordCounts extends TestCase {
+ // HSLFSlideShow primed on the test data
+ private HSLFSlideShow ss;
+
+ public TestRecordCounts() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ ss = new HSLFSlideShow(filename);
+ }
+
+ public void testSheetsCount() throws Exception {
+ // Top level
+ Record[] r = ss.getRecords();
+
+ int count = 0;
+ for(int i=0; i<r.length; i++) {
+ if(r[i] instanceof Slide) {
+ count++;
+ }
+ }
+ // Currently still sees the Master Sheet, but might not in the future
+ assertEquals(3,count);
+ }
+
+ public void testNotesCount() throws Exception {
+ // Top level
+ Record[] r = ss.getRecords();
+
+ int count = 0;
+ for(int i=0; i<r.length; i++) {
+ if(r[i] instanceof Notes &&
+ r[i].getRecordType() == 1008l) {
+ count++;
+ }
+ }
+ // Two real sheets, plus the master sheet
+ assertEquals(3,count);
+ }
+
+ public void testSlideListWithTextCount() throws Exception {
+ // Second level
+ Record[] rt = ss.getRecords();
+ Record[] r = rt[0].getChildRecords();
+
+ int count = 0;
+ for(int i=0; i<r.length; i++) {
+ if(r[i] instanceof SlideListWithText &&
+ r[i].getRecordType() == 4080l) {
+ count++;
+ }
+ }
+ // Two real sheets, plus the master sheet
+ assertEquals(3,count);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.extractor;
+
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that the extractor correctly gets the text out of our sample file
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TextExtractor extends TestCase {
+ // Extractor primed on the test data
+ private PowerPointExtractor ppe;
+
+ public TextExtractor() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ ppe = new PowerPointExtractor(filename);
+ }
+
+ public void testReadSheetText() throws Exception {
+ String sheetText = ppe.getText();
+ String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
+
+ assertEquals(expectText.length(),sheetText.length());
+ char[] st = sheetText.toCharArray();
+ char[] et = expectText.toCharArray();
+ for(int i=0; i<et.length; i++) {
+ System.out.println(i + "\t" + et[i] + " " + st[i]);
+ assertEquals(et[i],st[i]);
+ }
+ assertEquals(expectText,sheetText);
+ }
+
+ public void testReadNoteText() throws Exception {
+ String notesText = ppe.getNotes();
+ String expectText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n";
+
+ assertEquals(expectText.length(),notesText.length());
+ char[] nt = notesText.toCharArray();
+ char[] et = expectText.toCharArray();
+ for(int i=0; i<et.length; i++) {
+ System.out.println(i + "\t" + et[i] + " " + nt[i]);
+ assertEquals(et[i],nt[i]);
+ }
+ assertEquals(expectText,notesText);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.record;
+
+
+import junit.framework.TestCase;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Tests that SlidePersistAtom works properly
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestSlidePersistAtom extends TestCase {
+ // From a real file
+ private byte[] data_a = new byte[] { 0, 0, 0xF3-256, 3, 0x14, 0, 0, 0,
+ 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0 };
+
+ public void testRecordType() throws Exception {
+ SlidePersistAtom spa = new SlidePersistAtom(data_a, 0, data_a.length);
+ assertEquals(1011l, spa.getRecordType());
+ }
+ public void testFlags() throws Exception {
+ SlidePersistAtom spa = new SlidePersistAtom(data_a, 0, data_a.length);
+ assertEquals(4, spa.getRefID() );
+ assertEquals(true, spa.getHasShapesOtherThanPlaceholders() );
+ assertEquals(2, spa.getNumPlaceholderTexts() );
+ assertEquals(256, spa.getSlideIdentifier());
+ }
+
+ public void testWrite() throws Exception {
+ SlidePersistAtom spa = new SlidePersistAtom(data_a, 0, data_a.length);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ spa.writeOut(baos);
+ byte[] b = baos.toByteArray();
+
+ assertEquals(data_a.length, b.length);
+ for(int i=0; i<data_a.length; i++) {
+ assertEquals(data_a[i],b[i]);
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.record;
+
+
+import junit.framework.TestCase;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Tests that TextBytesAtom works properly
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestTextBytesAtom extends TestCase {
+ // From a real file
+ private byte[] data = new byte[] { 0, 0, 0xA8-256, 0x0f, 0x1c, 0, 0, 0,
+ 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68,
+ 0x65, 0x20, 0x74, 0x69, 0x74, 0x6C, 0x65, 0x20, 0x6F, 0x6E,
+ 0x20, 0x70, 0x61, 0x67, 0x65, 0x20, 0x32 };
+ private String data_text = "This is the title on page 2";
+ private byte[] alt_data = new byte[] { 0, 0, 0xA8-256, 0x0F, 0x14, 0, 0, 0,
+ 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20,
+ 0x74, 0x65, 0x73, 0x74, 0x20, 0x74, 0x69, 0x74, 0x6C, 0x65 };
+ private String alt_text = "This is a test title";
+
+ public void testRecordType() throws Exception {
+ TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
+ assertEquals(4008l, tba.getRecordType());
+ }
+
+ public void testTextA() throws Exception {
+ TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
+ assertEquals(data_text, tba.getText());
+ }
+ public void testTextB() throws Exception {
+ TextBytesAtom tba = new TextBytesAtom(alt_data,0,alt_data.length);
+ assertEquals(alt_text, tba.getText());
+ }
+
+ public void testChangeText() throws Exception {
+ TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
+ tba.setText(alt_text.getBytes("ISO-8859-1"));
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ tba.writeOut(baos);
+ byte[] b = baos.toByteArray();
+
+ // Compare the header and the text
+ assertEquals(alt_data.length, b.length);
+ for(int i=0; i<alt_data.length; i++) {
+ assertEquals(alt_data[i],b[i]);
+ }
+ }
+
+ public void testWrite() throws Exception {
+ TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ tba.writeOut(baos);
+ byte[] b = baos.toByteArray();
+
+ assertEquals(data.length, b.length);
+ for(int i=0; i<data.length; i++) {
+ assertEquals(data[i],b[i]);
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.record;
+
+
+import junit.framework.TestCase;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Tests that TextCharsAtom works properly
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestTextCharsAtom extends TestCase {
+ // From a real file
+ private byte[] data = new byte[] { 0, 0, 0xA0-256, 0x0f, 0x08, 0, 0, 0,
+ 0x54, 0x00, 0x68, 0x00, 0x69, 0x00, 0x73, 0x00 };
+ private String data_text = "This";
+ private byte[] alt_data = new byte[] { 0, 0, 0xA0-256, 0x0F, 0x0a, 0, 0, 0,
+ 0x54, 0x00, 0x68, 0x00, 0x69, 0x00, 0x73, 0x00, 0xa3-256, 0x01 };
+ private String alt_text = "This\u01A3";
+
+ public void testRecordType() throws Exception {
+ TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
+ assertEquals(4000l, tca.getRecordType());
+ }
+
+ public void testTextA() throws Exception {
+ TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
+ assertEquals(data_text, tca.getText());
+ }
+ public void testTextB() throws Exception {
+ TextCharsAtom tca = new TextCharsAtom(alt_data,0,alt_data.length);
+ assertEquals(alt_text, tca.getText());
+ }
+
+ public void testChangeText() throws Exception {
+ TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
+ tca.setText(alt_text);
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ tca.writeOut(baos);
+ byte[] b = baos.toByteArray();
+
+ // Compare the header and the text
+ assertEquals(alt_data.length, b.length);
+ for(int i=0; i<alt_data.length; i++) {
+ assertEquals(alt_data[i],b[i]);
+ }
+ }
+
+ public void testWrite() throws Exception {
+ TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ tca.writeOut(baos);
+ byte[] b = baos.toByteArray();
+
+ assertEquals(data.length, b.length);
+ for(int i=0; i<data.length; i++) {
+ assertEquals(data[i],b[i]);
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.record;
+
+
+import junit.framework.TestCase;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Tests that TextHeaderAtom works properly
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestTextHeaderAtom extends TestCase {
+ // From a real file
+ private byte[] notes_data = new byte[] { 0, 0, 0x9f-256, 0x0f, 4, 0, 0, 0, 2, 0, 0, 0};
+ private byte[] title_data = new byte[] { 0, 0, 0x9f-256, 0x0f, 4, 0, 0, 0, 0, 0, 0, 0 };
+ private byte[] body_data = new byte[] { 0, 0, 0x9f-256, 0x0f, 4, 0, 0, 0, 1, 0, 0, 0 };
+
+ public void testRecordType() throws Exception {
+ TextHeaderAtom tha = new TextHeaderAtom(notes_data,0,12);
+ assertEquals(3999l, tha.getRecordType());
+ }
+ public void testTypes() throws Exception {
+ TextHeaderAtom n_tha = new TextHeaderAtom(notes_data,0,12);
+ TextHeaderAtom t_tha = new TextHeaderAtom(title_data,0,12);
+ TextHeaderAtom b_tha = new TextHeaderAtom(body_data,0,12);
+ assertEquals(TextHeaderAtom.NOTES_TYPE, n_tha.getTextType());
+ assertEquals(TextHeaderAtom.TITLE_TYPE, t_tha.getTextType());
+ assertEquals(TextHeaderAtom.BODY_TYPE, b_tha.getTextType());
+ }
+
+ public void testWrite() throws Exception {
+ TextHeaderAtom tha = new TextHeaderAtom(notes_data,0,12);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ tha.writeOut(baos);
+ byte[] b = baos.toByteArray();
+
+ assertEquals(notes_data.length, b.length);
+ for(int i=0; i<notes_data.length; i++) {
+ assertEquals(notes_data[i],b[i]);
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.usermodel;
+
+
+import junit.framework.TestCase;
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+
+/**
+ * Tests that SlideShow returns the right number of Sheets and MetaSheets
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestCounts extends TestCase {
+ // SlideShow primed on the test data
+ private SlideShow ss;
+
+ public TestCounts() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ HSLFSlideShow hss = new HSLFSlideShow(filename);
+ ss = new SlideShow(hss);
+ }
+
+ public void testSheetsCount() throws Exception {
+ Slide[] slides = ss.getSlides();
+ // Two sheets, plus some crap related to the master sheet
+ assertEquals(3, slides.length);
+ }
+
+ public void testNotesCount() throws Exception {
+ Notes[] notes = ss.getNotes();
+ // Two sheets -> two notes, plus the notes on the slide master
+ assertEquals(3, notes.length);
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.usermodel;
+
+
+import junit.framework.TestCase;
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+
+/**
+ * Tests that SlideShow returns MetaSheets which have the right text in them
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestNotesText extends TestCase {
+ // SlideShow primed on the test data
+ private SlideShow ss;
+
+ public TestNotesText() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ HSLFSlideShow hss = new HSLFSlideShow(filename);
+ ss = new SlideShow(hss);
+ }
+
+ public void testNotesOne() throws Exception {
+ Notes notes = ss.getNotes()[1];
+
+ String[] expectText = new String[] {"These are the notes for page 1"};
+ assertEquals(expectText.length, notes.getTextRuns().length);
+ for(int i=0; i<expectText.length; i++) {
+ assertEquals(expectText[i], notes.getTextRuns()[i].getText());
+ }
+ }
+
+ public void testNotesTwo() throws Exception {
+ Notes notes = ss.getNotes()[2];
+ String[] expectText = new String[] {"These are the notes on page two, again lacking formatting"};
+ assertEquals(expectText.length, notes.getTextRuns().length);
+ for(int i=0; i<expectText.length; i++) {
+ assertEquals(expectText[i], notes.getTextRuns()[i].getText());
+ }
+ }
+}
--- /dev/null
+
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.usermodel;
+
+
+import junit.framework.TestCase;
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+
+/**
+ * Tests that SlideShow returns Sheets which have the right text in them
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestSheetText extends TestCase {
+ // SlideShow primed on the test data
+ private SlideShow ss;
+
+ public TestSheetText() throws Exception {
+ String dirname = System.getProperty("HSLF.testdata.path");
+ String filename = dirname + "/basic_test_ppt_file.ppt";
+ HSLFSlideShow hss = new HSLFSlideShow(filename);
+ ss = new SlideShow(hss);
+ }
+
+ public void testSheetOne() throws Exception {
+ Sheet slideOne = ss.getSlides()[0];
+
+ String[] expectText = new String[] {"This is a test title","This is a test subtitle\nThis is on page 1"};
+ assertEquals(expectText.length, slideOne.getTextRuns().length);
+ for(int i=0; i<expectText.length; i++) {
+ assertEquals(expectText[i], slideOne.getTextRuns()[i].getText());
+ }
+ }
+
+ public void testSheetTwo() throws Exception {
+ Sheet slideTwo = ss.getSlides()[1];
+ String[] expectText = new String[] {"This is the title on page 2","This is page two\nIt has several blocks of text\nNone of them have formatting"};
+ assertEquals(expectText.length, slideTwo.getTextRuns().length);
+ for(int i=0; i<expectText.length; i++) {
+ assertEquals(expectText[i], slideTwo.getTextRuns()[i].getText());
+ }
+ }
+}