/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.hslf.usermodel;

import static org.apache.logging.log4j.util.Unbox.box;
import static org.apache.poi.hslf.usermodel.HSLFSlideShow.POWERPOINT_DOCUMENT;
import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP95_DOCUMENT;
import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP97_DOCUMENT;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.TreeMap;
import java.util.stream.Collectors;

import org.apache.commons.collections4.IteratorUtils;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.logging.log4j.Logger;
import org.apache.poi.logging.PoiLogManager;
import org.apache.poi.POIDocument;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
import org.apache.poi.hslf.record.CurrentUserAtom;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.record.DocumentEncryptionAtom;
import org.apache.poi.hslf.record.ExOleObjStg;
import org.apache.poi.hslf.record.PersistPtrHolder;
import org.apache.poi.hslf.record.PersistRecord;
import org.apache.poi.hslf.record.PositionDependentRecord;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.RecordTypes;
import org.apache.poi.hslf.record.UserEditAtom;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.usermodel.PictureData;
import org.apache.poi.sl.usermodel.PictureData.PictureType;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;

/**
 * This class contains the main functionality for the PowerPoint file
 * "reader". It is only a very basic class for now.
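 * <p>
 * A minimal usage sketch (the file name is hypothetical, checked exceptions
 * are left to the caller):
 * <pre>{@code
 * try (HSLFSlideShowImpl impl = new HSLFSlideShowImpl("deck.ppt")) {
 *     for (Record r : impl.getRecords()) {
 *         System.out.println(r.getRecordType());
 *     }
 * }
 * }</pre>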
 */
public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
    private static final Logger LOG = PoiLogManager.getLogger(HSLFSlideShowImpl.class);

    static final int UNSET_OFFSET = -1;

    //arbitrarily selected; may need to increase
    private static final int DEFAULT_MAX_RECORD_LENGTH = 200_000_000;
    private static final int MAX_DOCUMENT_SIZE = 100_000_000;
    private static int MAX_RECORD_LENGTH = DEFAULT_MAX_RECORD_LENGTH;
    private static final int MAX_IMAGE_LENGTH = 150_000_000;

    // Holds metadata on where things are in our document
    private CurrentUserAtom currentUser;

    // Low level contents of the file
    private byte[] _docstream;

    // Low level contents
    private Record[] _records;

    // Raw Pictures contained in the pictures stream
    private List<HSLFPictureData> _pictures;

    // Embedded objects stored in storage records in the document stream, lazily populated.
    private HSLFObjectData[] _objects;

    /**
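     * Overrides the limit on record sizes accepted while parsing, e.g.
     * {@code HSLFSlideShowImpl.setMaxRecordLength(300_000_000);} when the default
     * is too small for a large, trusted file (the value shown is illustrative).
     *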
     * @param length the max record length allowed for HSLFSlideShowImpl
     */
    public static void setMaxRecordLength(int length) {
        MAX_RECORD_LENGTH = length;
    }

    /**
     * @return the max record length allowed for HSLFSlideShowImpl
     */
    public static int getMaxRecordLength() {
        return MAX_RECORD_LENGTH;
    }

    /**
     * Constructs a PowerPoint document from fileName. Parses the document
     * and places all the important stuff into data structures.
     *
     * @param fileName The name of the file to read.
     * @throws IOException if there is a problem while parsing the document.
     */
    @SuppressWarnings("resource")
    public HSLFSlideShowImpl(String fileName) throws IOException {
        this(new POIFSFileSystem(new File(fileName)));
    }

    /**
     * Constructs a PowerPoint document from an input stream. Parses the
     * document and places all the important stuff into data structures.
     *
     * @param inputStream the source of the data
     * @throws IOException if there is a problem while parsing the document.
     */
    @SuppressWarnings("resource")
    public HSLFSlideShowImpl(InputStream inputStream) throws IOException {
        // wrap the stream in an OLE2 filesystem
        this(new POIFSFileSystem(inputStream));
    }

    /**
     * Constructs a PowerPoint document from a POIFS Filesystem. Parses the
     * document and places all the important stuff into data structures.
     *
     * @param filesystem the POIFS FileSystem to read from
     * @throws IOException if there is a problem while parsing the document.
     */
    public HSLFSlideShowImpl(POIFSFileSystem filesystem) throws IOException {
        this(filesystem.getRoot());
    }

    /**
     * Constructs a PowerPoint document from a specific point in a
     * POIFS Filesystem. Parses the document and places all the
     * important stuff into data structures.
     *
     * @param dir the POIFS directory to read from
     * @throws IOException if there is a problem while parsing the document.
     */
    public HSLFSlideShowImpl(DirectoryNode dir) throws IOException {
        super(handleDualStorage(dir));

        try {
            // First up, grab the "Current User" stream
            // We need this before we can detect Encrypted Documents
            readCurrentUserStream();

            // Next up, grab the data that makes up the
            //  PowerPoint stream
            readPowerPointStream();

            // Now, build records based on the PowerPoint stream
            buildRecords();

            // Look for any other streams
            readOtherStreams();
        } catch (RuntimeException | IOException e) {
            // clean up the filesystem when we cannot read it here to avoid
            // leaking file handles
            dir.getFileSystem().close();

            throw e;
        }
    }

    private static DirectoryNode handleDualStorage(DirectoryNode dir) throws IOException {
        // when there's a dual storage entry, use it, as the outer document quite probably can't be read ...
        if (!dir.hasEntryCaseInsensitive(PP97_DOCUMENT)) {
            return dir;
        }
        return (DirectoryNode) dir.getEntryCaseInsensitive(PP97_DOCUMENT);
    }

    /**
     * Constructs a new, empty PowerPoint document.
     */
    public static HSLFSlideShowImpl create() {
        try (InputStream is = HSLFSlideShowImpl.class.getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt")) {
            if (is == null) {
                throw new HSLFException("Missing resource 'empty.ppt'");
            }
            return new HSLFSlideShowImpl(is);
        } catch (IOException e) {
            throw new HSLFException(e);
        }
    }

    /**
     * Extracts the main PowerPoint document stream from the
     * POI file, ready to be parsed into records
     *
     * @throws IOException when the PowerPoint stream can't be read
     */
    private void readPowerPointStream() throws IOException {
        final DirectoryNode dir = getDirectory();

        if (!dir.hasEntryCaseInsensitive(POWERPOINT_DOCUMENT) && dir.hasEntryCaseInsensitive(PP95_DOCUMENT)) {
            throw new OldPowerPointFormatException("You seem to have supplied a PowerPoint95 file, which isn't supported");
        }

        // Get the main document stream
        final Entry entry = dir.getEntryCaseInsensitive(POWERPOINT_DOCUMENT);
        if (!(entry instanceof DocumentEntry)) {
            throw new IllegalArgumentException("Had unexpected type of entry for name: " + POWERPOINT_DOCUMENT + ": " + entry.getClass());
        }
        DocumentEntry docProps = (DocumentEntry) entry;

        // Grab the document stream
        int len = docProps.getSize();
        try (InputStream is = dir.createDocumentInputStream(docProps)) {
            _docstream = IOUtils.toByteArray(is, len, MAX_DOCUMENT_SIZE);
        }
    }

    /**
     * Builds the list of records, based on the contents
     * of the PowerPoint stream
     */
    private void buildRecords() throws IOException {
        // The format of records in a PowerPoint file is:
        //   <little endian 2 byte "info">
        //   <little endian 2 byte "type">
        //   <little endian 4 byte "length">
        // If it has a zero length, following it will be another record
        //      <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
        // If it has a length, depending on its type it may have children or data
        // If it has children, these will follow straight away
        //      <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
        // If it has data, this will come straight after, and run for the length
        //      <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
        // All lengths given exclude the 8 byte record header
        // (Data records are known as Atoms)

        // Document should start with:
        //   0F 00 E8 03 ## ## ## ##
        //     (type 1000 = document, info 00 0f is normal, rest is document length)
        //   01 00 E9 03 28 00 00 00
        //     (type 1001 = document atom, info 00 01 normal, 28 bytes long)
        //   80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
        //   05 00 00 00 0A 00 00 00 xx xx xx
        //     (the contents of the document atom, not sure what it means yet)
        //   (records then follow)

        // When parsing a document, look to see if you know about that type
        //  of the current record. If you know it's a type that has children,
        //  process the record's data area looking for more records
        // If you know about the type and it doesn't have children, either do
        //  something with the data (eg TextRun) or skip over it
        // If you don't know about the type, play safe and skip over it (using
        //  its length to know where the next record will start)
        //
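        // An illustrative sketch of decoding one record header (roughly what
        //  Record.buildRecordAtOffset does; all values are little endian):
        //    int info = LittleEndian.getUShort(_docstream, offset);
        //    int type = LittleEndian.getUShort(_docstream, offset + 2);
        //    int len  = LittleEndian.getInt(_docstream, offset + 4);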

        _records = read(_docstream, (int) currentUser.getCurrentEditOffset());
    }

    private Record[] read(byte[] docstream, int usrOffset) throws IOException {
        //sort found records by offset.
        //(not strictly necessary, but SlideShow.findMostRecentCoreRecords() expects them sorted)
        NavigableMap<Integer, Record> records = new TreeMap<>(); // offset -> record
        Map<Integer, Integer> persistIds = new HashMap<>(); // offset -> persistId
        initRecordOffsets(docstream, usrOffset, records, persistIds);
        HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(docstream, records);

        for (Map.Entry<Integer, Record> entry : records.entrySet()) {
            Integer offset = entry.getKey();
            Record record = entry.getValue();
            Integer persistId = persistIds.get(offset);
            if (record == null) {
                // all plain records have been already added,
                // only new records need to be decrypted (tbd #35897)
                decryptData.decryptRecord(docstream, persistId, offset);
                record = Record.buildRecordAtOffset(docstream, offset);
                entry.setValue(record);
            }

            if (record instanceof PersistRecord) {
                ((PersistRecord) record).setPersistId(persistId);
            }
        }

        decryptData.close();
        return records.values().toArray(new Record[0]);
    }

    private void initRecordOffsets(byte[] docstream, int usrOffset, NavigableMap<Integer, Record> recordMap, Map<Integer, Integer> offset2id) {
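        // Walk the chain of UserEditAtoms backwards, starting from the offset
        //  stored in the CurrentUserAtom: each UserEditAtom points at its
        //  PersistPtrHolder and at the offset of the previous edit (0 ends the chain)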
        while (usrOffset != 0) {
            Record builtRecord = Record.buildRecordAtOffset(docstream, usrOffset);
            if (!(builtRecord instanceof UserEditAtom)) {
                throw new CorruptPowerPointFileException("Did not have a user edit atom: " + builtRecord);
            }
            UserEditAtom usr = (UserEditAtom) builtRecord;

            recordMap.put(usrOffset, usr);

            int psrOffset = usr.getPersistPointersOffset();
            Record record = Record.buildRecordAtOffset(docstream, psrOffset);
            if (record == null) {
                throw new CorruptPowerPointFileException("Powerpoint document is missing a PersistPtrHolder at " + psrOffset);
            }
            if (!(record instanceof PersistPtrHolder)) {
                throw new CorruptPowerPointFileException("Record is not a PersistPtrHolder: " + record + " at " + psrOffset);
            }
            PersistPtrHolder ptr = (PersistPtrHolder) record;
            recordMap.put(psrOffset, ptr);

            for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
                Integer offset = entry.getValue();
                Integer id = entry.getKey();
                recordMap.put(offset, null); // reserve a slot for the record
                offset2id.put(offset, id);
            }

            usrOffset = usr.getLastUserEditAtomOffset();

            // check for corrupted user edit atom and try to repair it
            // if the next user edit atom offset is already known, we would go into an endless loop
            if (usrOffset > 0 && recordMap.containsKey(usrOffset)) {
                // a user edit atom is usually located 36 bytes before the smallest known record offset
                usrOffset = recordMap.firstKey() - 36;
                // check that we really are located on a user edit atom
                int ver_inst = LittleEndian.getUShort(docstream, usrOffset);
                int type = LittleEndian.getUShort(docstream, usrOffset + 2);
                int len = LittleEndian.getInt(docstream, usrOffset + 4);
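                // 4085 (0x0FF5) is the UserEditAtom record type; its body is
                //  0x1C bytes long, or 0x20 when an encryption persist reference is present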
                if (ver_inst == 0 && type == 4085 && (len == 0x1C || len == 0x20)) {
                    LOG.atWarn().log("Repairing invalid user edit atom");
                    usr.setLastUserEditAtomOffset(usrOffset);
                } else {
                    throw new CorruptPowerPointFileException("Powerpoint document contains invalid user edit atom");
                }
            }
        }
    }

    public DocumentEncryptionAtom getDocumentEncryptionAtom() {
        for (Record r : _records) {
            if (r instanceof DocumentEncryptionAtom) {
                return (DocumentEncryptionAtom) r;
            }
        }
        return null;
    }


    /**
     * Find the "Current User" stream, and load it
     */
    private void readCurrentUserStream() {
        try {
            currentUser = new CurrentUserAtom(getDirectory());
        } catch (IOException ie) {
            LOG.atError().withThrowable(ie).log("Error finding Current User Atom");
            currentUser = new CurrentUserAtom();
        }
    }

    /**
     * Find any other streams from the filesystem, and load them
     */
    private void readOtherStreams() {
        // Currently, there aren't any
    }

    /**
     * Find and read in pictures contained in this presentation.
     * This is lazily called as and when we want to touch pictures.
     */
    private void readPictures() throws IOException {

        // if the presentation doesn't contain pictures, use an empty collection instead
        if (!getDirectory().hasEntryCaseInsensitive("Pictures")) {
            _pictures = new ArrayList<>();
            return;
        }

        final Entry en = getDirectory().getEntryCaseInsensitive("Pictures");
        if (!(en instanceof DocumentEntry)) {
            throw new IllegalArgumentException("Had unexpected type of entry for name: Pictures: " + en.getClass());
        }
        DocumentEntry entry = (DocumentEntry) en;
        EscherContainerRecord blipStore = getBlipStore();
        byte[] pictstream;
        try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) {
            pictstream = IOUtils.toByteArray(is, entry.getSize(), MAX_IMAGE_LENGTH);
        }

        List<PictureFactory> factories = new ArrayList<>();
        try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {

            int pos = 0;
            // An empty picture record (length 0) will take up 8 bytes
            while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) {
                int offset = pos;

                decryptData.decryptPicture(pictstream, offset);

                // Image signature
                int signature = LittleEndian.getUShort(pictstream, pos);
                pos += LittleEndianConsts.SHORT_SIZE;
                // Image type + 0xF018
                int type = LittleEndian.getUShort(pictstream, pos);
                pos += LittleEndianConsts.SHORT_SIZE;
                // Image size (excluding the 8 byte header)
                int imgsize = LittleEndian.getInt(pictstream, pos);
                pos += LittleEndianConsts.INT_SIZE;

                // When parsing the BStoreDelay stream, [MS-ODRAW] says that we
                //  should terminate if the type isn't 0xf007 or 0xf018->0xf117
                if (!((type == 0xf007) || (type >= 0xf018 && type <= 0xf117))) {
                    break;
                }

                // The image size must be 0 or greater
                // (0 is allowed, but odd, since we do wind on by the header each
                //  time, so we won't get stuck)
                if (imgsize < 0) {
                    throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
                }

                // If the type (including the bonus 0xF018) is 0, skip it
                PictureType pt = PictureType.forNativeID(type - 0xF018);
                if (pt == null) {
                    LOG.atError().log("Problem reading picture: Invalid image type 0, on picture with length {}.\nYour document will probably become corrupted if you save it! Position: {}", box(imgsize), box(pos));
                } else {
                    //The pictstream can be truncated halfway through a picture.
                    //This is not a problem if the pictstream contains extra pictures
                    //that are not used in any slide -- BUG-60305
                    if (pos + imgsize > pictstream.length) {
                        LOG.atWarn().log("\"Pictures\" stream may have ended early. In some circumstances, this is not a problem; " +
                                "in others, this could indicate a corrupt file");
                        break;
                    }

                    // Copy the data, ready to pass to PictureData
                    byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH);

                    factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature));
                }

                pos += imgsize;
            }
        }

        matchPicturesAndRecords(factories, blipStore);

        List<HSLFPictureData> pictures = new ArrayList<>();
        for (PictureFactory it : factories) {
            try {
                HSLFPictureData pict = it.build();

                pict.setIndex(pictures.size() + 1);        // index is 1-based
                pictures.add(pict);
            } catch (IllegalArgumentException e) {
                LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!");
            }
        }

        _pictures = pictures;
    }

    /**
     * Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the
     * Blip Store for the slideshow.
     * <p>
     * When reading a slideshow into memory, we have to match the records in the Blip Store with the factories
     * representing pictures in the pictures stream. This can be difficult, as presentations might have incorrectly
     * formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood
     * of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs.
     *
     * @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream.
     * @param blipStore Blip Store of the presentation being loaded.
     */
    private static void matchPicturesAndRecords(List<PictureFactory> factories, EscherContainerRecord blipStore) {
        // LinkedList because we're sorting and removing.
        LinkedList<PictureFactory> unmatchedFactories = new LinkedList<>(factories);
        unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset));

        // Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a
        //  matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple
        //  records share an offset.
        Map<Integer, List<EscherBSERecord>> unmatchedRecords = new HashMap<>();
        for (EscherRecord child : blipStore) {
            if (!(child instanceof EscherBSERecord)) {
                throw new CorruptPowerPointFileException("Did not have an EscherBSERecord: " + child);
            }
            EscherBSERecord record = (EscherBSERecord) child;
            unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record);
        }

        // The first pass through the factories only pairs a factory with a record if we're very confident that they
        //  are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched
        //  factories and records are removed from the unmatched collections.
        for (Iterator<PictureFactory> iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) {
            PictureFactory factory = iterator.next();
            int physicalOffset = factory.getOffset();
            List<EscherBSERecord> recordsAtOffset = unmatchedRecords.get(physicalOffset);

            if (recordsAtOffset == null || recordsAtOffset.isEmpty()) {
                // There are no records that have an offset matching the physical offset in the stream. We'll do
                //  more complicated and less reliable matching for this factory after all "well known"
                //  image <-> record pairs have been found.
                LOG.atDebug().log("No records with offset {}", box(physicalOffset));
            } else if (recordsAtOffset.size() == 1) {
                // Only 1 record has the same offset as the target image. Assume these are a pair.
                factory.setRecord(recordsAtOffset.get(0));
                unmatchedRecords.remove(physicalOffset);
                iterator.remove();
            } else {

                // Multiple records share an offset. Perform additional matching based on UID.
                for (int i = 0; i < recordsAtOffset.size(); i++) {
                    EscherBSERecord record = recordsAtOffset.get(i);
                    byte[] recordUid = record.getUid();
                    byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
                    if (Arrays.equals(recordUid, imageHeader)) {
                        factory.setRecord(record);
                        recordsAtOffset.remove(i);
                        iterator.remove();
                        break;
                    }
                }
            }
        }

        // At this point, any factories remaining didn't have a record with a matching offset. The second pass
        // through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be
        // found will get a new record.
        List<EscherBSERecord> remainingRecords = unmatchedRecords.values()
                .stream()
                .flatMap(Collection::stream)
                .collect(Collectors.toList());

        for (PictureFactory factory : unmatchedFactories) {

            boolean matched = false;
            for (int i = remainingRecords.size() - 1; i >= 0; i--) {
                EscherBSERecord record = remainingRecords.get(i);
                byte[] recordUid = record.getUid();
                byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
                if (Arrays.equals(recordUid, imageHeader)) {
                    remainingRecords.remove(i);
                    factory.setRecord(record);
                    record.setOffset(factory.getOffset());
                    matched = true;
                }
            }

            if (!matched) {
                // Synthesize a new record
                LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset));
                EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset);
                factory.setRecord(record);
            }
        }

        LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size()));
    }

    /**
     * remove duplicated UserEditAtoms and merge PersistPtrHolder, i.e.
     * remove document edit history
     */
    public void normalizeRecords() {
        try {
            updateAndWriteDependantRecords(null, null);
        } catch (IOException e) {
            throw new CorruptPowerPointFileException(e);
        }
        _records = HSLFSlideShowEncrypted.normalizeRecords(_records);
    }


    /**
     * This is a helper function, which is needed when adding new position dependent records
     * or when finally writing the slideshow to a file.
     *
     * @param os                 the stream to write to, if null only the references are updated
     * @param interestingRecords a map of interesting records (PersistPtrHolder and UserEditAtom)
     *                           referenced by their RecordType. Only the very last of each type will be saved to the map.
     *                           May be null, if not needed.
     */
    @SuppressWarnings("WeakerAccess")
    public void updateAndWriteDependantRecords(OutputStream os, Map<RecordTypes, PositionDependentRecord> interestingRecords)
            throws IOException {
        // For position dependent records, hold where they were and now are
        // As we go along, update, and hand over, to any Position Dependent
        //  records we happen across
        Map<Integer, Integer> oldToNewPositions = new HashMap<>();

        // First pass - figure out where all the position dependent
        //   records are going to end up, in the new scheme
        // (Annoyingly, some powerpoint files have PersistPtrHolders
        //  that reference slides after the PersistPtrHolder)
        UserEditAtom usr = null;
        PersistPtrHolder ptr = null;
        CountingOS cos = new CountingOS();
        for (Record record : _records) {
            // all top level records are position dependent
            if (!(record instanceof PositionDependentRecord)) {
                throw new CorruptPowerPointFileException("Record is not a position dependent record: " + record);
            }
            PositionDependentRecord pdr = (PositionDependentRecord) record;
            int oldPos = pdr.getLastOnDiskOffset();
            int newPos = cos.size();
            pdr.setLastOnDiskOffset(newPos);
            if (oldPos != UNSET_OFFSET) {
                // new records don't need a mapping, as they aren't in a relation yet
                oldToNewPositions.put(oldPos, newPos);
            }

            // Grab interesting records as they come past
            // this will only save the very last record of each type
            RecordTypes saveme = null;
            int recordType = (int) record.getRecordType();
            if (recordType == RecordTypes.PersistPtrIncrementalBlock.typeID) {
                saveme = RecordTypes.PersistPtrIncrementalBlock;
                ptr = (PersistPtrHolder) pdr;
            } else if (recordType == RecordTypes.UserEditAtom.typeID) {
                saveme = RecordTypes.UserEditAtom;
                usr = (UserEditAtom) pdr;
            }
            if (interestingRecords != null && saveme != null) {
                interestingRecords.put(saveme, pdr);
            }

            // Dummy write out, so the position winds on properly
            record.writeOut(cos);
        }
        cos.close();

        if (usr == null || ptr == null) {
            throw new HSLFException("UserEditAtom or PersistPtr can't be determined.");
        }

        Map<Integer, Integer> persistIds = new HashMap<>();
        for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
            persistIds.put(oldToNewPositions.get(entry.getValue()), entry.getKey());
        }

        try (HSLFSlideShowEncrypted encData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
            for (Record record : _records) {
                // We've already figured out their new location, and
                // told them that
                // Tell them of the positions of the other records though
                PositionDependentRecord pdr = (PositionDependentRecord) record;
                Integer persistId = persistIds.get(pdr.getLastOnDiskOffset());
                if (persistId == null) {
                    persistId = 0;
                }

                // For now, we're only handling PositionDependentRecord's that
                // happen at the top level.
                // In future, we'll need the handle them everywhere, but that's
                // a bit trickier
                pdr.updateOtherRecordReferences(oldToNewPositions);

                // Whatever happens, write out that record tree
                if (os != null) {
                    record.writeOut(encData.encryptRecord(os, persistId, record));
                }
            }
        }

        // Update and write out the Current User atom
        int oldLastUserEditAtomPos = (int) currentUser.getCurrentEditOffset();
        Integer newLastUserEditAtomPos = oldToNewPositions.get(oldLastUserEditAtomPos);
        if (newLastUserEditAtomPos == null || usr.getLastOnDiskOffset() != newLastUserEditAtomPos) {
            throw new HSLFException("Couldn't find the new location of the last UserEditAtom that used to be at " + oldLastUserEditAtomPos);
        }
        currentUser.setCurrentEditOffset(usr.getLastOnDiskOffset());
    }

    /**
     * Writes out the slideshow to the currently open file.
     * <p>
     * This will fail (with an {@link IllegalStateException}) if the
     * slideshow was opened read-only, opened from an {@link InputStream}
     * instead of a File, or if this is not the root document. For those cases,
     * you must use {@link #write(OutputStream)} or {@link #write(File)} to
     * write to a brand new document.
     *
     * @throws IOException           thrown on errors writing to the file
     * @throws IllegalStateException if this isn't from a writable File
     * @since POI 3.15 beta 3
     */
    @Override
    public void write() throws IOException {
        validateInPlaceWritePossible();

        // Write the PowerPoint streams to the current FileSystem
        // No need to do anything to other streams, already there!
        write(getDirectory().getFileSystem(), false);

        // Sync with the File on disk
        getDirectory().getFileSystem().writeFilesystem();
    }

    /**
     * Writes out the slideshow file that is represented by an instance
     * of this class.
     * <p>This will write out only the common OLE2 streams. If you require all
     * streams to be written out, use {@link #write(File, boolean)}
     * with {@code preserveNodes} set to {@code true}.
     *
     * @param newFile The File to write to.
     * @throws IOException If there is an unexpected IOException from writing to the File
     */
    @Override
    public void write(File newFile) throws IOException {
        // Write out, but only the common streams
        write(newFile, false);
    }

    /**
     * Writes out the slideshow file that is represented by an instance
     * of this class.
     * If you require all streams to be written out (e.g. Macros, embedded
     * documents), then set {@code preserveNodes} to {@code true}
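     * <p>A brief sketch ({@code hslf} and the file name are hypothetical):
     * <pre>{@code
     * hslf.write(new File("copy-with-macros.ppt"), true);
     * }</pre>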
     *
     * @param newFile       The File to write to.
     * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
     * @throws IOException If there is an unexpected IOException from writing to the File
     */
    public void write(File newFile, boolean preserveNodes) throws IOException {
        // Get a new FileSystem to write into

        try (POIFSFileSystem outFS = POIFSFileSystem.create(newFile)) {
            // Write into the new FileSystem
            write(outFS, preserveNodes);

            // Send the POIFSFileSystem object out to the underlying stream
            outFS.writeFilesystem();
        }
    }

    /**
     * Writes out the slideshow file that is represented by an instance
     * of this class.
     * <p>This will write out only the common OLE2 streams. If you require all
     * streams to be written out, use {@link #write(OutputStream, boolean)}
     * with {@code preserveNodes} set to {@code true}.
     *
     * @param out The OutputStream to write to.
     * @throws IOException If there is an unexpected IOException from
     *                     the passed in OutputStream
     */
    @Override
    public void write(OutputStream out) throws IOException {
        // Write out, but only the common streams
        write(out, false);
    }

    /**
     * Writes out the slideshow file that is represented by an instance
     * of this class.
     * If you require all streams to be written out (e.g. Macros, embedded
     * documents), then set {@code preserveNodes} to {@code true}
     *
     * @param out           The OutputStream to write to.
     * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
     * @throws IOException If there is an unexpected IOException from
     *                     the passed in OutputStream
     */
    public void write(OutputStream out, boolean preserveNodes) throws IOException {
        // Get a new FileSystem to write into

        try (POIFSFileSystem outFS = new POIFSFileSystem()) {
            // Write into the new FileSystem
            write(outFS, preserveNodes);

            // Send the POIFSFileSystem object out to the underlying stream
            outFS.writeFilesystem(out);
        }
    }

    private void write(POIFSFileSystem outFS, boolean copyAllOtherNodes) throws IOException {
        // read properties and pictures, with old encryption settings where appropriate
        if (_pictures == null) {
            readPictures();
        }
        getDocumentSummaryInformation();

        // The list of entries we've written out
        final List<String> writtenEntries = new ArrayList<>(1);

        // set new encryption settings
        try (HSLFSlideShowEncrypted encryptedSS = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
            _records = encryptedSS.updateEncryptionRecord(_records);

            // Write out the Property Streams
            writeProperties(outFS, writtenEntries);

            try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) {

                // For position dependent records, hold where they were and now are
                // As we go along, update, and hand over, to any Position Dependent
                // records we happen across
                updateAndWriteDependantRecords(baos, null);

                // Update our cached copy of the bytes that make up the PPT stream
                _docstream = baos.toByteArray();
            }

            // Write the PPT stream into the POIFS layer
            ByteArrayInputStream bais = new ByteArrayInputStream(_docstream);
            outFS.createOrUpdateDocument(bais, POWERPOINT_DOCUMENT);
            writtenEntries.add(POWERPOINT_DOCUMENT);

            currentUser.setEncrypted(encryptedSS.getDocumentEncryptionAtom() != null);
            currentUser.writeToFS(outFS);
            writtenEntries.add("Current User");

            if (!_pictures.isEmpty()) {
                Enumeration<InputStream> pictEnum = IteratorUtils.asEnumeration(
                    _pictures.stream().map(data -> encryptOnePicture(encryptedSS, data)).iterator()
                );

                try (SequenceInputStream sis = new SequenceInputStream(pictEnum)) {
                    outFS.createOrUpdateDocument(sis, "Pictures");
                    writtenEntries.add("Pictures");
                } catch (IllegalStateException e) {
                    throw (IOException)e.getCause();
                }
            }
        }

        // If requested, copy over any other streams we spot, eg Macros
        if (copyAllOtherNodes) {
            EntryUtils.copyNodes(getDirectory().getFileSystem(), outFS, writtenEntries);
        }
    }

    private static InputStream encryptOnePicture(HSLFSlideShowEncrypted encryptedSS, HSLFPictureData data) {
        try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) {
            data.write(baos);
            byte[] pictBytes = baos.toByteArray();
            encryptedSS.encryptPicture(pictBytes, 0);
            return new ByteArrayInputStream(pictBytes);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }


    @Override
    public EncryptionInfo getEncryptionInfo() {
        DocumentEncryptionAtom dea = getDocumentEncryptionAtom();
        return (dea != null) ? dea.getEncryptionInfo() : null;
    }


    /* ******************* adding methods follow ********************* */

    /**
     * Adds a new root level record, at the end, but before the last
     * PersistPtrIncrementalBlock.
     */
    @SuppressWarnings({"UnusedReturnValue", "WeakerAccess"})
    public synchronized int appendRootLevelRecord(Record newRecord) {
        int addedAt = -1;
        Record[] r = new Record[_records.length + 1];
        boolean added = false;
        for (int i = (_records.length - 1); i >= 0; i--) {
            if (added) {
                // Just copy over
                r[i] = _records[i];
            } else {
                r[(i + 1)] = _records[i];
                if (_records[i] instanceof PersistPtrHolder) {
                    r[i] = newRecord;
                    added = true;
                    addedAt = i;
                }
            }
        }
        _records = r;
        return addedAt;
    }

    /**
     * Add a new picture to this presentation.
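     * <p>The returned offset is where the picture data will start in the
     * "Pictures" stream: directly after the previous picture (its offset plus
     * its on-disk size), or 0 for the first picture.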
     *
     * @return offset of this picture in the Pictures stream
     */
    public int addPicture(HSLFPictureData img) {
        // Process any existing pictures if we haven't yet
        if (_pictures == null) {
            try {
                readPictures();
            } catch (IOException e) {
                throw new CorruptPowerPointFileException(e.getMessage());
            }
        }

        // Add the new picture in
        int offset = 0;
        if (!_pictures.isEmpty()) {
            HSLFPictureData prev = _pictures.get(_pictures.size() - 1);
            offset = prev.getOffset() + prev.getBseSize();
        }
        img.setIndex(_pictures.size() + 1);        // index is 1-based
        _pictures.add(img);
        return offset;
    }

    /* ******************* fetching methods follow ********************* */


    /**
     * Returns an array of all the records found in the slideshow
     */
    public Record[] getRecords() {
        return _records;
    }

    /**
     * Returns an array of the bytes of the file. Only correct after a
     * call to open or write; at all other times it might be wrong!
     */
    public byte[] getUnderlyingBytes() {
        return _docstream;
    }

    /**
     * Fetch the Current User Atom of the document
     */
    public CurrentUserAtom getCurrentUserAtom() {
        return currentUser;
    }

    /**
     * Return list of pictures contained in this presentation
     *
     * @return list with the read pictures or an empty list if the
     * presentation doesn't contain pictures.
     */
    public List<HSLFPictureData> getPictureData() {
        if (_pictures == null) {
            try {
                readPictures();
            } catch (IOException e) {
                throw new CorruptPowerPointFileException(e.getMessage());
            }
        }

        return Collections.unmodifiableList(_pictures);
    }

    /**
     * Gets embedded object data from the slide show.
     *
     * @return the embedded objects.
     */
    public HSLFObjectData[] getEmbeddedObjects() {
        if (_objects == null) {
            List<HSLFObjectData> objects = new ArrayList<>();
            for (Record r : _records) {
                if (r instanceof ExOleObjStg) {
                    objects.add(new HSLFObjectData((ExOleObjStg) r));
                }
            }
            _objects = objects.toArray(new HSLFObjectData[0]);
        }
        return _objects;
    }

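    /**
     * Finds the Blip Store (BStore container) inside the Document's drawing group,
     * creating an empty one before the drawing group's OPT record if none exists yet.
     */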
    private EscherContainerRecord getBlipStore() {
        Document documentRecord = null;
        for (Record record : _records) {
            if (record == null) {
                throw new CorruptPowerPointFileException("Did not have a valid record: " + record);
            }
            if (record.getRecordType() == RecordTypes.Document.typeID) {
                if (!(record instanceof Document)) {
                    throw new CorruptPowerPointFileException("Did not have a Document: " + record);
                }
                documentRecord = (Document) record;
                break;
            }
        }

        if (documentRecord == null) {
            throw new CorruptPowerPointFileException("Document record is missing");
        }

        if (documentRecord.getPPDrawingGroup() == null) {
            throw new CorruptPowerPointFileException("Drawing group is missing");
        }

        EscherContainerRecord blipStore;

        EscherContainerRecord dggContainer = documentRecord.getPPDrawingGroup().getDggContainer();
        blipStore = HSLFShape.getEscherChild(dggContainer, EscherContainerRecord.BSTORE_CONTAINER);
        if (blipStore == null) {
            blipStore = new EscherContainerRecord();
            blipStore.setRecordId(EscherContainerRecord.BSTORE_CONTAINER);

            dggContainer.addChildBefore(blipStore, EscherOptRecord.RECORD_ID);
        }
        return blipStore;
    }

    @Override
    public void close() throws IOException {
        // only close the filesystem, if we are based on the root node.
        // embedded documents/slideshows shouldn't close the parent container
        if (getDirectory().getParent() == null ||
            PP97_DOCUMENT.equals(getDirectory().getName())) {
            POIFSFileSystem fs = getDirectory().getFileSystem();
            if (fs != null) {
                fs.close();
            }
        }
    }

    @Override
    protected String getEncryptedPropertyStreamName() {
        return "EncryptedSummary";
    }

    void writePropertiesImpl() throws IOException {
        super.writeProperties();
    }

    PropertySet getPropertySetImpl(String setName) throws IOException {
        return super.getPropertySet(setName);
    }

    PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
        return super.getPropertySet(setName, encryptionInfo);
    }

    void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
        super.writeProperties(outFS, writtenEntries);
    }

    void validateInPlaceWritePossibleImpl() throws IllegalStateException {
        super.validateInPlaceWritePossible();
    }

    void clearDirectoryImpl() {
        super.clearDirectory();
    }

    boolean initDirectoryImpl() {
        return super.initDirectory();
    }

    void replaceDirectoryImpl(DirectoryNode newDirectory) throws IOException {
        super.replaceDirectory(newDirectory);
    }

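    /**
     * An {@link OutputStream} that discards everything written to it and only
     * counts the bytes, used by {@code updateAndWriteDependantRecords(..)} to
     * pre-compute the on-disk offsets of the records without buffering them.
     */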
    private static class CountingOS extends OutputStream {
        int count;

        @Override
        public void write(int b) {
            count++;
        }

        @Override
        public void write(byte[] b) {
            count += b.length;
        }

        @Override
        public void write(byte[] b, int off, int len) {
            count += len;
        }

        public int size() {
            return count;
        }
    }

    /**
     * Assists in creating {@link HSLFPictureData} when parsing a slideshow.
     *
     * This class is relied upon heavily by {@link #matchPicturesAndRecords(List, EscherContainerRecord)}.
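     * <p>The expected flow (a sketch of its use above): construct the factory from
     * the raw picture bytes, pair it with its record via
     * {@link #setRecord(EscherBSERecord)}, then call {@link #build()}.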
     */
    static final class PictureFactory {
        final byte[] imageData;

        private final EscherContainerRecord recordContainer;
        private final PictureData.PictureType type;
        private final int offset;
        private final int signature;
        private EscherBSERecord record;

        PictureFactory(
                EscherContainerRecord recordContainer,
                PictureData.PictureType type,
                byte[] imageData,
                int offset,
                int signature
        ) {
            this.recordContainer = Objects.requireNonNull(recordContainer);
            this.type = Objects.requireNonNull(type);
            this.imageData = Objects.requireNonNull(imageData);
            this.offset = offset;
            this.signature = signature;
        }

        int getOffset() {
            return offset;
        }

        /**
         * Constructs a new {@link HSLFPictureData}.
         * <p>
         * The {@link EscherBSERecord} must have been set via {@link #setRecord(EscherBSERecord)} prior to invocation.
         */
        HSLFPictureData build() {
            Objects.requireNonNull(record, "Can't build an instance until the record has been assigned.");
            return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature);
        }

        /**
         * Sets the {@link EscherBSERecord} with which this factory should create a {@link HSLFPictureData}.
         */
        PictureFactory setRecord(EscherBSERecord bse) {
            record = bse;
            return this;
        }
    }
}