12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.openxml4j.opc;
-
- import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.getOutputFile;
- import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.getSampleFile;
- import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.getSampleFileName;
- import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.openSampleStream;
- import static org.apache.poi.openxml4j.opc.PackagingURIHelper.createPartName;
- import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_WORDPROCESSINGML;
- import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
- import static org.junit.jupiter.api.Assertions.assertEquals;
- import static org.junit.jupiter.api.Assertions.assertFalse;
- import static org.junit.jupiter.api.Assertions.assertNotNull;
- import static org.junit.jupiter.api.Assertions.assertNull;
- import static org.junit.jupiter.api.Assertions.assertThrows;
- import static org.junit.jupiter.api.Assertions.assertTrue;
- import static org.junit.jupiter.api.Assertions.fail;
-
- import java.io.BufferedInputStream;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import java.io.PushbackInputStream;
- import java.net.URI;
- import java.net.URISyntaxException;
- import java.nio.charset.StandardCharsets;
- import java.util.Arrays;
- import java.util.Enumeration;
- import java.util.HashMap;
- import java.util.List;
- import java.util.TreeMap;
- import java.util.function.BiConsumer;
- import java.util.regex.Pattern;
- import java.util.stream.Stream;
-
- import com.google.common.hash.Hashing;
- import com.google.common.io.Files;
- import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
- import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
- import org.apache.commons.compress.archivers.zip.ZipFile;
- import org.apache.commons.io.output.CountingOutputStream;
- import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
- import org.apache.logging.log4j.LogManager;
- import org.apache.logging.log4j.Logger;
- import org.apache.poi.EncryptedDocumentException;
- import org.apache.poi.POIDataSamples;
- import org.apache.poi.POITestCase;
- import org.apache.poi.extractor.ExtractorFactory;
- import org.apache.poi.extractor.POITextExtractor;
- import org.apache.poi.ooxml.POIXMLException;
- import org.apache.poi.ooxml.POIXMLTypeLoader;
- import org.apache.poi.ooxml.util.DocumentHelper;
- import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
- import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
- import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
- import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
- import org.apache.poi.openxml4j.opc.internal.FileHelper;
- import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
- import org.apache.poi.openxml4j.opc.internal.ZipHelper;
- import org.apache.poi.openxml4j.util.ZipSecureFile;
- import org.apache.poi.sl.usermodel.SlideShow;
- import org.apache.poi.sl.usermodel.SlideShowFactory;
- import org.apache.poi.ss.usermodel.Workbook;
- import org.apache.poi.ss.usermodel.WorkbookFactory;
- import org.apache.poi.util.IOUtils;
- import org.apache.poi.util.TempFile;
- import org.apache.poi.xssf.XSSFTestDataSamples;
- import org.apache.poi.xssf.streaming.SXSSFWorkbook;
- import org.apache.poi.xssf.usermodel.XSSFRelation;
- import org.apache.poi.xssf.usermodel.XSSFWorkbook;
- import org.apache.poi.xwpf.usermodel.XWPFRelation;
- import org.junit.jupiter.api.Disabled;
- import org.junit.jupiter.api.Test;
- import org.junit.jupiter.api.function.Executable;
- import org.junit.jupiter.params.ParameterizedTest;
- import org.junit.jupiter.params.provider.CsvSource;
- import org.w3c.dom.Document;
- import org.w3c.dom.Element;
- import org.w3c.dom.NodeList;
- import org.xml.sax.SAXException;
- import org.xml.sax.SAXParseException;
-
- public final class TestPackage {
/** Log4j logger of this test class; the delete tests use it to debug-log part names. */
- private static final Logger LOG = LogManager.getLogger(TestPackage.class);
/** Content type the delete tests expect for the {@code /docProps/app.xml} part. */
- private static final String CONTENT_EXT_PROPS = "application/vnd.openxmlformats-officedocument.extended-properties+xml";
/** Sample-data accessor obtained from {@code POIDataSamples.getSpreadSheetInstance()}. */
- private static final POIDataSamples xlsSamples = POIDataSamples.getSpreadSheetInstance();
-
- @Test
- void isStrictOoxmlFormat() throws IOException, InvalidFormatException {
- try (OPCPackage p = OPCPackage.open(getSampleFileName("TestPackageCommon.docx"), PackageAccess.READ)) {
- assertFalse(p.isStrictOoxmlFormat());
- }
- try (OPCPackage p = OPCPackage.open(xlsSamples.getFile("sample.xlsx"), PackageAccess.READ)) {
- assertFalse(p.isStrictOoxmlFormat());
- }
- try (OPCPackage p = OPCPackage.open(xlsSamples.getFile("sample.strict.xlsx"), PackageAccess.READ)) {
- assertTrue(p.isStrictOoxmlFormat());
- }
- }
-
-
- /**
- * Test that just opening and closing the file doesn't alter the document.
- */
- @Test
- void openSave() throws IOException, InvalidFormatException {
- String originalFile = getSampleFileName("TestPackageCommon.docx");
- File targetFile = getOutputFile("TestPackageOpenSaveTMP.docx");
-
- try (OPCPackage p = OPCPackage.open(originalFile, PackageAccess.READ_WRITE)) {
- try {
- p.save(targetFile.getAbsoluteFile());
-
- // Compare the original and newly saved document
- assertTrue(targetFile.exists());
- ZipFileAssert.assertEquals(new File(originalFile), targetFile);
- assertTrue(targetFile.delete());
- } finally {
- // use revert to not re-write the input file
- p.revert();
- }
- }
- }
-
- /**
- * Test that when we create a new Package, we give it
- * the correct default content types
- */
- @Test
- void createGetsContentTypes()
- throws IOException, InvalidFormatException, SecurityException, IllegalArgumentException {
- File targetFile = getOutputFile("TestCreatePackageTMP.docx");
-
- // Zap the target file, in case of an earlier run
- if(targetFile.exists()) {
- assertTrue(targetFile.delete());
- }
-
- try (OPCPackage pkg = OPCPackage.create(targetFile)) {
- try {
- // Check it has content types for rels and xml
- ContentTypeManager ctm = getContentTypeManager(pkg);
- assertEquals("application/xml", ctm.getContentType(createPartName("/foo.xml")));
- assertEquals(ContentTypes.RELATIONSHIPS_PART, ctm.getContentType(createPartName("/foo.rels")));
- assertNull(ctm.getContentType(createPartName("/foo.txt")));
- } finally {
- pkg.revert();
- }
- }
- }
-
- /**
- * Test package creation.
- */
- @Test
- void createPackageAddPart() throws IOException, InvalidFormatException {
- File targetFile = getOutputFile("TestCreatePackageTMP.docx");
-
- File expectedFile = getSampleFile("TestCreatePackageOUTPUT.docx");
-
- // Zap the target file, in case of an earlier run
- if(targetFile.exists()) {
- assertTrue(targetFile.delete());
- }
-
- // Create a package
- OPCPackage pkg = OPCPackage.create(targetFile);
- PackagePartName corePartName = createPartName("/word/document.xml");
-
- pkg.addRelationship(corePartName, TargetMode.INTERNAL,
- PackageRelationshipTypes.CORE_DOCUMENT, "rId1");
-
- PackagePart corePart = pkg.createPart(corePartName, XWPFRelation.DOCUMENT.getContentType());
-
- Document doc = DocumentHelper.createDocument();
- Element elDocument = doc.createElementNS(NS_WORDPROCESSINGML, "w:document");
- doc.appendChild(elDocument);
- Element elBody = doc.createElementNS(NS_WORDPROCESSINGML, "w:body");
- elDocument.appendChild(elBody);
- Element elParagraph = doc.createElementNS(NS_WORDPROCESSINGML, "w:p");
- elBody.appendChild(elParagraph);
- Element elRun = doc.createElementNS(NS_WORDPROCESSINGML, "w:r");
- elParagraph.appendChild(elRun);
- Element elText = doc.createElementNS(NS_WORDPROCESSINGML, "w:t");
- elRun.appendChild(elText);
- elText.setTextContent("Hello Open XML !");
-
- StreamHelper.saveXmlInStream(doc, corePart.getOutputStream());
- pkg.close();
-
- ZipFileAssert.assertEquals(expectedFile, targetFile);
- assertTrue(targetFile.delete());
- }
-
- /**
- * Tests that we can create a new package, add a core
- * document and another part, save and re-load and
- * have everything setup as expected
- */
- @Test
- void createPackageWithCoreDocument() throws IOException, InvalidFormatException, URISyntaxException, SAXException {
- UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get();
- try (OPCPackage pkg = OPCPackage.create(baos)) {
-
- // Add a core document
- PackagePartName corePartName = createPartName("/xl/workbook.xml");
- // Create main part relationship
- pkg.addRelationship(corePartName, TargetMode.INTERNAL, PackageRelationshipTypes.CORE_DOCUMENT, "rId1");
- // Create main document part
- PackagePart corePart = pkg.createPart(corePartName, XSSFRelation.WORKBOOK.getContentType());
- // Put in some dummy content
- try (OutputStream coreOut = corePart.getOutputStream()) {
- coreOut.write("<dummy-xml />".getBytes(StandardCharsets.UTF_8));
- }
-
- // And another bit
- PackagePartName sheetPartName = createPartName("/xl/worksheets/sheet1.xml");
- PackageRelationship rel = corePart.addRelationship(
- sheetPartName, TargetMode.INTERNAL, XSSFRelation.WORKSHEET.getRelation(), "rSheet1");
- assertNotNull(rel);
-
- PackagePart part = pkg.createPart(sheetPartName, XSSFRelation.WORKSHEET.getContentType());
- assertNotNull(part);
-
- // Dummy content again
- try (OutputStream coreOut = corePart.getOutputStream()) {
- coreOut.write("<dummy-xml2 />".getBytes(StandardCharsets.UTF_8));
- }
-
- //add a relationship with internal target: "#Sheet1!A1"
- corePart.addRelationship(new URI("#Sheet1!A1"), TargetMode.INTERNAL, PackageRelationshipTypes.HYPERLINK_PART, "rId2");
-
- // Check things are as expected
- PackageRelationshipCollection coreRels =
- pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
- assertEquals(1, coreRels.size());
- PackageRelationship coreRel = coreRels.getRelationship(0);
- assertNotNull(coreRel);
- assertEquals("/", coreRel.getSourceURI().toString());
- assertEquals("/xl/workbook.xml", coreRel.getTargetURI().toString());
- assertNotNull(pkg.getPart(coreRel));
- }
-
-
- // Save and re-load
- File tmp = TempFile.createTempFile("testCreatePackageWithCoreDocument", ".zip");
- try (OutputStream fout = new FileOutputStream(tmp)) {
- baos.writeTo(fout);
- fout.flush();
- }
-
- try (OPCPackage pkg = OPCPackage.open(tmp.getPath())) {
- // Check still right
- PackageRelationshipCollection coreRels = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
- assertEquals(1, coreRels.size());
- PackageRelationship coreRel = coreRels.getRelationship(0);
-
- assertNotNull(coreRel);
- assertEquals("/", coreRel.getSourceURI().toString());
- assertEquals("/xl/workbook.xml", coreRel.getTargetURI().toString());
- PackagePart corePart = pkg.getPart(coreRel);
- assertNotNull(corePart);
-
- PackageRelationshipCollection rels = corePart.getRelationshipsByType(PackageRelationshipTypes.HYPERLINK_PART);
- assertEquals(1, rels.size());
- PackageRelationship rel = rels.getRelationship(0);
- assertNotNull(rel);
- assertEquals("#Sheet1!A1", rel.getTargetURI().toString());
-
- assertMSCompatibility(pkg);
- }
- }
-
- private void assertMSCompatibility(OPCPackage pkg) throws IOException, InvalidFormatException, SAXException {
- PackagePartName relName = createPartName(PackageRelationship.getContainerPartRelationship());
- PackagePart relPart = pkg.getPart(relName);
-
- Document xmlRelationshipsDoc = DocumentHelper.readDocument(relPart.getInputStream());
-
- Element root = xmlRelationshipsDoc.getDocumentElement();
- NodeList nodeList = root.getElementsByTagName(PackageRelationship.RELATIONSHIP_TAG_NAME);
- int nodeCount = nodeList.getLength();
- for (int i = 0; i < nodeCount; i++) {
- Element element = (Element) nodeList.item(i);
- String value = element.getAttribute(PackageRelationship.TARGET_ATTRIBUTE_NAME);
- assertTrue(value.charAt(0) != '/', "Root target must not start with a leading slash ('/'): " + value);
- }
-
- }
-
- /**
- * Test package opening.
- */
- @Test
- void openPackage() throws IOException, InvalidFormatException {
- File targetFile = getOutputFile("TestOpenPackageTMP.docx");
-
- File inputFile = getSampleFile("TestOpenPackageINPUT.docx");
-
- File expectedFile = getSampleFile("TestOpenPackageOUTPUT.docx");
-
- // Copy the input file in the output directory
- FileHelper.copyFile(inputFile, targetFile);
-
- // Create a package
- OPCPackage pkg = OPCPackage.open(targetFile.getAbsolutePath());
-
- // Modify core part
- PackagePartName corePartName = createPartName("/word/document.xml");
-
- PackagePart corePart = pkg.getPart(corePartName);
-
- // Delete some part to have a valid document
- for (PackageRelationship rel : corePart.getRelationships()) {
- corePart.removeRelationship(rel.getId());
- pkg.removePart(createPartName(PackagingURIHelper
- .resolvePartUri(corePart.getPartName().getURI(), rel
- .getTargetURI())));
- }
-
- // Create a content
- Document doc = DocumentHelper.createDocument();
- Element elDocument = doc.createElementNS(NS_WORDPROCESSINGML, "w:document");
- doc.appendChild(elDocument);
- Element elBody = doc.createElementNS(NS_WORDPROCESSINGML, "w:body");
- elDocument.appendChild(elBody);
- Element elParagraph = doc.createElementNS(NS_WORDPROCESSINGML, "w:p");
- elBody.appendChild(elParagraph);
- Element elRun = doc.createElementNS(NS_WORDPROCESSINGML, "w:r");
- elParagraph.appendChild(elRun);
- Element elText = doc.createElementNS(NS_WORDPROCESSINGML, "w:t");
- elRun.appendChild(elText);
- elText.setTextContent("Hello Open XML !");
-
- StreamHelper.saveXmlInStream(doc, corePart.getOutputStream());
-
- // Save and close
- assertDoesNotThrow(pkg::close);
-
- ZipFileAssert.assertEquals(expectedFile, targetFile);
- assertTrue(targetFile.delete());
- }
-
- /**
- * Checks that we can write a package to a simple
- * OutputStream, in addition to the normal writing
- * to a file
- */
- @Test
- void saveToOutputStream() throws IOException, InvalidFormatException {
- String originalFile = getSampleFileName("TestPackageCommon.docx");
- File targetFile = getOutputFile("TestPackageOpenSaveTMP.docx");
-
- try (OPCPackage p = OPCPackage.open(originalFile, PackageAccess.READ_WRITE)) {
- try {
- try (FileOutputStream fout = new FileOutputStream(targetFile)) {
- p.save(fout);
- }
-
- // Compare the original and newly saved document
- assertTrue(targetFile.exists());
- ZipFileAssert.assertEquals(new File(originalFile), targetFile);
- assertTrue(targetFile.delete());
- } finally {
- // use revert to not re-write the input file
- p.revert();
- }
- }
- }
-
- /**
- * Checks that we can open+read a package from a
- * simple InputStream, in addition to the normal
- * reading from a file
- */
- @Test
- void openFromInputStream() throws IOException, InvalidFormatException {
- String originalFile = getSampleFileName("TestPackageCommon.docx");
-
- try (FileInputStream finp = new FileInputStream(originalFile);
- OPCPackage p = OPCPackage.open(finp)) {
- try {
- assertNotNull(p);
- assertNotNull(p.getRelationships());
- assertEquals(12, p.getParts().size());
-
- // Check it has the usual bits
- assertTrue(p.hasRelationships());
- assertTrue(p.containPart(createPartName("/_rels/.rels")));
- } finally {
- p.revert();
- }
- }
- }
-
- /**
- * TODO: fix and enable
- */
- @Test
- @Disabled
- void removePartRecursive() throws IOException, InvalidFormatException, URISyntaxException {
- String originalFile = getSampleFileName("TestPackageCommon.docx");
- File targetFile = getOutputFile("TestPackageRemovePartRecursiveOUTPUT.docx");
- File tempFile = getOutputFile("TestPackageRemovePartRecursiveTMP.docx");
-
- try (OPCPackage p = OPCPackage.open(originalFile, PackageAccess.READ_WRITE)) {
- p.removePartRecursive(createPartName(new URI("/word/document.xml")));
- p.save(tempFile.getAbsoluteFile());
-
- // Compare the original and newly saved document
- assertTrue(targetFile.exists());
- ZipFileAssert.assertEquals(targetFile, tempFile);
- assertTrue(targetFile.delete());
- p.revert();
- }
- }
-
- @Test
- void deletePart() throws InvalidFormatException, IOException {
- final TreeMap<PackagePartName, String> expectedValues = new TreeMap<>();
- final TreeMap<PackagePartName, String> values = new TreeMap<>();
-
- // Expected values
- expectedValues.put(createPartName("/_rels/.rels"), ContentTypes.RELATIONSHIPS_PART);
- expectedValues.put(createPartName("/docProps/app.xml"), CONTENT_EXT_PROPS);
- expectedValues.put(createPartName("/docProps/core.xml"), ContentTypes.CORE_PROPERTIES_PART);
- expectedValues.put(createPartName("/word/fontTable.xml"), XWPFRelation.FONT_TABLE.getContentType());
- expectedValues.put(createPartName("/word/media/image1.gif"), XWPFRelation.IMAGE_GIF.getContentType());
- expectedValues.put(createPartName("/word/settings.xml"), XWPFRelation.SETTINGS.getContentType());
- expectedValues.put(createPartName("/word/styles.xml"), XWPFRelation.STYLES.getContentType());
- expectedValues.put(createPartName("/word/theme/theme1.xml"), XWPFRelation.THEME.getContentType());
- expectedValues.put(createPartName("/word/webSettings.xml"), XWPFRelation.WEB_SETTINGS.getContentType());
-
- String filepath = getSampleFileName("sample.docx");
-
- try (OPCPackage p = OPCPackage.open(filepath, PackageAccess.READ_WRITE)) {
- try {
- // Remove the core part
- p.deletePart(createPartName("/word/document.xml"));
-
- for (PackagePart part : p.getParts()) {
- values.put(part.getPartName(), part.getContentType());
- LOG.atDebug().log(part.getPartName());
- }
-
- // Compare expected values with values return by the package
- for (PackagePartName partName : expectedValues.keySet()) {
- assertNotNull(values.get(partName));
- assertEquals(expectedValues.get(partName), values.get(partName));
- }
- } finally {
- // Don't save modifications
- p.revert();
- }
- }
- }
-
- @Test
- void deletePartRecursive() throws InvalidFormatException, IOException {
- final TreeMap<PackagePartName, String> expectedValues = new TreeMap<>();
- final TreeMap<PackagePartName, String> values = new TreeMap<>();
-
- // Expected values
- expectedValues.put(createPartName("/_rels/.rels"), ContentTypes.RELATIONSHIPS_PART);
- expectedValues.put(createPartName("/docProps/app.xml"), CONTENT_EXT_PROPS);
- expectedValues.put(createPartName("/docProps/core.xml"), ContentTypes.CORE_PROPERTIES_PART);
-
- String filepath = getSampleFileName("sample.docx");
-
- try (OPCPackage p = OPCPackage.open(filepath, PackageAccess.READ_WRITE)) {
- try {
- // Remove the core part
- p.deletePartRecursive(createPartName("/word/document.xml"));
-
- for (PackagePart part : p.getParts()) {
- values.put(part.getPartName(), part.getContentType());
- LOG.atDebug().log(part.getPartName());
- }
-
- // Compare expected values with values return by the package
- for (PackagePartName partName : expectedValues.keySet()) {
- assertNotNull(values.get(partName));
- assertEquals(expectedValues.get(partName), values.get(partName));
- }
- } finally {
- // Don't save modifications
- p.revert();
- }
- }
- }
-
- /**
- * Test that we can open a file by path, and then
- * write changes to it.
- */
- @Test
- void openFileThenOverwrite() throws IOException, InvalidFormatException {
- File tempFile = TempFile.createTempFile("poiTesting","tmp");
- File origFile = getSampleFile("TestPackageCommon.docx");
- FileHelper.copyFile(origFile, tempFile);
-
- // Open and close the temp file
- try (OPCPackage p = OPCPackage.open(tempFile.toString(), PackageAccess.READ_WRITE)) {
- assertNotNull(p);
- }
- // Delete it
- assertTrue(tempFile.delete());
-
- // Reset
- FileHelper.copyFile(origFile, tempFile);
- try (OPCPackage p = OPCPackage.open(tempFile.toString(), PackageAccess.READ_WRITE)) {
- // Save it to the same file - not allowed
- assertThrows(InvalidOperationException.class, () -> p.save(tempFile),
- "You shouldn't be able to call save(File) to overwrite the current file");
- }
- // Delete it
- assertTrue(tempFile.delete());
-
-
- // Open it read only, then close and delete - allowed
- FileHelper.copyFile(origFile, tempFile);
- try (OPCPackage p = OPCPackage.open(tempFile.toString(), PackageAccess.READ)) {
- assertNotNull(p);
- }
- assertTrue(tempFile.delete());
- }
-
- /**
- * Test that we can open a file by path, save it
- * to another file, then delete both
- */
- @Test
- void openFileThenSaveDelete() throws IOException, InvalidFormatException {
- File tempFile = TempFile.createTempFile("poiTesting","tmp");
- File tempFile2 = TempFile.createTempFile("poiTesting","tmp");
- File origFile = getSampleFile("TestPackageCommon.docx");
- FileHelper.copyFile(origFile, tempFile);
-
- // Open the temp file
- try (OPCPackage p = OPCPackage.open(tempFile.toString(), PackageAccess.READ_WRITE)) {
- // Save it to a different file
- p.save(tempFile2);
- }
-
- // Delete both the files
- assertTrue(tempFile.delete());
- assertTrue(tempFile2.delete());
- }
-
- private static ContentTypeManager getContentTypeManager(OPCPackage pkg) {
- return POITestCase.getFieldValue(OPCPackage.class, pkg, ContentTypeManager.class, "contentTypeManager");
- }
-
- @Test
- void getPartsByName() throws InvalidFormatException, IOException {
- String filepath = getSampleFileName("sample.docx");
-
- try (OPCPackage pkg = OPCPackage.open(filepath, PackageAccess.READ_WRITE)) {
- try {
- List<PackagePart> rs = pkg.getPartsByName(Pattern.compile("/word/.*?\\.xml"));
- HashMap<String, PackagePart> selected = new HashMap<>();
-
- for (PackagePart p : rs)
- selected.put(p.getPartName().getName(), p);
-
- assertEquals(6, selected.size());
- assertTrue(selected.containsKey("/word/document.xml"));
- assertTrue(selected.containsKey("/word/fontTable.xml"));
- assertTrue(selected.containsKey("/word/settings.xml"));
- assertTrue(selected.containsKey("/word/styles.xml"));
- assertTrue(selected.containsKey("/word/theme/theme1.xml"));
- assertTrue(selected.containsKey("/word/webSettings.xml"));
- } finally {
- // use revert to not re-write the input file
- pkg.revert();
- }
- }
- }
-
- @Test
- void getPartSize() throws IOException, InvalidFormatException {
- String filepath = getSampleFileName("sample.docx");
- try (OPCPackage pkg = OPCPackage.open(filepath, PackageAccess.READ)) {
- int checked = 0;
- for (PackagePart part : pkg.getParts()) {
- // Can get the size of zip parts
- if (part.getPartName().getName().equals("/word/document.xml")) {
- checked++;
- assertEquals(ZipPackagePart.class, part.getClass());
- assertEquals(6031L, part.getSize());
- }
- if (part.getPartName().getName().equals("/word/fontTable.xml")) {
- checked++;
- assertEquals(ZipPackagePart.class, part.getClass());
- assertEquals(1312L, part.getSize());
- }
-
- // But not from the others
- if (part.getPartName().getName().equals("/docProps/core.xml")) {
- checked++;
- assertEquals(PackagePropertiesPart.class, part.getClass());
- assertEquals(-1, part.getSize());
- }
- }
- // Ensure we actually found the parts we want to check
- assertEquals(3, checked);
- }
- }
-
- @Test
- void replaceContentType() throws IOException, InvalidFormatException {
- try (InputStream is = openSampleStream("sample.xlsx");
- OPCPackage p = OPCPackage.open(is)) {
- try {
- ContentTypeManager mgr = getContentTypeManager(p);
-
- assertTrue(mgr.isContentTypeRegister(XSSFRelation.WORKBOOK.getContentType()));
- assertFalse(mgr.isContentTypeRegister(XSSFRelation.MACROS_WORKBOOK.getContentType()));
- assertTrue(p.replaceContentType(XSSFRelation.WORKBOOK.getContentType(), XSSFRelation.MACROS_WORKBOOK.getContentType()));
-
- assertFalse(mgr.isContentTypeRegister(XSSFRelation.WORKBOOK.getContentType()));
- assertTrue(mgr.isContentTypeRegister(XSSFRelation.MACROS_WORKBOOK.getContentType()));
- } finally {
- p.revert();
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- @ParameterizedTest
- @CsvSource({
- "SampleSS.xls, org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException, The supplied data appears to be in the OLE2 Format, You are calling the part of POI that deals with OOXML",
- "SampleSS.xml, org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException, The supplied data appears to be a raw XML file, Formats such as Office 2003 XML",
- "SampleSS.ods, org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException, The supplied data appears to be in ODF, Formats like these (eg ODS",
- "SampleSS.txt, org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException, No valid entries or contents found, not a valid OOXML"
- })
- void NonOOXML_File(String file, String exClazzStr, String msg1, String msg2) throws Exception {
- Class<? extends Exception> exClazz = (Class<? extends Exception>)Class.forName(exClazzStr);
-
- try (InputStream stream = xlsSamples.openResourceAsStream(file)) {
- Executable[] trs = {
- () -> OPCPackage.open(stream),
- () -> OPCPackage.open(xlsSamples.getFile(file))
- };
- for (Executable tr : trs) {
- Exception ex = assertThrows(exClazz, tr, "Shouldn't be able to open "+file);
- Stream.of(msg1, msg2).forEach(mp -> assertTrue(ex.getMessage().contains(mp)));
- }
- }
- }
-
- /**
- * Zip bomb handling test
- *
- * see bug #50090 / #56865
- */
- @Test
- void zipBombCreateAndHandle()
- throws IOException, EncryptedDocumentException {
- UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().setBufferSize(2500000).get();
-
- try (ZipFile zipFile = ZipHelper.openZipFile(getSampleFile("sample.xlsx"));
- ZipArchiveOutputStream append = new ZipArchiveOutputStream(bos)) {
- assertNotNull(zipFile);
-
- // first, copy contents from existing war
- Enumeration<? extends ZipArchiveEntry> entries = zipFile.getEntries();
- while (entries.hasMoreElements()) {
- final ZipArchiveEntry eIn = entries.nextElement();
- final ZipArchiveEntry eOut = new ZipArchiveEntry(eIn.getName());
- eOut.setTime(eIn.getTime());
- eOut.setComment(eIn.getComment());
- eOut.setSize(eIn.getSize());
-
- append.putArchiveEntry(eOut);
- if (!eOut.isDirectory()) {
- try (InputStream is = zipFile.getInputStream(eIn)) {
- if ("[Content_Types].xml".equals(eOut.getName())) {
- byte[] suffix = "</Types>".getBytes(StandardCharsets.UTF_8);
- CountingOutputStream cos = new CountingOutputStream(append);
- IOUtils.copy(is, cos, eOut.getSize() - suffix.length);
-
- byte[] spam = new byte[0x7FFF];
- Arrays.fill(spam, (byte) ' ');
- // 0x7FFF0000 is the maximum for 32-bit zips, but less still works
- while (cos.getByteCount() < 0x7FFF00) {
- cos.write(spam);
- }
- cos.write(suffix);
- eOut.setSize(cos.getByteCount());
- } else {
- IOUtils.copy(is, append);
- }
- }
- }
- append.closeArchiveEntry();
- }
- }
-
- IOException ex = assertThrows(IOException.class, () -> WorkbookFactory.create(bos.toInputStream()));
- assertTrue(ex.getMessage().contains("Zip bomb detected!"));
- }
-
- @Test
- void testZipEntityExpansionTerminates() {
- IllegalStateException ex = assertThrows(
- IllegalStateException.class,
- () -> openXmlBombFile("poc-shared-strings.xlsx")
- );
- assertTrue(ex.getMessage().contains("The text would exceed the max allowed overall size of extracted text."));
- }
-
- @Test
- void testZipEntityExpansionSharedStringTableEvents() {
- boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
- ExtractorFactory.setThreadPrefersEventExtractors(true);
- try {
- IllegalStateException ex = assertThrows(
- IllegalStateException.class,
- () -> openXmlBombFile("poc-shared-strings.xlsx")
- );
- assertTrue(ex.getMessage().contains("The text would exceed the max allowed overall size of extracted text."));
- } finally {
- ExtractorFactory.setThreadPrefersEventExtractors(before);
- }
- }
-
-
- @Test
- void testZipEntityExpansionExceedsMemory() {
- boolean originalFlag = POIXMLTypeLoader.DEFAULT_XML_OPTIONS.disallowDocTypeDeclaration();
- try {
- POIXMLTypeLoader.DEFAULT_XML_OPTIONS.setDisallowDocTypeDeclaration(false);
- IOException ex = assertThrows(
- IOException.class,
- () -> openXmlBombFile("poc-xmlbomb.xlsx")
- );
- assertTrue(ex.getMessage().contains("unable to parse shared strings table"));
- assertTrue(matchSAXEx(ex));
- } finally {
- POIXMLTypeLoader.DEFAULT_XML_OPTIONS.setDisallowDocTypeDeclaration(originalFlag);
- }
- }
-
- @Test
- void testZipEntityExpansionExceedsMemory2() {
- boolean originalFlag = POIXMLTypeLoader.DEFAULT_XML_OPTIONS.disallowDocTypeDeclaration();
- try {
- POIXMLTypeLoader.DEFAULT_XML_OPTIONS.setDisallowDocTypeDeclaration(false);
- IOException ex = assertThrows(
- IOException.class,
- () -> openXmlBombFile("poc-xmlbomb-empty.xlsx")
- );
- assertTrue(ex.getMessage().contains("unable to parse shared strings table"));
- assertTrue(matchSAXEx(ex));
- } finally {
- POIXMLTypeLoader.DEFAULT_XML_OPTIONS.setDisallowDocTypeDeclaration(originalFlag);
- }
- }
-
- private static boolean matchSAXEx(Exception root) {
- for (Throwable t = root; t != null; t = t.getCause()) {
- if (t.getClass().isAssignableFrom(SAXParseException.class) &&
- t.getMessage().contains("The parser has encountered more than")) {
- return true;
- }
- }
- return false;
- }
-
- /**
-  * Creates a text extractor for the given sample file and pulls its text while the minimum
-  * inflate ratio is lowered to 0.002; the previous ratio is restored afterwards.
-  *
-  * @param file name of the sample file to open
-  * @throws IOException propagated from creating or closing the extractor
-  */
- private void openXmlBombFile(String file) throws IOException {
-     final double previousRatio = ZipSecureFile.getMinInflateRatio();
-     ZipSecureFile.setMinInflateRatio(0.002);
-     try {
-         final File sample = XSSFTestDataSamples.getSampleFile(file);
-         try (POITextExtractor textExtractor = ExtractorFactory.createExtractor(sample)) {
-             assertNotNull(textExtractor);
-             textExtractor.getText();
-         }
-     } finally {
-         ZipSecureFile.setMinInflateRatio(previousRatio);
-     }
- }
-
- @Test
- void zipBombCheckSizesWithinLimits() throws IOException, EncryptedDocumentException {
- getZipStatsAndConsume((max_size, min_ratio) -> {
- // use values close to, but within the limits
- ZipSecureFile.setMinInflateRatio(min_ratio - 0.002);
- assertEquals(min_ratio - 0.002, ZipSecureFile.getMinInflateRatio(), 0.00001);
- ZipSecureFile.setMaxEntrySize(max_size + 1);
- assertEquals(max_size + 1, ZipSecureFile.getMaxEntrySize());
- });
- }
-
- @Test
- void zipBombCheckSizesRatioTooSmall() {
- POIXMLException ex = assertThrows(
- POIXMLException.class,
- () -> getZipStatsAndConsume((max_size, min_ratio) -> {
- // check ratio out of bounds
- ZipSecureFile.setMinInflateRatio(min_ratio+0.002);
- })
- );
- assertTrue(ex.getMessage().contains("You can adjust this limit via ZipSecureFile.setMinInflateRatio()"));
- }
-
- @Test
- void zipBombCheckSizesSizeTooBig() throws EncryptedDocumentException {
- POIXMLException ex = assertThrows(
- POIXMLException.class,
- () -> getZipStatsAndConsume((max_size, min_ratio) -> {
- // check max entry size ouf of bounds
- ZipSecureFile.setMinInflateRatio(min_ratio-0.002);
- ZipSecureFile.setMaxEntrySize(max_size-200);
- })
- );
- assertTrue(ex.getMessage().contains("You can adjust this limit via ZipSecureFile.setMaxEntrySize()"));
- }
-
- /**
-  * Scans the entries of "poc-shared-strings.xlsx" for the largest entry size (padded by 128
-  * bytes) and the smallest compressed/uncompressed ratio, passes both to {@code ratioCon} so it
-  * can adjust the {@link ZipSecureFile} limits, and then opens the file as a workbook.
-  * The limits are reset to 0.01 and 0xFFFFFFFF afterwards, also when the callback itself fails.
-  *
-  * @param ratioCon callback receiving the maximum entry size and the minimum ratio, in that order
-  * @throws IOException propagated from reading the zip file or opening the workbook
-  */
- private void getZipStatsAndConsume(BiConsumer<Long,Double> ratioCon) throws IOException {
-     // use a test file with a xml file bigger than 100k (ZipArchiveThresholdInputStream.GRACE_ENTRY_SIZE)
-     final File file = XSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx");
-     double min_ratio = Double.MAX_VALUE;
-     long max_size = 0;
-     try (ZipFile zf = ZipHelper.openZipFile(file)) {
-         assertNotNull(zf);
-         Enumeration<? extends ZipArchiveEntry> entries = zf.getEntries();
-         while (entries.hasMoreElements()) {
-             ZipArchiveEntry ze = entries.nextElement();
-             // empty entries would only distort the ratio
-             if (ze.getSize() == 0) {
-                 continue;
-             }
-             // add zip entry header ~ 128 bytes
-             long size = ze.getSize()+128;
-             double ratio = ze.getCompressedSize() / (double)size;
-             min_ratio = Math.min(min_ratio, ratio);
-             max_size = Math.max(max_size, size);
-         }
-     }
-     try {
-         // the callback runs inside the try so that limits changed by a failing callback
-         // (e.g. a failed assertion) are reset as well and cannot leak into other tests
-         ratioCon.accept(max_size, min_ratio);
-         //noinspection EmptyTryBlock,unused
-         try (Workbook wb = WorkbookFactory.create(file, null, true)) {
-             // nothing to do: opening and closing the workbook is the whole check
-         }
-     } finally {
-         // reset otherwise a lot of ooxml tests will fail
-         ZipSecureFile.setMinInflateRatio(0.01d);
-         ZipSecureFile.setMaxEntrySize(0xFFFFFFFFL);
-     }
- }
-
- @Test
- void testConstructors() throws IOException {
- // verify the various ways to construct a ZipSecureFile
- File file = getSampleFile("sample.xlsx");
- try (ZipSecureFile zipFile = new ZipSecureFile(file)) {
- assertNotNull(zipFile.getName());
- }
-
- try (ZipSecureFile zipFile = new ZipSecureFile(file.getAbsolutePath())) {
- assertNotNull(zipFile.getName());
- }
- }
-
- @Test
- void testMaxTextSize() {
- long before = ZipSecureFile.getMaxTextSize();
- try {
- ZipSecureFile.setMaxTextSize(12345);
- assertEquals(12345, ZipSecureFile.getMaxTextSize());
- } finally {
- ZipSecureFile.setMaxTextSize(before);
- }
- }
-
- // bug 60128
- @Test
- void testCorruptFile() {
- File file = getSampleFile("invalid.xlsx");
- assertThrows(NotOfficeXmlFileException.class, () -> OPCPackage.open(file, PackageAccess.READ));
- }
-
- /**
-  * Factory for an {@link InputStream} that wraps another stream and takes an int argument,
-  * matching constructors such as {@code PushbackInputStream(InputStream, int)}.
-  */
- @FunctionalInterface
- private interface CountingStream {
-     /**
-      * @param is the stream to wrap
-      * @param length the int argument handed to the wrapping stream
-      * @return the wrapping stream
-      */
-     InputStream create(InputStream is, int length);
- }
-
- // bug 61381
- @Test
- void testTooShortFilterStreams() throws IOException {
- for (String file : new String[]{"sample.xlsx","SampleSS.xls"}) {
- for (CountingStream cs : new CountingStream[]{PushbackInputStream::new, BufferedInputStream::new}) {
- try (InputStream is = cs.create(xlsSamples.openResourceAsStream(file), 2);
- Workbook wb = WorkbookFactory.create(is)) {
- assertEquals(3, wb.getNumberOfSheets());
- }
- }
- }
- }
-
- /**
-  * Opens "dcterms_bug_56479.zip" as a package and checks that the core properties part is
-  * present while neither the Word document part nor the theme1 part is.
-  */
- @Test
- void testBug56479() throws Exception {
-     try (InputStream zipStream = openSampleStream("dcterms_bug_56479.zip");
-          OPCPackage pkg = OPCPackage.open(zipStream)) {
-         boolean sawCoreProps = false;
-         boolean sawDocument = false;
-         boolean sawTheme1 = false;
-         // note which of the three parts show up and verify their content types on the way
-         for (final PackagePart part : pkg.getParts()) {
-             final String partName = part.getPartName().toString();
-             final String contentType = part.getContentType();
-             if ("/docProps/core.xml".equals(partName)) {
-                 assertEquals(ContentTypes.CORE_PROPERTIES_PART, contentType);
-                 sawCoreProps = true;
-             } else if ("/word/document.xml".equals(partName)) {
-                 assertEquals(XWPFRelation.DOCUMENT.getContentType(), contentType);
-                 sawDocument = true;
-             } else if ("/word/theme/theme1.xml".equals(partName)) {
-                 assertEquals(XWPFRelation.THEME.getContentType(), contentType);
-                 sawTheme1 = true;
-             }
-         }
-         assertTrue(sawCoreProps, "Core not found in " + pkg.getParts());
-         assertFalse(sawDocument, "Document should not be found in " + pkg.getParts());
-         assertFalse(sawTheme1, "Theme1 should not found in " + pkg.getParts());
-     }
- }
-
- @Test
- void unparseableCentralDirectory() throws IOException {
- File f = getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx");
- try (SlideShow<?,?> ppt = SlideShowFactory.create(f, null, true)) {
- assertNotNull(ppt);
- assertNotNull(ppt.getSlides().get(0));
- }
- }
-
- @Test
- void testClosingStreamOnException() throws IOException {
- File tmp = File.createTempFile("poi-test-truncated-zip", "");
-
- // create a corrupted zip file by truncating a valid zip file to the first 100 bytes
- try (InputStream is = openSampleStream("dcterms_bug_56479.zip");
- OutputStream os = new FileOutputStream(tmp)) {
- IOUtils.copy(is, os, 100);
- }
-
- // feed the corrupted zip file to OPCPackage
- // expected: the zip file is invalid
- // this test does not care if open() throws an exception or not.
- assertThrows(Exception.class, () -> OPCPackage.open(tmp, PackageAccess.READ));
-
- // If the stream is not closed on exception, it will keep a file descriptor to tmp,
- // and requests to the OS to delete the file will fail.
- assertTrue(tmp.delete(), "Can't delete tmp file");
- }
-
- /**
- * If ZipPackage is passed an invalid file, a call to close
- * (eg from the OPCPackage open method) should tidy up the
- * stream / file the broken file is being read from.
- * See bug #60128 for more
- */
- @Test
- void testTidyStreamOnInvalidFile1() throws Exception {
- openInvalidFile("SampleSS.ods", false);
- }
-
- @Test
- void testTidyStreamOnInvalidFile2() throws Exception {
- openInvalidFile("SampleSS.ods", true);
- }
-
- @Test
- void testTidyStreamOnInvalidFile3() throws Exception {
- openInvalidFile("SampleSS.txt", false);
- }
-
- @Test
- void testTidyStreamOnInvalidFile4() throws Exception {
- openInvalidFile("SampleSS.txt", true);
- }
-
- /**
-  * Opening "62592.thmx" from a stream must be rejected with an {@link InvalidFormatException}.
-  */
- @Test
- void testBug62592() throws Exception {
-     try (InputStream thmxStream = openSampleStream("62592.thmx")) {
-         assertThrows(
-             InvalidFormatException.class,
-             () -> OPCPackage.open(thmxStream)
-         );
-     }
- }
-
- @Test
- void testBug62592SequentialCallsToGetParts() throws Exception {
- //make absolutely certain that sequential calls don't throw InvalidFormatExceptions
- String originalFile = getSampleFileName("TestPackageCommon.docx");
- try (OPCPackage p2 = OPCPackage.open(originalFile, PackageAccess.READ)) {
- assertDoesNotThrow(p2::getParts);
- assertDoesNotThrow(p2::getParts);
- }
- }
-
- @Test
- void testDoNotCloseStream() throws IOException {
- // up to JDK 10 we did use Mockito here, but OutputStream is
- // an abstract class and fails mocking with some changes in JDK 11
- // so we use a simple empty output stream implementation instead
- OutputStream os = new OutputStream() {
- @Override
- public void write(int b) {
- }
-
- @Override
- public void close() {
- fail("close should not be called here");
- }
- };
-
- try (XSSFWorkbook wb = new XSSFWorkbook()) {
- wb.createSheet();
- wb.write(os);
- }
-
- try (SXSSFWorkbook wb = new SXSSFWorkbook()) {
- wb.createSheet();
- wb.write(os);
- }
- }
-
-
-
- /**
-  * Opens the named sample through {@code ZipPackage}, either from a stream or from a file,
-  * and expects {@link NotOfficeXmlFileException}. If a package instance was created, its zip
-  * archive must be reported as closed afterwards.
-  *
-  * @param name name of the sample resource
-  * @param useStream {@code true} to open from an input stream, {@code false} to open from a file
-  * @throws IOException propagated from closing the sample stream
-  */
- private static void openInvalidFile(final String name, final boolean useStream) throws IOException {
-     // one-element holder so the lambda below can hand the package back to this method
-     final ZipPackage[] created = { null };
-     try (final InputStream is = useStream ? xlsSamples.openResourceAsStream(name) : null) {
-         assertThrows(NotOfficeXmlFileException.class, () -> {
-             final ZipPackage opened;
-             if (useStream) {
-                 opened = new ZipPackage(is, PackageAccess.READ);
-             } else {
-                 opened = new ZipPackage(xlsSamples.getFile(name), PackageAccess.READ);
-             }
-             try (final ZipPackage pkg = opened) {
-                 created[0] = pkg;
-                 assertNotNull(pkg.getZipArchive());
-                 assertFalse(pkg.getZipArchive().isClosed());
-                 pkg.getParts();
-             }
-         });
-     } finally {
-         final ZipPackage pkg = created[0];
-         if (pkg != null) {
-             assertNotNull(pkg.getZipArchive());
-             assertTrue(pkg.getZipArchive().isClosed());
-         }
-     }
- }
-
- /**
-  * Bugzilla 63029: when saving fails while the package is being closed, the file the package
-  * was opened from must keep its content (same SHA-256 hash), must still open as a package
-  * and must contain the same number of parts as before.
-  */
- @SuppressWarnings("UnstableApiUsage")
- @Test
- void testBug63029() throws Exception {
-     File testFile = getSampleFile("sample.docx");
-     File tmpFile = getOutputFile("Bug63029.docx");
-     Files.copy(testFile, tmpFile);
-     int numPartsBefore = 0;
-     String sha256Before = Files.asByteSource(tmpFile).hash(Hashing.sha256()).toString();
-     try (OPCPackage pkg = OPCPackage.open(tmpFile, PackageAccess.READ_WRITE)) {
-         numPartsBefore = pkg.getParts().size();
-         // add a marshaller that will throw an exception on save
-         pkg.addMarshaller("poi/junit", (part, out) -> {
-             throw new RuntimeException("Bugzilla 63029");
-         });
-         pkg.createPart(createPartName("/poi/test.xml"), "poi/junit");
-         RuntimeException ex = assertThrows(RuntimeException.class, pkg::close);
-         // verify there was an exception while closing the file
-         assertEquals("Fail to save: an error occurs while saving the package : Bugzilla 63029", ex.getMessage());
-     }
-     // assert that the hash after closing is the same, i.e. the source is left intact
-     String sha256After = Files.asByteSource(tmpFile).hash(Hashing.sha256()).toString();
-     assertEquals(sha256Before, sha256After);
-     // try to read the source file once again
-     try (OPCPackage pkg = OPCPackage.open(tmpFile, PackageAccess.READ_WRITE)) {
-         // the source is still a valid zip archive.
-         // prior to the fix this used to throw NotOfficeXmlFileException("archive is not a ZIP archive")
-         // assert that the number of parts remained the same (expected value first)
-         assertEquals(numPartsBefore, pkg.getParts().size());
-     }
- }
- }
|