/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.openxml4j.opc;
import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.getOutputFile;
import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.getSampleFile;
import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.getSampleFileName;
import static org.apache.poi.openxml4j.OpenXML4JTestDataSamples.openSampleStream;
import static org.apache.poi.openxml4j.opc.PackagingURIHelper.createPartName;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import java.util.function.BiConsumer;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import com.google.common.hash.Hashing;
import com.google.common.io.Files;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITestCase;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.ooxml.util.DocumentHelper;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.TempFile;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.function.Executable;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public final class TestPackage {
// Logger used by the tests below for debug output (e.g. the part names that were visited).
private static final Logger LOG = LogManager.getLogger(TestPackage.class);
// XML namespace used when building the w:document / w:body / w:p / w:r / w:t elements.
private static final String NS_OOXML_WP_MAIN = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
// Content type the delete-part tests expect for /docProps/app.xml.
private static final String CONTENT_EXT_PROPS = "application/vnd.openxmlformats-officedocument.extended-properties+xml";
// Accessor for the spreadsheet sample files used by several tests.
private static final POIDataSamples xlsSamples = POIDataSamples.getSpreadSheetInstance();
/**
 * Checks the strict-format flag for two regular samples and one strict sample.
 */
@Test
void isStrictOoxmlFormat() throws IOException, InvalidFormatException {
    // word sample, opened by path name
    final String docxPath = getSampleFileName("TestPackageCommon.docx");
    try (OPCPackage docx = OPCPackage.open(docxPath, PackageAccess.READ)) {
        assertFalse(docx.isStrictOoxmlFormat());
    }

    // regular spreadsheet sample, opened as a File
    final File regularXlsx = xlsSamples.getFile("sample.xlsx");
    try (OPCPackage regular = OPCPackage.open(regularXlsx, PackageAccess.READ)) {
        assertFalse(regular.isStrictOoxmlFormat());
    }

    // strict spreadsheet sample, opened as a File
    final File strictXlsx = xlsSamples.getFile("sample.strict.xlsx");
    try (OPCPackage strict = OPCPackage.open(strictXlsx, PackageAccess.READ)) {
        assertTrue(strict.isStrictOoxmlFormat());
    }
}
/**
 * Opening a package and saving it to another file must not alter the document.
 */
@Test
void openSave() throws IOException, InvalidFormatException {
    final String source = getSampleFileName("TestPackageCommon.docx");
    final File saved = getOutputFile("TestPackageOpenSaveTMP.docx");

    try (OPCPackage pkg = OPCPackage.open(source, PackageAccess.READ_WRITE)) {
        try {
            pkg.save(saved.getAbsoluteFile());

            // the saved copy has to exist and be equal to the source
            assertTrue(saved.exists());
            ZipFileAssert.assertEquals(new File(source), saved);
            assertTrue(saved.delete());
        } finally {
            // revert, so the input file is not re-written
            pkg.revert();
        }
    }
}
/**
 * A newly created package has to come with the default content types:
 * one for xml parts, one for relationship parts, and none for other extensions.
 */
@Test
void createGetsContentTypes()
        throws IOException, InvalidFormatException, SecurityException, IllegalArgumentException {
    final File output = getOutputFile("TestCreatePackageTMP.docx");

    // remove leftovers of an earlier run
    if (output.exists()) {
        assertTrue(output.delete());
    }

    try (OPCPackage created = OPCPackage.create(output)) {
        try {
            final ContentTypeManager types = getContentTypeManager(created);

            final String xmlType = types.getContentType(createPartName("/foo.xml"));
            assertEquals("application/xml", xmlType);

            final String relsType = types.getContentType(createPartName("/foo.rels"));
            assertEquals(ContentTypes.RELATIONSHIPS_PART, relsType);

            final String txtType = types.getContentType(createPartName("/foo.txt"));
            assertNull(txtType);
        } finally {
            created.revert();
        }
    }
}
/**
 * Test package creation: builds a minimal word document from scratch, writes it
 * by closing the package and compares the result with the expected sample file.
 */
@Test
void createPackageAddPart() throws IOException, InvalidFormatException {
    File targetFile = getOutputFile("TestCreatePackageTMP.docx");
    File expectedFile = getSampleFile("TestCreatePackageOUTPUT.docx");

    // Zap the target file, in case of an earlier run
    if (targetFile.exists()) {
        assertTrue(targetFile.delete());
    }

    // Create a package; try-with-resources makes sure it is closed (and thereby
    // written) exactly once, and that it is not left open if a step below throws
    try (OPCPackage pkg = OPCPackage.create(targetFile)) {
        PackagePartName corePartName = createPartName("/word/document.xml");
        pkg.addRelationship(corePartName, TargetMode.INTERNAL,
                PackageRelationshipTypes.CORE_DOCUMENT, "rId1");
        PackagePart corePart = pkg.createPart(corePartName, XWPFRelation.DOCUMENT.getContentType());

        // Build w:document > w:body > w:p > w:r > w:t
        Document doc = DocumentHelper.createDocument();
        Element elDocument = doc.createElementNS(NS_OOXML_WP_MAIN, "w:document");
        doc.appendChild(elDocument);
        Element elBody = doc.createElementNS(NS_OOXML_WP_MAIN, "w:body");
        elDocument.appendChild(elBody);
        Element elParagraph = doc.createElementNS(NS_OOXML_WP_MAIN, "w:p");
        elBody.appendChild(elParagraph);
        Element elRun = doc.createElementNS(NS_OOXML_WP_MAIN, "w:r");
        elParagraph.appendChild(elRun);
        Element elText = doc.createElementNS(NS_OOXML_WP_MAIN, "w:t");
        elRun.appendChild(elText);
        elText.setTextContent("Hello Open XML !");

        StreamHelper.saveXmlInStream(doc, corePart.getOutputStream());
    }

    // The package has been closed at this point, compare what was written
    ZipFileAssert.assertEquals(expectedFile, targetFile);
    assertTrue(targetFile.delete());
}
/**
 * Tests that we can create a new package, add a core
 * document and another part, save and re-load and
 * have everything setup as expected
 */
@Test
void createPackageWithCoreDocument() throws IOException, InvalidFormatException, URISyntaxException, SAXException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (OPCPackage pkg = OPCPackage.create(baos)) {
        // Add a core document
        PackagePartName corePartName = createPartName("/xl/workbook.xml");
        // Create main part relationship
        pkg.addRelationship(corePartName, TargetMode.INTERNAL, PackageRelationshipTypes.CORE_DOCUMENT, "rId1");
        // Create main document part
        PackagePart corePart = pkg.createPart(corePartName, XSSFRelation.WORKBOOK.getContentType());
        // Put in some dummy content (non-empty, so something is really written)
        try (OutputStream coreOut = corePart.getOutputStream()) {
            coreOut.write("<dummy-xml />".getBytes(StandardCharsets.UTF_8));
        }

        // And another bit
        PackagePartName sheetPartName = createPartName("/xl/worksheets/sheet1.xml");
        PackageRelationship rel = corePart.addRelationship(
                sheetPartName, TargetMode.INTERNAL, XSSFRelation.WORKSHEET.getRelation(), "rSheet1");
        assertNotNull(rel);

        PackagePart part = pkg.createPart(sheetPartName, XSSFRelation.WORKSHEET.getContentType());
        assertNotNull(part);

        // Dummy content again
        // NOTE(review): this writes to the core part a second time, not to the
        // sheet part created just above - confirm that this is intended
        try (OutputStream coreOut = corePart.getOutputStream()) {
            coreOut.write("<dummy-xml2 />".getBytes(StandardCharsets.UTF_8));
        }

        // add a relationship with internal target: "#Sheet1!A1"
        corePart.addRelationship(new URI("#Sheet1!A1"), TargetMode.INTERNAL, PackageRelationshipTypes.HYPERLINK_PART, "rId2");

        // Check things are as expected
        PackageRelationshipCollection coreRels =
                pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
        assertEquals(1, coreRels.size());
        PackageRelationship coreRel = coreRels.getRelationship(0);
        assertNotNull(coreRel);
        assertEquals("/", coreRel.getSourceURI().toString());
        assertEquals("/xl/workbook.xml", coreRel.getTargetURI().toString());
        assertNotNull(pkg.getPart(coreRel));
    }

    // Save and re-load
    File tmp = TempFile.createTempFile("testCreatePackageWithCoreDocument", ".zip");
    try (OutputStream fout = new FileOutputStream(tmp)) {
        baos.writeTo(fout);
        fout.flush();
    }

    try (OPCPackage pkg = OPCPackage.open(tmp.getPath())) {
        // Check still right
        PackageRelationshipCollection coreRels = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
        assertEquals(1, coreRels.size());
        PackageRelationship coreRel = coreRels.getRelationship(0);
        assertNotNull(coreRel);
        assertEquals("/", coreRel.getSourceURI().toString());
        assertEquals("/xl/workbook.xml", coreRel.getTargetURI().toString());

        PackagePart corePart = pkg.getPart(coreRel);
        assertNotNull(corePart);

        // The hyperlink relationship must have kept its fragment-only target
        PackageRelationshipCollection rels = corePart.getRelationshipsByType(PackageRelationshipTypes.HYPERLINK_PART);
        assertEquals(1, rels.size());
        PackageRelationship rel = rels.getRelationship(0);
        assertNotNull(rel);
        assertEquals("Sheet1!A1", rel.getTargetURI().getRawFragment());

        assertMSCompatibility(pkg);
    }
}
/**
 * Reads the container relationship part of the given package and asserts that
 * none of its relationship targets starts with a slash.
 *
 * @param pkg the package whose container relationships are checked
 * @throws IOException if the relationship part cannot be read
 * @throws InvalidFormatException if the relationship part name is invalid
 * @throws SAXException if the relationship part is not parseable XML
 */
private void assertMSCompatibility(OPCPackage pkg) throws IOException, InvalidFormatException, SAXException {
    PackagePartName relName = createPartName(PackageRelationship.getContainerPartRelationship());
    PackagePart relPart = pkg.getPart(relName);
    assertNotNull(relPart, "Package has no container relationship part");

    // close the part stream once the document has been parsed
    Document xmlRelationshipsDoc;
    try (InputStream relStream = relPart.getInputStream()) {
        xmlRelationshipsDoc = DocumentHelper.readDocument(relStream);
    }

    Element root = xmlRelationshipsDoc.getDocumentElement();
    NodeList nodeList = root.getElementsByTagName(PackageRelationship.RELATIONSHIP_TAG_NAME);
    int nodeCount = nodeList.getLength();
    for (int i = 0; i < nodeCount; i++) {
        Element element = (Element) nodeList.item(i);
        String value = element.getAttribute(PackageRelationship.TARGET_ATTRIBUTE_NAME);
        // startsWith() instead of charAt(0): an empty target must not blow up
        // with a StringIndexOutOfBoundsException
        assertFalse(value.startsWith("/"), "Root target must not start with a leading slash ('/'): " + value);
    }
}
/**
 * Opens a copy of an existing package, replaces the content of its core part
 * and compares the closed package with the expected sample file.
 */
@Test
void openPackage() throws IOException, InvalidFormatException {
    final File workingCopy = getOutputFile("TestOpenPackageTMP.docx");
    final File input = getSampleFile("TestOpenPackageINPUT.docx");
    final File expected = getSampleFile("TestOpenPackageOUTPUT.docx");

    // work on a copy in the output directory
    FileHelper.copyFile(input, workingCopy);

    final OPCPackage pkg = OPCPackage.open(workingCopy.getAbsolutePath());

    // the core part is the one that gets modified
    final PackagePart corePart = pkg.getPart(createPartName("/word/document.xml"));

    // drop every relationship of the core part together with its target part,
    // so that the document stays valid
    for (PackageRelationship relationship : corePart.getRelationships()) {
        corePart.removeRelationship(relationship.getId());
        final URI sourceUri = corePart.getPartName().getURI();
        final URI resolved = PackagingURIHelper.resolvePartUri(sourceUri, relationship.getTargetURI());
        pkg.removePart(createPartName(resolved));
    }

    // new content: w:document > w:body > w:p > w:r > w:t, assembled bottom-up
    final Document xml = DocumentHelper.createDocument();
    final Element document = xml.createElementNS(NS_OOXML_WP_MAIN, "w:document");
    final Element body = xml.createElementNS(NS_OOXML_WP_MAIN, "w:body");
    final Element paragraph = xml.createElementNS(NS_OOXML_WP_MAIN, "w:p");
    final Element run = xml.createElementNS(NS_OOXML_WP_MAIN, "w:r");
    final Element text = xml.createElementNS(NS_OOXML_WP_MAIN, "w:t");
    text.setTextContent("Hello Open XML !");
    run.appendChild(text);
    paragraph.appendChild(run);
    body.appendChild(paragraph);
    document.appendChild(body);
    xml.appendChild(document);

    StreamHelper.saveXmlInStream(xml, corePart.getOutputStream());

    // closing saves the package; this must work without an exception
    assertDoesNotThrow(pkg::close);

    ZipFileAssert.assertEquals(expected, workingCopy);
    assertTrue(workingCopy.delete());
}
/**
 * A package can also be written to a plain OutputStream,
 * not only to a file.
 */
@Test
void saveToOutputStream() throws IOException, InvalidFormatException {
    final String source = getSampleFileName("TestPackageCommon.docx");
    final File saved = getOutputFile("TestPackageOpenSaveTMP.docx");

    try (OPCPackage pkg = OPCPackage.open(source, PackageAccess.READ_WRITE)) {
        try {
            try (FileOutputStream out = new FileOutputStream(saved)) {
                pkg.save(out);
            }

            // the written copy has to exist and be equal to the source
            assertTrue(saved.exists());
            ZipFileAssert.assertEquals(new File(source), saved);
            assertTrue(saved.delete());
        } finally {
            // revert, so the input file is not re-written
            pkg.revert();
        }
    }
}
/**
 * A package can also be opened and read from a plain
 * InputStream, not only from a file.
 */
@Test
void openFromInputStream() throws IOException, InvalidFormatException {
    final String source = getSampleFileName("TestPackageCommon.docx");

    try (FileInputStream in = new FileInputStream(source);
         OPCPackage pkg = OPCPackage.open(in)) {
        try {
            assertNotNull(pkg);
            assertNotNull(pkg.getRelationships());

            final int partCount = pkg.getParts().size();
            assertEquals(12, partCount);

            // the usual bits have to be there
            assertTrue(pkg.hasRelationships());
            final PackagePartName rootRels = createPartName("/_rels/.rels");
            assertTrue(pkg.containPart(rootRels));
        } finally {
            pkg.revert();
        }
    }
}
/**
 * Removes the core document part recursively, saves the package to a temporary
 * file and compares it with a reference file.
 *
 * TODO: fix and enable
 */
@Test
@Disabled
void removePartRecursive() throws IOException, InvalidFormatException, URISyntaxException {
    String originalFile = getSampleFileName("TestPackageCommon.docx");
    File targetFile = getOutputFile("TestPackageRemovePartRecursiveOUTPUT.docx");
    File tempFile = getOutputFile("TestPackageRemovePartRecursiveTMP.docx");

    try (OPCPackage p = OPCPackage.open(originalFile, PackageAccess.READ_WRITE)) {
        try {
            p.removePartRecursive(createPartName(new URI("/word/document.xml")));
            p.save(tempFile.getAbsoluteFile());

            // Compare the reference and the newly saved document
            assertTrue(targetFile.exists());
            ZipFileAssert.assertEquals(targetFile, tempFile);
            // NOTE(review): this deletes the reference file, not the temporary
            // copy written above - confirm before enabling the test
            assertTrue(targetFile.delete());
        } finally {
            // always revert, also when an assertion fails: otherwise closing the
            // read-write package would write the modification into the input file
            p.revert();
        }
    }
}
/**
 * Deletes the core document part only and checks that the listed remaining
 * parts are still present with their content types.
 */
@Test
void deletePart() throws InvalidFormatException, IOException {
    // typed maps: the loops below rely on the key and value types
    final TreeMap<PackagePartName, String> expectedValues = new TreeMap<>();
    final TreeMap<PackagePartName, String> values = new TreeMap<>();

    // Expected values
    expectedValues.put(createPartName("/_rels/.rels"), ContentTypes.RELATIONSHIPS_PART);
    expectedValues.put(createPartName("/docProps/app.xml"), CONTENT_EXT_PROPS);
    expectedValues.put(createPartName("/docProps/core.xml"), ContentTypes.CORE_PROPERTIES_PART);
    expectedValues.put(createPartName("/word/fontTable.xml"), XWPFRelation.FONT_TABLE.getContentType());
    expectedValues.put(createPartName("/word/media/image1.gif"), XWPFRelation.IMAGE_GIF.getContentType());
    expectedValues.put(createPartName("/word/settings.xml"), XWPFRelation.SETTINGS.getContentType());
    expectedValues.put(createPartName("/word/styles.xml"), XWPFRelation.STYLES.getContentType());
    expectedValues.put(createPartName("/word/theme/theme1.xml"), XWPFRelation.THEME.getContentType());
    expectedValues.put(createPartName("/word/webSettings.xml"), XWPFRelation.WEB_SETTINGS.getContentType());

    String filepath = getSampleFileName("sample.docx");
    try (OPCPackage p = OPCPackage.open(filepath, PackageAccess.READ_WRITE)) {
        try {
            // Remove the core part
            p.deletePart(createPartName("/word/document.xml"));

            // Collect what the package reports afterwards
            for (PackagePart part : p.getParts()) {
                values.put(part.getPartName(), part.getContentType());
                LOG.atDebug().log(part.getPartName());
            }

            // Compare expected values with values returned by the package
            for (PackagePartName partName : expectedValues.keySet()) {
                assertNotNull(values.get(partName));
                assertEquals(expectedValues.get(partName), values.get(partName));
            }
        } finally {
            // Don't save modifications
            p.revert();
        }
    }
}
/**
 * Deletes the core document part recursively and checks that the listed
 * remaining parts are still present with their content types.
 */
@Test
void deletePartRecursive() throws InvalidFormatException, IOException {
    // typed maps: the loops below rely on the key and value types
    final TreeMap<PackagePartName, String> expectedValues = new TreeMap<>();
    final TreeMap<PackagePartName, String> values = new TreeMap<>();

    // Expected values
    expectedValues.put(createPartName("/_rels/.rels"), ContentTypes.RELATIONSHIPS_PART);
    expectedValues.put(createPartName("/docProps/app.xml"), CONTENT_EXT_PROPS);
    expectedValues.put(createPartName("/docProps/core.xml"), ContentTypes.CORE_PROPERTIES_PART);

    String filepath = getSampleFileName("sample.docx");
    try (OPCPackage p = OPCPackage.open(filepath, PackageAccess.READ_WRITE)) {
        try {
            // Remove the core part
            p.deletePartRecursive(createPartName("/word/document.xml"));

            // Collect what the package reports afterwards
            for (PackagePart part : p.getParts()) {
                values.put(part.getPartName(), part.getContentType());
                LOG.atDebug().log(part.getPartName());
            }

            // Compare expected values with values returned by the package
            for (PackagePartName partName : expectedValues.keySet()) {
                assertNotNull(values.get(partName));
                assertEquals(expectedValues.get(partName), values.get(partName));
            }
        } finally {
            // Don't save modifications
            p.revert();
        }
    }
}
/**
 * Opens a file by path in several access modes; saving a read-write
 * package onto the very file it was opened from has to be rejected.
 */
@Test
void openFileThenOverwrite() throws IOException, InvalidFormatException {
    final File scratch = TempFile.createTempFile("poiTesting","tmp");
    final File sample = getSampleFile("TestPackageCommon.docx");
    final String scratchPath = scratch.toString();

    // round 1: open read-write and just close again
    FileHelper.copyFile(sample, scratch);
    try (OPCPackage pkg = OPCPackage.open(scratchPath, PackageAccess.READ_WRITE)) {
        assertNotNull(pkg);
    }
    assertTrue(scratch.delete());

    // round 2: fresh copy, saving onto the opened file is not allowed
    FileHelper.copyFile(sample, scratch);
    try (OPCPackage pkg = OPCPackage.open(scratchPath, PackageAccess.READ_WRITE)) {
        final Executable overwrite = () -> pkg.save(scratch);
        assertThrows(InvalidOperationException.class, overwrite,
            "You shouldn't be able to call save(File) to overwrite the current file");
    }
    assertTrue(scratch.delete());

    // round 3: fresh copy, open read only, close and delete - allowed
    FileHelper.copyFile(sample, scratch);
    try (OPCPackage pkg = OPCPackage.open(scratchPath, PackageAccess.READ)) {
        assertNotNull(pkg);
    }
    assertTrue(scratch.delete());
}
/**
 * Opens a file by path, saves the package to a second file
 * and checks that both files can be deleted afterwards.
 */
@Test
void openFileThenSaveDelete() throws IOException, InvalidFormatException {
    final File first = TempFile.createTempFile("poiTesting","tmp");
    final File second = TempFile.createTempFile("poiTesting","tmp");
    final File sample = getSampleFile("TestPackageCommon.docx");
    FileHelper.copyFile(sample, first);

    // open the copy and write it to the other file
    try (OPCPackage pkg = OPCPackage.open(first.toString(), PackageAccess.READ_WRITE)) {
        pkg.save(second);
    }

    // neither file may still be held open
    assertTrue(first.delete());
    assertTrue(second.delete());
}
/**
 * Reads the "contentTypeManager" field of the given package via the test helper.
 *
 * @param pkg the package to read the field from
 * @return the value of the field
 */
private static ContentTypeManager getContentTypeManager(OPCPackage pkg) {
    final String fieldName = "contentTypeManager";
    final ContentTypeManager manager =
            POITestCase.getFieldValue(OPCPackage.class, pkg, ContentTypeManager.class, fieldName);
    return manager;
}
/**
 * Looks up parts by a name pattern and checks that exactly the
 * expected xml parts below /word/ are returned.
 */
@Test
void getPartsByName() throws InvalidFormatException, IOException {
    String filepath = getSampleFileName("sample.docx");

    try (OPCPackage pkg = OPCPackage.open(filepath, PackageAccess.READ_WRITE)) {
        try {
            // typed collections: the loop below relies on the element type
            List<PackagePart> rs = pkg.getPartsByName(Pattern.compile("/word/.*?\\.xml"));
            HashMap<String, PackagePart> selected = new HashMap<>();

            // index the matches by part name
            for (PackagePart p : rs) {
                selected.put(p.getPartName().getName(), p);
            }

            assertEquals(6, selected.size());
            assertTrue(selected.containsKey("/word/document.xml"));
            assertTrue(selected.containsKey("/word/fontTable.xml"));
            assertTrue(selected.containsKey("/word/settings.xml"));
            assertTrue(selected.containsKey("/word/styles.xml"));
            assertTrue(selected.containsKey("/word/theme/theme1.xml"));
            assertTrue(selected.containsKey("/word/webSettings.xml"));
        } finally {
            // use revert to not re-write the input file
            pkg.revert();
        }
    }
}
/**
 * Checks the size reported by three known parts: two zip parts with a
 * fixed size and the core properties part, which reports -1.
 */
@Test
void getPartSize() throws IOException, InvalidFormatException {
    final String sample = getSampleFileName("sample.docx");
    try (OPCPackage pkg = OPCPackage.open(sample, PackageAccess.READ)) {
        int seen = 0;
        for (PackagePart part : pkg.getParts()) {
            final String name = part.getPartName().getName();
            if (name.equals("/word/document.xml")) {
                // zip parts report their size
                seen++;
                assertEquals(ZipPackagePart.class, part.getClass());
                assertEquals(6031L, part.getSize());
            } else if (name.equals("/word/fontTable.xml")) {
                seen++;
                assertEquals(ZipPackagePart.class, part.getClass());
                assertEquals(1312L, part.getSize());
            } else if (name.equals("/docProps/core.xml")) {
                // the properties part does not
                seen++;
                assertEquals(PackagePropertiesPart.class, part.getClass());
                assertEquals(-1, part.getSize());
            }
        }
        // all three parts must have been visited
        assertEquals(3, seen);
    }
}
/**
 * Replaces the workbook content type with the macro workbook content type
 * and checks what the content type manager has registered before and after.
 */
@Test
void replaceContentType() throws IOException, InvalidFormatException {
    try (InputStream in = openSampleStream("sample.xlsx");
         OPCPackage pkg = OPCPackage.open(in)) {
        try {
            final ContentTypeManager types = getContentTypeManager(pkg);
            final String workbook = XSSFRelation.WORKBOOK.getContentType();
            final String macroWorkbook = XSSFRelation.MACROS_WORKBOOK.getContentType();

            // before: only the plain workbook type is known
            assertTrue(types.isContentTypeRegister(workbook));
            assertFalse(types.isContentTypeRegister(macroWorkbook));

            assertTrue(pkg.replaceContentType(workbook, macroWorkbook));

            // after: the two have swapped
            assertFalse(types.isContentTypeRegister(workbook));
            assertTrue(types.isContentTypeRegister(macroWorkbook));
        } finally {
            pkg.revert();
        }
    }
}
@SuppressWarnings("unchecked")
@ParameterizedTest
@CsvSource({
"SampleSS.xls, org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException, The supplied data appears to be in the OLE2 Format, You are calling the part of POI that deals with OOXML",
"SampleSS.xml, org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException, The supplied data appears to be a raw XML file, Formats such as Office 2003 XML",
"SampleSS.ods, org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException, The supplied data appears to be in ODF, Formats like these (eg ODS",
"SampleSS.txt, org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException, No valid entries or contents found, not a valid OOXML"
})
void NonOOXML_File(String file, String exClazzStr, String msg1, String msg2) throws Exception {
Class extends Exception> exClazz = (Class extends Exception>)Class.forName(exClazzStr);
try (InputStream stream = xlsSamples.openResourceAsStream(file)) {
Executable[] trs = {
() -> OPCPackage.open(stream),
() -> OPCPackage.open(xlsSamples.getFile(file))
};
for (Executable tr : trs) {
Exception ex = assertThrows(exClazz, tr, "Shouldn't be able to open "+file);
Stream.of(msg1, msg2).forEach(mp -> assertTrue(ex.getMessage().contains(mp)));
}
}
}
/**
* Zip bomb handling test
*
* see bug #50090 / #56865
*/
@Test
void zipBombCreateAndHandle()
throws IOException, EncryptedDocumentException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(2500000);
try (ZipFile zipFile = ZipHelper.openZipFile(getSampleFile("sample.xlsx"));
ZipArchiveOutputStream append = new ZipArchiveOutputStream(bos)) {
assertNotNull(zipFile);
// first, copy contents from existing war
Enumeration extends ZipArchiveEntry> entries = zipFile.getEntries();
while (entries.hasMoreElements()) {
final ZipArchiveEntry eIn = entries.nextElement();
final ZipArchiveEntry eOut = new ZipArchiveEntry(eIn.getName());
eOut.setTime(eIn.getTime());
eOut.setComment(eIn.getComment());
eOut.setSize(eIn.getSize());
append.putArchiveEntry(eOut);
if (!eOut.isDirectory()) {
try (InputStream is = zipFile.getInputStream(eIn)) {
if (eOut.getName().equals("[Content_Types].xml")) {
ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
IOUtils.copy(is, bos2);
long size = bos2.size() - "".length();
append.write(bos2.toByteArray(), 0, (int) size);
byte[] spam = new byte[0x7FFF];
Arrays.fill(spam, (byte) ' ');
// 0x7FFF0000 is the maximum for 32-bit zips, but less still works
while (size < 0x7FFF00) {
append.write(spam);
size += spam.length;
}
append.write("".getBytes(StandardCharsets.UTF_8));
size += 8;
eOut.setSize(size);
} else {
IOUtils.copy(is, append);
}
}
}
append.closeArchiveEntry();
}
}
IOException ex = assertThrows(
IOException.class,
() -> WorkbookFactory.create(new ByteArrayInputStream(bos.toByteArray()))
);
assertTrue(ex.getMessage().contains("Zip bomb detected!"));
}
/**
 * Extracting text from the shared-strings sample has to stop with an
 * IllegalStateException about the overall text size.
 */
@Test
void testZipEntityExpansionTerminates() {
    final Executable extraction = () -> openXmlBombFile("poc-shared-strings.xlsx");
    final IllegalStateException failure = assertThrows(IllegalStateException.class, extraction);
    final String message = failure.getMessage();
    assertTrue(message.contains("The text would exceed the max allowed overall size of extracted text."));
}
/**
 * Same check as for the default extractor, but with event extractors
 * preferred for the current thread; the previous preference is restored.
 */
@Test
void testZipEntityExpansionSharedStringTableEvents() {
    final boolean previousPreference = ExtractorFactory.getThreadPrefersEventExtractors();
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        final Executable extraction = () -> openXmlBombFile("poc-shared-strings.xlsx");
        final IllegalStateException failure = assertThrows(IllegalStateException.class, extraction);
        final String message = failure.getMessage();
        assertTrue(message.contains("The text would exceed the max allowed overall size of extracted text."));
    } finally {
        ExtractorFactory.setThreadPrefersEventExtractors(previousPreference);
    }
}
/**
 * The xml bomb sample has to fail with an IOException about the shared strings
 * table, caused by the parser limit exception that matchSAXEx looks for.
 */
@Test
void testZipEntityExpansionExceedsMemory() {
    final Executable extraction = () -> openXmlBombFile("poc-xmlbomb.xlsx");
    final IOException failure = assertThrows(IOException.class, extraction);
    final String message = failure.getMessage();
    assertTrue(message.contains("unable to parse shared strings table"));
    assertTrue(matchSAXEx(failure));
}
/**
 * Same expectation as testZipEntityExpansionExceedsMemory,
 * for the second xml bomb sample.
 */
@Test
void testZipEntityExpansionExceedsMemory2() {
    final Executable extraction = () -> openXmlBombFile("poc-xmlbomb-empty.xlsx");
    final IOException failure = assertThrows(IOException.class, extraction);
    final String message = failure.getMessage();
    assertTrue(message.contains("unable to parse shared strings table"));
    assertTrue(matchSAXEx(failure));
}
/**
 * Walks the cause chain of the given exception and looks for the parser
 * limit error.
 *
 * @param root the exception whose cause chain is searched (including itself)
 * @return {@code true} if a throwable in the chain passes the class check and its
 *         message contains "The parser has encountered more than"
 */
private static boolean matchSAXEx(Exception root) {
    for (Throwable t = root; t != null; t = t.getCause()) {
        // NOTE(review): as written this accepts SAXParseException and its
        // supertypes, not its subtypes - confirm this direction is intended
        if (!t.getClass().isAssignableFrom(SAXParseException.class)) {
            continue;
        }
        // getMessage() may be null; treat that as "no match" instead of failing
        final String message = t.getMessage();
        if (message != null && message.contains("The parser has encountered more than")) {
            return true;
        }
    }
    return false;
}
/**
 * Extracts the text of the given spreadsheet sample with the minimum inflate
 * ratio set to 0.002; the previous ratio is restored afterwards.
 *
 * @param file name of the sample file
 * @throws IOException if the extractor cannot be created or closed
 */
private void openXmlBombFile(String file) throws IOException {
    final double previousRatio = ZipSecureFile.getMinInflateRatio();
    ZipSecureFile.setMinInflateRatio(0.002);
    try {
        try (POITextExtractor textExtractor =
                 ExtractorFactory.createExtractor(XSSFTestDataSamples.getSampleFile(file))) {
            assertNotNull(textExtractor);
            textExtractor.getText();
        }
    } finally {
        ZipSecureFile.setMinInflateRatio(previousRatio);
    }
}
/**
 * With limits set just beyond the measured values the sample has to open.
 */
@Test
void zipBombCheckSizesWithinLimits() throws IOException, EncryptedDocumentException {
    getZipStatsAndConsume((maxSize, minRatio) -> {
        // close to the measured values, but still within the limits
        final double ratioLimit = minRatio - 0.002;
        final long sizeLimit = maxSize + 1;

        ZipSecureFile.setMinInflateRatio(ratioLimit);
        assertEquals(ratioLimit, ZipSecureFile.getMinInflateRatio(), 0.00001);

        ZipSecureFile.setMaxEntrySize(sizeLimit);
        assertEquals(sizeLimit, ZipSecureFile.getMaxEntrySize());
    });
}
/**
 * With the minimum inflate ratio set above the measured one,
 * opening the sample has to be rejected.
 */
@Test
void zipBombCheckSizesRatioTooSmall() {
    final Executable opening = () -> getZipStatsAndConsume((maxSize, minRatio) -> {
        // ratio limit out of bounds
        ZipSecureFile.setMinInflateRatio(minRatio+0.002);
    });
    final POIXMLException failure = assertThrows(POIXMLException.class, opening);
    final String message = failure.getMessage();
    assertTrue(message.contains("You can adjust this limit via ZipSecureFile.setMinInflateRatio()"));
}
/**
 * With the maximum entry size set below the measured one,
 * opening the sample has to be rejected.
 */
@Test
void zipBombCheckSizesSizeTooBig() throws EncryptedDocumentException {
    final Executable opening = () -> getZipStatsAndConsume((maxSize, minRatio) -> {
        // ratio limit within bounds, max entry size out of bounds
        ZipSecureFile.setMinInflateRatio(minRatio-0.002);
        ZipSecureFile.setMaxEntrySize(maxSize-200);
    });
    final POIXMLException failure = assertThrows(POIXMLException.class, opening);
    final String message = failure.getMessage();
    assertTrue(message.contains("You can adjust this limit via ZipSecureFile.setMaxEntrySize()"));
}
private void getZipStatsAndConsume(BiConsumer ratioCon) throws IOException {
// use a test file with a xml file bigger than 100k (ZipArchiveThresholdInputStream.GRACE_ENTRY_SIZE)
final File file = XSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx");
double min_ratio = Double.MAX_VALUE;
long max_size = 0;
try (ZipFile zf = ZipHelper.openZipFile(file)) {
assertNotNull(zf);
Enumeration extends ZipArchiveEntry> entries = zf.getEntries();
while (entries.hasMoreElements()) {
ZipArchiveEntry ze = entries.nextElement();
if (ze.getSize() == 0) {
continue;
}
// add zip entry header ~ 128 bytes
long size = ze.getSize()+128;
double ratio = ze.getCompressedSize() / (double)size;
min_ratio = Math.min(min_ratio, ratio);
max_size = Math.max(max_size, size);
}
}
ratioCon.accept(max_size, min_ratio);
//noinspection EmptyTryBlock,unused
try (Workbook wb = WorkbookFactory.create(file, null, true)) {
} finally {
// reset otherwise a lot of ooxml tests will fail
ZipSecureFile.setMinInflateRatio(0.01d);
ZipSecureFile.setMaxEntrySize(0xFFFFFFFFL);
}
}
/**
 * A ZipSecureFile can be constructed from a File and from a path name.
 */
@Test
void testConstructors() throws IOException {
    final File sample = getSampleFile("sample.xlsx");

    // from a File
    try (ZipSecureFile fromFile = new ZipSecureFile(sample)) {
        assertNotNull(fromFile.getName());
    }

    // from the absolute path name
    final String samplePath = sample.getAbsolutePath();
    try (ZipSecureFile fromPath = new ZipSecureFile(samplePath)) {
        assertNotNull(fromPath.getName());
    }
}
/**
 * The maximum text size can be set and read back;
 * the previous value is restored afterwards.
 */
@Test
void testMaxTextSize() {
    final long previousLimit = ZipSecureFile.getMaxTextSize();
    try {
        ZipSecureFile.setMaxTextSize(12345);
        final long current = ZipSecureFile.getMaxTextSize();
        assertEquals(12345, current);
    } finally {
        ZipSecureFile.setMaxTextSize(previousLimit);
    }
}
/**
 * bug 60128: opening the invalid sample has to fail with a
 * NotOfficeXmlFileException.
 */
@Test
void testCorruptFile() {
    final File invalid = getSampleFile("invalid.xlsx");
    final Executable opening = () -> OPCPackage.open(invalid, PackageAccess.READ);
    assertThrows(NotOfficeXmlFileException.class, opening);
}
/**
 * Factory for a stream wrapping another stream; matches the
 * {@code (InputStream, int)} constructors of the filter streams it is used with.
 */
@FunctionalInterface
private interface CountingStream {
    /**
     * @param is the stream to wrap
     * @param length the size argument handed to the wrapping stream
     * @return the wrapping stream
     */
    InputStream create(InputStream is, int length);
}
/**
 * bug 61381: both samples must be readable through a pushback and through a
 * buffered stream that were created with a size of only 2.
 */
@Test
void testTooShortFilterStreams() throws IOException {
    final String[] samples = {"sample.xlsx","SampleSS.xls"};
    final CountingStream[] wrappers = {PushbackInputStream::new, BufferedInputStream::new};

    for (String sample : samples) {
        for (CountingStream wrapper : wrappers) {
            try (InputStream wrapped = wrapper.create(xlsSamples.openResourceAsStream(sample), 2);
                 Workbook workbook = WorkbookFactory.create(wrapped)) {
                assertEquals(3, workbook.getNumberOfSheets());
            }
        }
    }
}
/**
 * Opens the sample for bug 56479 and checks which of three well-known
 * parts it contains: the core properties must be there, the document
 * and the theme must not.
 */
@Test
void testBug56479() throws Exception {
    try (InputStream in = openSampleStream("dcterms_bug_56479.zip");
         OPCPackage p = OPCPackage.open(in)) {
        boolean sawCoreProps = false;
        boolean sawDocument = false;
        boolean sawTheme = false;

        // look at every part and verify the content type of the known ones
        for (final PackagePart part : p.getParts()) {
            final String name = part.getPartName().toString();
            final String type = part.getContentType();
            if (name.equals("/docProps/core.xml")) {
                assertEquals(ContentTypes.CORE_PROPERTIES_PART, type);
                sawCoreProps = true;
            } else if (name.equals("/word/document.xml")) {
                assertEquals(XWPFRelation.DOCUMENT.getContentType(), type);
                sawDocument = true;
            } else if (name.equals("/word/theme/theme1.xml")) {
                assertEquals(XWPFRelation.THEME.getContentType(), type);
                sawTheme = true;
            }
        }

        assertTrue(sawCoreProps, "Core not found in " + p.getParts());
        assertFalse(sawDocument, "Document should not be found in " + p.getParts());
        assertFalse(sawTheme, "Theme1 should not found in " + p.getParts());
    }
}
@Test
void unparseableCentralDirectory() throws IOException {
File f = getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx");
try (SlideShow,?> ppt = SlideShowFactory.create(f, null, true)) {
assertNotNull(ppt);
assertNotNull(ppt.getSlides().get(0));
}
}
/**
 * Opening a truncated zip must fail, and the failed open must not keep the
 * file open: the temporary file has to be deletable afterwards.
 */
@Test
void testClosingStreamOnException() throws IOException {
    final File truncated = File.createTempFile("poi-test-truncated-zip", "");
    // build a broken archive: only the first 100 bytes of a valid zip are kept
    try (InputStream source = openSampleStream("dcterms_bug_56479.zip");
         OutputStream sink = new FileOutputStream(truncated)) {
        IOUtils.copy(source, sink, 100);
    }
    // the truncated archive is invalid, so open() is expected to throw
    assertThrows(Exception.class, () -> OPCPackage.open(truncated, PackageAccess.READ));
    // a file descriptor left open by the failed open() would make the OS refuse the delete
    final boolean deleted = truncated.delete();
    assertTrue(deleted, "Can't delete tmp file");
}
/**
 * When ZipPackage is handed an invalid file, closing it (e.g. from the
 * OPCPackage open method) should release the stream / file the broken
 * input is read from. See bug #60128.
 * <p>
 * This variant covers {@code SampleSS.ods} opened without a stream.
 */
@Test
void testTidyStreamOnInvalidFile1() throws Exception {
    final boolean useStream = false;
    openInvalidFile("SampleSS.ods", useStream);
}
/**
 * Invalid-file tidy-up check for {@code SampleSS.ods} opened through an input stream.
 */
@Test
void testTidyStreamOnInvalidFile2() throws Exception {
    final boolean useStream = true;
    openInvalidFile("SampleSS.ods", useStream);
}
/**
 * Invalid-file tidy-up check for {@code SampleSS.txt} opened without a stream.
 */
@Test
void testTidyStreamOnInvalidFile3() throws Exception {
    final boolean useStream = false;
    openInvalidFile("SampleSS.txt", useStream);
}
/**
 * Invalid-file tidy-up check for {@code SampleSS.txt} opened through an input stream.
 */
@Test
void testTidyStreamOnInvalidFile4() throws Exception {
    final boolean useStream = true;
    openInvalidFile("SampleSS.txt", useStream);
}
/**
 * Bug 62592: opening the {@code 62592.thmx} sample from a stream is expected
 * to fail with an {@link InvalidFormatException}.
 */
@Test
void testBug62592() throws Exception {
    try (InputStream thmx = openSampleStream("62592.thmx")) {
        assertThrows(
                InvalidFormatException.class,
                () -> OPCPackage.open(thmx));
    }
}
/**
 * Bug 62592: asking a package for its parts repeatedly must not throw.
 */
@Test
void testBug62592SequentialCallsToGetParts() throws Exception {
    final String docx = getSampleFileName("TestPackageCommon.docx");
    try (OPCPackage pkg = OPCPackage.open(docx, PackageAccess.READ)) {
        // request the parts twice in a row; neither call may fail
        for (int call = 0; call < 2; call++) {
            assertDoesNotThrow(pkg::getParts);
        }
    }
}
/**
 * Writing a workbook to a caller-supplied stream must not close that stream.
 * This is checked for both {@link XSSFWorkbook} and {@link SXSSFWorkbook}.
 */
@Test
void testDoNotCloseStream() throws IOException {
    // A hand-written stream replaces the Mockito mock used up to JDK 10:
    // OutputStream is an abstract class and mocking it fails with some changes in JDK 11.
    final OutputStream sink = new OutputStream() {
        @Override
        public void close() {
            fail("close should not be called here");
        }

        @Override
        public void write(int b) {
            // written bytes are simply discarded
        }
    };
    try (XSSFWorkbook xssf = new XSSFWorkbook()) {
        xssf.createSheet();
        xssf.write(sink);
    }
    try (SXSSFWorkbook sxssf = new SXSSFWorkbook()) {
        sxssf.createSheet();
        sxssf.write(sink);
    }
}
/**
 * Opens the named sample as a {@link ZipPackage} and expects reading it to
 * fail with a {@link NotOfficeXmlFileException}. The sample is opened either
 * from a resource stream or from {@code xlsSamples.getFile(name)}. If a
 * package object was obtained before the failure, its zip archive must be
 * closed afterwards.
 *
 * @param name      name of the sample resource to open
 * @param useStream {@code true} to open from an input stream, {@code false}
 *                  to open from {@code xlsSamples.getFile(name)}
 * @throws IOException if closing the resource stream fails
 */
private static void openInvalidFile(final String name, final boolean useStream) throws IOException {
    // one-element holder so the lambda below can pass the package back out
    final ZipPackage[] opened = new ZipPackage[1];
    try (final InputStream stream = useStream ? xlsSamples.openResourceAsStream(name) : null) {
        assertThrows(NotOfficeXmlFileException.class, () -> {
            try (final ZipPackage pkg = useStream
                    ? new ZipPackage(stream, PackageAccess.READ)
                    : new ZipPackage(xlsSamples.getFile(name), PackageAccess.READ)) {
                opened[0] = pkg;
                // before the parts are read the archive must exist and still be open
                assertNotNull(pkg.getZipArchive());
                assertFalse(pkg.getZipArchive().isClosed());
                pkg.getParts();
            }
        });
    } finally {
        // only checkable when the constructor itself succeeded
        final ZipPackage leftover = opened[0];
        if (leftover != null) {
            assertNotNull(leftover.getZipArchive());
            assertTrue(leftover.getZipArchive().isClosed());
        }
    }
}
/**
 * Bug 63029: if saving fails while a read-write package is being closed, the
 * file on disk must stay byte-for-byte unchanged and must still open as a
 * package with the same number of parts.
 */
@SuppressWarnings("UnstableApiUsage")
@Test
void testBug63029() throws Exception {
    File testFile = getSampleFile("sample.docx");
    File tmpFile = getOutputFile("Bug63029.docx");
    Files.copy(testFile, tmpFile);
    int numPartsBefore = 0;
    // SHA-256 digest of the untouched copy, used below to detect any modification
    String sha256Before = Files.asByteSource(tmpFile).hash(Hashing.sha256()).toString();
    try (OPCPackage pkg = OPCPackage.open(tmpFile, PackageAccess.READ_WRITE)) {
        numPartsBefore = pkg.getParts().size();
        // add a marshaller that will throw an exception on save
        pkg.addMarshaller("poi/junit", (part, out) -> {
            throw new RuntimeException("Bugzilla 63029");
        });
        pkg.createPart(createPartName("/poi/test.xml"), "poi/junit");
        RuntimeException ex = assertThrows(RuntimeException.class, pkg::close);
        // verify there was an exception while closing the file
        assertEquals("Fail to save: an error occurs while saving the package : Bugzilla 63029", ex.getMessage());
    }
    // assert that the digest after closing is the same, i.e. the source is left intact
    String sha256After = Files.asByteSource(tmpFile).hash(Hashing.sha256()).toString();
    assertEquals(sha256Before, sha256After);
    // try to read the source file once again
    try (OPCPackage pkg = OPCPackage.open(tmpFile, PackageAccess.READ_WRITE)) {
        // the source is still a valid zip archive.
        // prior to the fix this used to throw NotOfficeXmlFileException("archive is not a ZIP archive")
        // assert that the number of parts remained the same (expected value first, then actual)
        assertEquals(numPartsBefore, pkg.getParts().size());
    }
}
}