aboutsummaryrefslogtreecommitdiffstats
path: root/poi
diff options
context:
space:
mode:
authorDominik Stadler <centic@apache.org>2024-07-15 05:40:56 +0000
committerDominik Stadler <centic@apache.org>2024-07-15 05:40:56 +0000
commit5085e3d1b2af1ac20e6158b0a00797a0d3b6f2ba (patch)
treeaf1c26dbd052c082f6e55eedad2cd4634ce17492 /poi
parent1a07ee8d6a41a410c7567b3d58f9b1ee7815c941 (diff)
downloadpoi-5085e3d1b2af1ac20e6158b0a00797a0d3b6f2ba.tar.gz
poi-5085e3d1b2af1ac20e6158b0a00797a0d3b6f2ba.zip
Add initial support for SOURCE_DATE_EPOCH
This allows creating reproducible binary files, without creation/modification timestamps being set, when the environment variable SOURCE_DATE_EPOCH is set. See https://reproducible-builds.org/docs/source-date-epoch/ for the related specification. For now, we ensure that Zip-file entries set the modification time to 1970-01-01, which seems to be enough to make simple OOXML files reproducible. There are likely some other places where resulting files are not reproducible; more testing will be necessary to identify other areas that should take this into account as well. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1919236 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'poi')
-rw-r--r--poi/src/main/java/org/apache/poi/util/Reproducibility.java78
-rw-r--r--poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java83
2 files changed, 96 insertions, 65 deletions
diff --git a/poi/src/main/java/org/apache/poi/util/Reproducibility.java b/poi/src/main/java/org/apache/poi/util/Reproducibility.java
new file mode 100644
index 0000000000..9ece2bc269
--- /dev/null
+++ b/poi/src/main/java/org/apache/poi/util/Reproducibility.java
@@ -0,0 +1,78 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.util;
+
+import java.io.IOException;
+
+import org.apache.commons.io.function.IORunnable;
+
+/**
+ * Helper class which supports producing so-called
+ * "reproducible" output.
+ *
+ * I.e. multiple runs of the same steps should
+ * produce the same byte-by-byte result.
+ *
+ * This usually means that, among other sources of "randomness",
+ * timestamps should be avoided.
+ *
+ * This class exposes a flag derived from the environment variable
+ * SOURCE_DATE_EPOCH and a way to force that flag on for a block of code.
+ *
+ * See https://reproducible-builds.org/ for more details.
+ */
+public class Reproducibility {
+ // Support for reproducible output files: this flag is initialized once,
+ // at class load, to whether SOURCE_DATE_EPOCH is present (the value is not parsed).
+ // If it is set, we use timestamp "0" for entries in Zip files.
+ // It can be forced on temporarily via runWithSourceDateEpoch().
+ // See https://reproducible-builds.org/docs/source-date-epoch/ for the specification.
+ private static boolean IS_SOURCE_DATE_EPOCH =
+ System.getenv("SOURCE_DATE_EPOCH") != null;
+
+ /**
+ * Check if SOURCE_DATE_EPOCH was set at class load or is forced via runWithSourceDateEpoch.
+ *
+ * @return True if reproducible output is requested, false otherwise
+ */
+ public static boolean isSourceDateEpoch() {
+ return IS_SOURCE_DATE_EPOCH;
+ }
+
+ /**
+ * Execute a runnable with the SOURCE_DATE_EPOCH flag forced to true.
+ *
+ * The previous value of the flag is restored afterwards, even if the
+ * runnable throws. This is mostly only used in tests.
+ *
+ * @param r A runnable which executes the wanted steps while
+ * {@link #isSourceDateEpoch()} returns true
+ *
+ * @throws IOException if executing the runnable throws an IOException
+ * @throws RuntimeException if executing the runnable throws a RuntimeException
+ */
+ public static void runWithSourceDateEpoch(IORunnable r) throws IOException {
+ boolean before = IS_SOURCE_DATE_EPOCH;
+ IS_SOURCE_DATE_EPOCH = true; // NOTE(review): plain static field, no synchronization here - confirm single-threaded use
+ try {
+ r.run();
+ } finally {
+ IS_SOURCE_DATE_EPOCH = before; // restore the earlier value, also when r.run() throws
+ }
+ }
+}
diff --git a/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java b/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java
index a356c995a7..1a410e6f12 100644
--- a/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java
+++ b/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java
@@ -42,7 +42,6 @@ import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.ss.ITestDataProvider;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.formula.FormulaParseException;
@@ -51,6 +50,7 @@ import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.ss.util.CellRangeAddressList;
import org.apache.poi.ss.util.PaneInformation;
import org.apache.poi.ss.util.SheetUtil;
+import org.apache.poi.util.Reproducibility;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@@ -1815,70 +1815,23 @@ public abstract class BaseTestBugzillaIssues {
cell.setCellValue("Ernie & Bert are cool!");
cell.setCellFormula("A1 & \" are cool!\"");
- try (UnsynchronizedByteArrayOutputStream out1 = UnsynchronizedByteArrayOutputStream.builder().get();
- UnsynchronizedByteArrayOutputStream out2 = UnsynchronizedByteArrayOutputStream.builder().get()) {
- wb.write(out1);
- wb.write(out2);
-
- out1.flush();
- out2.flush();
-
- // to avoid flaky tests if the documents are written at slightly different timestamps
- // we clear some bytes which contain timestamps
- assertArrayEquals(
- removeTimestamp(out1.toByteArray()),
- removeTimestamp(out2.toByteArray()));
- }
+ Reproducibility.runWithSourceDateEpoch(
+ () -> {
+ try (UnsynchronizedByteArrayOutputStream out1 = UnsynchronizedByteArrayOutputStream.builder().get();
+ UnsynchronizedByteArrayOutputStream out2 = UnsynchronizedByteArrayOutputStream.builder().get()) {
+ wb.write(out1);
+ wb.write(out2);
+
+ out1.flush();
+ out2.flush();
+
+ // both documents are written inside runWithSourceDateEpoch(), so the raw bytes are
+ // compared directly instead of clearing timestamp bytes first
+ assertArrayEquals(
+ out1.toByteArray(),
+ out2.toByteArray());
+ }
+ });
}
}
-
- private byte[] removeTimestamp(byte[] bytes) {
- if (FileMagic.valueOf(bytes) == FileMagic.OOXML) {
- // This removes the timestamp in the header of the ZIP-Format
- // see "Local file header" at https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
- bytes[10] = 0;
- bytes[11] = 0;
- bytes[12] = 0;
- bytes[13] = 0;
-
- // there is a timestamp for every entry, so try to replace a few more byte-positions
- // to reduce flakiness of this test, however we likely do not yet cover all entries
- bytes[390] = 0;
- bytes[391] = 0;
- bytes[674] = 0;
- bytes[676] = 0;
- bytes[883] = 0;
- bytes[1207] = 0;
- bytes[1208] = 0;
- bytes[1432] = 0;
- bytes[1433] = 0;
- bytes[1434] = 0;
- bytes[1817] = 0;
- bytes[1818] = 0;
- bytes[2098] = 0;
- bytes[2099] = 0;
- bytes[2762] = 0;
- bytes[2763] = 0;
- bytes[2382] = 0;
- bytes[2383] = 0;
- bytes[2827] = 0;
- bytes[2828] = 0;
- bytes[2884] = 0;
- bytes[2885] = 0;
- bytes[2946] = 0;
- bytes[2947] = 0;
- bytes[3009] = 0;
- bytes[3010] = 0;
- bytes[3075] = 0;
- bytes[3076] = 0;
- bytes[3134] = 0;
- bytes[3135] = 0;
- bytes[3195] = 0;
- bytes[3196] = 0;
- bytes[3267] = 0;
- bytes[3268] = 0;
- }
-
- return bytes;
- }
}