+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import org.apache.poi.hpsf.DocumentSummaryInformation;
-import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-
-/**
- * Common Parent for OLE2 based Text Extractors
- * of POI Documents, such as .doc, .xls
- * You will typically find the implementation of
- * a given format's text extractor under
- * org.apache.poi.[format].extractor .
- *
- * @see org.apache.poi.hssf.extractor.ExcelExtractor
- * @see org.apache.poi.hslf.extractor.PowerPointExtractor
- * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
- * @see org.apache.poi.hwpf.extractor.WordExtractor
- */
-public abstract class POIOLE2TextExtractor extends POITextExtractor {
- /** The POIDocument that's open */
- protected POIDocument document;
-
- /**
- * Creates a new text extractor for the given document
- *
- * @param document The POIDocument to use in this extractor.
- */
- public POIOLE2TextExtractor(POIDocument document) {
- this.document = document;
-
- // Ensure any underlying resources, such as open files,
- // will get cleaned up if the user calls #close()
- setFilesystem(document);
- }
-
- /**
- * Creates a new text extractor, using the same
- * document as another text extractor. Normally
- * only used by properties extractors.
- *
- * @param otherExtractor the extractor which document to be used
- */
- protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) {
- this.document = otherExtractor.document;
- }
-
- /**
- * Returns the document information metadata for the document
- *
- * @return The Document Summary Information or null
- * if it could not be read for this document.
- */
- public DocumentSummaryInformation getDocSummaryInformation() {
- return document.getDocumentSummaryInformation();
- }
- /**
- * Returns the summary information metadata for the document.
- *
- * @return The Summary information for the document or null
- * if it could not be read for this document.
- */
- public SummaryInformation getSummaryInformation() {
- return document.getSummaryInformation();
- }
-
- /**
- * Returns an HPSF powered text extractor for the
- * document properties metadata, such as title and author.
- *
- * @return an instance of POIExtractor that can extract meta-data.
- */
- @Override
- public POITextExtractor getMetadataTextExtractor() {
- return new HPSFPropertiesExtractor(this);
- }
-
- /**
- * Return the underlying DirectoryEntry of this document.
- *
- * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
- */
- public DirectoryEntry getRoot() {
- return document.getDirectory();
- }
-
- /**
- * Return the underlying POIDocument
- *
- * @return the underlying POIDocument
- */
- @Override
- public POIDocument getDocument() {
- return document;
- }
-}
\ No newline at end of file
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.File;
+import java.io.OutputStream;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+
+/**
+ * Common base class for all read-only POI Document classes,
+ * i.e. ones which don't support writing.
+ * <p>
+ * Each constructor forwards its single argument unchanged to the
+ * {@link POIDocument} constructor that accepts the same type, and all
+ * three {@code write} overloads are overridden so that they throw
+ * {@link IllegalStateException} unconditionally.
+ *
+ * @since POI 3.15 beta 3
+ */
+public abstract class POIReadOnlyDocument extends POIDocument {
+ public POIReadOnlyDocument(DirectoryNode dir) {
+ super(dir);
+ }
+ public POIReadOnlyDocument(NPOIFSFileSystem fs) {
+ super(fs);
+ }
+ public POIReadOnlyDocument(OPOIFSFileSystem fs) {
+ super(fs);
+ }
+ public POIReadOnlyDocument(POIFSFileSystem fs) {
+ super(fs);
+ }
+
+ /**
+ * Always fails - writing is not yet supported for this file format.
+ * The method body consists solely of the {@code throw}.
+ *
+ * @throws IllegalStateException on every call, as writing is not supported
+ */
+ @Override
+ public void write() {
+ throw new IllegalStateException("Writing is not yet implemented for this Document Format");
+ }
+ /**
+ * Always fails - writing is not yet supported for this file format.
+ *
+ * @param file never used; the exception is thrown before the argument is looked at
+ * @throws IllegalStateException on every call, as writing is not supported
+ */
+ @Override
+ public void write(File file) {
+ throw new IllegalStateException("Writing is not yet implemented for this Document Format");
+ }
+ /**
+ * Always fails - writing is not yet supported for this file format.
+ *
+ * @param out never used; nothing is written to the stream and it is not closed
+ * @throws IllegalStateException on every call, as writing is not supported
+ */
+ @Override
+ public void write(OutputStream out) {
+ throw new IllegalStateException("Writing is not yet implemented for this Document Format");
+ }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.Closeable;
-import java.io.IOException;
-
-/**
- * Common Parent for Text Extractors
- * of POI Documents.
- * You will typically find the implementation of
- * a given format's text extractor under
- * org.apache.poi.[format].extractor .
- *
- * @see org.apache.poi.hssf.extractor.ExcelExtractor
- * @see org.apache.poi.hslf.extractor.PowerPointExtractor
- * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
- * @see org.apache.poi.hwpf.extractor.WordExtractor
- */
-public abstract class POITextExtractor implements Closeable {
- private Closeable fsToClose;
-
- /**
- * Retrieves all the text from the document.
- * How cells, paragraphs etc are separated in the text
- * is implementation specific - see the javadocs for
- * a specific project for details.
- * @return All the text from the document
- */
- public abstract String getText();
-
- /**
- * Returns another text extractor, which is able to
- * output the textual content of the document
- * metadata / properties, such as author and title.
- *
- * @return the metadata and text extractor
- */
- public abstract POITextExtractor getMetadataTextExtractor();
-
- /**
- * Used to ensure file handle cleanup.
- *
- * @param fs filesystem to close
- */
- public void setFilesystem(Closeable fs) {
- fsToClose = fs;
- }
-
- /**
- * Allows to free resources of the Extractor as soon as
- * it is not needed any more. This may include closing
- * open file handles and freeing memory.
- *
- * The Extractor cannot be used after close has been called.
- */
- @Override
- public void close() throws IOException {
- if(fsToClose != null) {
- fsToClose.close();
- }
- }
-
- /**
- * @return the processed document
- */
- public abstract Object getDocument();
-}
+++ /dev/null
-
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.dev;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Locale;
-import java.util.Properties;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Result;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.stream.StreamResult;
-import javax.xml.transform.stream.StreamSource;
-
-import org.apache.poi.util.XMLHelper;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-
-/**
- * Description of the Class
- *
- *@author andy
- *@since May 10, 2002
- */
-public class RecordGenerator {
- /**
- * The main program for the RecordGenerator class
- *
- *@param args The command line arguments
- *@exception Exception Description of the Exception
- */
- public static void main(String[] args)
- throws Exception {
- // Force load so that we don't start generating records and realise this hasn't compiled yet.
- Class.forName("org.apache.poi.generator.FieldIterator");
-
- if (args.length != 4) {
- System.out.println("Usage:");
- System.out.println(" java org.apache.poi.hssf.util.RecordGenerator RECORD_DEFINTIONS RECORD_STYLES DEST_SRC_PATH TEST_SRC_PATH");
- } else {
- generateRecords(args[0], args[1], args[2], args[3]);
- }
- }
-
-
- private static void generateRecords(String defintionsDir, String recordStyleDir, String destSrcPathDir, String testSrcPathDir)
- throws Exception {
- File definitionsFiles[] = new File(defintionsDir).listFiles();
- if (definitionsFiles == null) {
- System.err.println(defintionsDir+" is not a directory.");
- return;
- }
-
- for (File file : definitionsFiles) {
- if (file.isFile() &&
- (file.getName().endsWith("_record.xml") ||
- file.getName().endsWith("_type.xml")
- )
- ) {
- // Get record name and package
- DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory();
- DocumentBuilder builder = factory.newDocumentBuilder();
- Document document = builder.parse(file);
- Element record = document.getDocumentElement();
- String extendstg = record.getElementsByTagName("extends").item(0).getFirstChild().getNodeValue();
- String suffix = record.getElementsByTagName("suffix").item(0).getFirstChild().getNodeValue();
- String recordName = record.getAttributes().getNamedItem("name").getNodeValue();
- String packageName = record.getAttributes().getNamedItem("package").getNodeValue();
- packageName = packageName.replace('.', '/');
-
- // Generate record
- String destinationPath = destSrcPathDir + "/" + packageName;
- File destinationPathFile = new File(destinationPath);
- if(!destinationPathFile.mkdirs()) {
- throw new IOException("Could not create directory " + destinationPathFile);
- } else {
- System.out.println("Created destination directory: " + destinationPath);
- }
- String destinationFilepath = destinationPath + "/" + recordName + suffix + ".java";
- transform(file, new File(destinationFilepath),
- new File(recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT) + ".xsl"));
- System.out.println("Generated " + suffix + ": " + destinationFilepath);
-
- // Generate test (if not already generated)
- destinationPath = testSrcPathDir + "/" + packageName;
- destinationPathFile = new File(destinationPath);
- if(!destinationPathFile.mkdirs()) {
- throw new IOException("Could not create directory " + destinationPathFile);
- } else {
- System.out.println("Created destination directory: " + destinationPath);
- }
- destinationFilepath = destinationPath + "/Test" + recordName + suffix + ".java";
- if (!new File(destinationFilepath).exists()) {
- String temp = (recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT) + "_test.xsl");
- transform(file, new File(destinationFilepath), new File(temp));
- System.out.println("Generated test: " + destinationFilepath);
- } else {
- System.out.println("Skipped test generation: " + destinationFilepath);
- }
- }
- }
- }
-
-
-
- /**
- * <p>Executes an XSL transformation. This process transforms an XML input
- * file into a text output file controlled by an XSLT specification.</p>
- *
- * @param in the XML input file
- * @param out the text output file
- * @param xslt the XSLT specification, i.e. an XSL style sheet
- * @throws FileNotFoundException
- * @throws TransformerException
- */
- private static void transform(final File in, final File out, final File xslt)
- throws FileNotFoundException, TransformerException
- {
- final StreamSource ss = new StreamSource(xslt);
- final TransformerFactory tf = TransformerFactory.newInstance();
- final Transformer t;
- try
- {
- t = tf.newTransformer(ss);
- }
- catch (TransformerException ex)
- {
- System.err.println("Error compiling XSL style sheet " + xslt);
- throw ex;
- }
- final Properties p = new Properties();
- p.setProperty(OutputKeys.METHOD, "text");
- t.setOutputProperties(p);
- final Result result = new StreamResult(out);
- t.transform(new StreamSource(in), result);
- }
-
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+
+/**
+ * Common parent for the text extractors of OLE2 based
+ * POI Documents, such as .doc, .xls
+ * <p>
+ * It holds the {@link POIDocument} being extracted and exposes that
+ * document's summary information, document summary information and
+ * directory by delegating to it.
+ * You will typically find the implementation of
+ * a given format's text extractor under
+ * org.apache.poi.[format].extractor .
+ *
+ * @see org.apache.poi.hssf.extractor.ExcelExtractor
+ * @see org.apache.poi.hslf.extractor.PowerPointExtractor
+ * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
+ * @see org.apache.poi.hwpf.extractor.WordExtractor
+ */
+public abstract class POIOLE2TextExtractor extends POITextExtractor {
+ /** The POIDocument that's open; every accessor below delegates to it */
+ protected POIDocument document;
+
+ /**
+ * Creates a new text extractor for the given document.
+ * The document is also handed to {@link #setFilesystem}, so it is
+ * the resource that {@link #close()} will close.
+ *
+ * @param document The POIDocument to use in this extractor.
+ */
+ public POIOLE2TextExtractor(POIDocument document) {
+ this.document = document;
+
+ // Ensure any underlying resources, such as open files,
+ // will get cleaned up if the user calls #close()
+ setFilesystem(document);
+ }
+
+ /**
+ * Creates a new text extractor, using the same
+ * document as another text extractor. Normally
+ * only used by properties extractors.
+ * <p>
+ * Unlike the public constructor, this one does not call
+ * {@link #setFilesystem}; the shared document is only stored, not
+ * registered for closing by this instance.
+ *
+ * @param otherExtractor the extractor whose document is to be shared
+ */
+ protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) {
+ this.document = otherExtractor.document;
+ }
+
+ /**
+ * Returns the document summary information metadata for the document,
+ * as reported by {@link POIDocument#getDocumentSummaryInformation()}.
+ *
+ * @return The Document Summary Information or null
+ * if it could not be read for this document.
+ */
+ public DocumentSummaryInformation getDocSummaryInformation() {
+ return document.getDocumentSummaryInformation();
+ }
+ /**
+ * Returns the summary information metadata for the document,
+ * as reported by {@link POIDocument#getSummaryInformation()}.
+ *
+ * @return The Summary information for the document or null
+ * if it could not be read for this document.
+ */
+ public SummaryInformation getSummaryInformation() {
+ return document.getSummaryInformation();
+ }
+
+ /**
+ * Returns an HPSF powered text extractor for the
+ * document properties metadata, such as title and author.
+ * A new extractor is created on every call.
+ *
+ * @return a new {@link HPSFPropertiesExtractor} constructed from this extractor
+ */
+ @Override
+ public POITextExtractor getMetadataTextExtractor() {
+ return new HPSFPropertiesExtractor(this);
+ }
+
+ /**
+ * Return the underlying DirectoryEntry of this document.
+ *
+ * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
+ */
+ public DirectoryEntry getRoot() {
+ return document.getDirectory();
+ }
+
+ /**
+ * Return the underlying POIDocument; the return type is narrowed
+ * from the {@code Object} declared by the parent class.
+ *
+ * @return the underlying POIDocument
+ */
+ @Override
+ public POIDocument getDocument() {
+ return document;
+ }
+}
\ No newline at end of file
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * Common Parent for Text Extractors
+ * of POI Documents.
+ * <p>
+ * Besides the abstract extraction methods, it keeps an optional
+ * {@link Closeable} which is closed when the extractor itself is closed.
+ * You will typically find the implementation of
+ * a given format's text extractor under
+ * org.apache.poi.[format].extractor .
+ *
+ * @see org.apache.poi.hssf.extractor.ExcelExtractor
+ * @see org.apache.poi.hslf.extractor.PowerPointExtractor
+ * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
+ * @see org.apache.poi.hwpf.extractor.WordExtractor
+ */
+public abstract class POITextExtractor implements Closeable {
+ private Closeable fsToClose; // resource released by close(); null until setFilesystem is called
+
+ /**
+ * Retrieves all the text from the document.
+ * How cells, paragraphs etc are separated in the text
+ * is implementation specific - see the javadocs for
+ * a specific project for details.
+ * @return All the text from the document
+ */
+ public abstract String getText();
+
+ /**
+ * Returns another text extractor, which is able to
+ * output the textual content of the document
+ * metadata / properties, such as author and title.
+ *
+ * @return the metadata text extractor
+ */
+ public abstract POITextExtractor getMetadataTextExtractor();
+
+ /**
+ * Used to ensure file handle cleanup: remembers the resource that
+ * {@link #close()} should close. A previously registered resource is
+ * simply replaced; it is not closed by this method.
+ *
+ * @param fs filesystem to close, or null to have close() do nothing
+ */
+ public void setFilesystem(Closeable fs) {
+ fsToClose = fs;
+ }
+
+ /**
+ * Allows to free resources of the Extractor as soon as
+ * it is not needed any more. This may include closing
+ * open file handles and freeing memory.
+ * <p>
+ * Closes the resource registered through {@link #setFilesystem}, if
+ * there is one. The reference is kept afterwards, so calling this
+ * method again closes the same resource again.
+ *
+ * The Extractor cannot be used after close has been called.
+ *
+ * @throws IOException if closing the registered resource fails
+ */
+ @Override
+ public void close() throws IOException {
+ if(fsToClose != null) {
+ fsToClose.close();
+ }
+ }
+
+ /**
+ * Declared as {@code Object}; subclasses may narrow the return type.
+ *
+ * @return the processed document
+ */
+ public abstract Object getDocument();
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.usermodel;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.hssf.record.BOFRecord;
+import org.apache.poi.hssf.record.DimensionsRecord;
+import org.apache.poi.hssf.record.EOFRecord;
+import org.apache.poi.hssf.record.FooterRecord;
+import org.apache.poi.hssf.record.HCenterRecord;
+import org.apache.poi.hssf.record.HeaderRecord;
+import org.apache.poi.hssf.record.PrintSetupRecord;
+import org.apache.poi.hssf.record.ProtectRecord;
+import org.apache.poi.hssf.record.Record;
+import org.apache.poi.hssf.record.RecordBase;
+import org.apache.poi.hssf.record.SCLRecord;
+import org.apache.poi.hssf.record.UnknownRecord;
+import org.apache.poi.hssf.record.VCenterRecord;
+import org.apache.poi.hssf.record.chart.AreaFormatRecord;
+import org.apache.poi.hssf.record.chart.AxisLineFormatRecord;
+import org.apache.poi.hssf.record.chart.AxisOptionsRecord;
+import org.apache.poi.hssf.record.chart.AxisParentRecord;
+import org.apache.poi.hssf.record.chart.AxisRecord;
+import org.apache.poi.hssf.record.chart.AxisUsedRecord;
+import org.apache.poi.hssf.record.chart.BarRecord;
+import org.apache.poi.hssf.record.chart.BeginRecord;
+import org.apache.poi.hssf.record.chart.CategorySeriesAxisRecord;
+import org.apache.poi.hssf.record.chart.ChartFormatRecord;
+import org.apache.poi.hssf.record.chart.ChartRecord;
+import org.apache.poi.hssf.record.chart.ChartTitleFormatRecord;
+import org.apache.poi.hssf.record.chart.DataFormatRecord;
+import org.apache.poi.hssf.record.chart.DefaultDataLabelTextPropertiesRecord;
+import org.apache.poi.hssf.record.chart.EndRecord;
+import org.apache.poi.hssf.record.chart.FontBasisRecord;
+import org.apache.poi.hssf.record.chart.FontIndexRecord;
+import org.apache.poi.hssf.record.chart.FrameRecord;
+import org.apache.poi.hssf.record.chart.LegendRecord;
+import org.apache.poi.hssf.record.chart.LineFormatRecord;
+import org.apache.poi.hssf.record.chart.LinkedDataRecord;
+import org.apache.poi.hssf.record.chart.PlotAreaRecord;
+import org.apache.poi.hssf.record.chart.PlotGrowthRecord;
+import org.apache.poi.hssf.record.chart.SeriesIndexRecord;
+import org.apache.poi.hssf.record.chart.SeriesRecord;
+import org.apache.poi.hssf.record.chart.SeriesTextRecord;
+import org.apache.poi.hssf.record.chart.SeriesToChartGroupRecord;
+import org.apache.poi.hssf.record.chart.SheetPropertiesRecord;
+import org.apache.poi.hssf.record.chart.TextRecord;
+import org.apache.poi.hssf.record.chart.TickRecord;
+import org.apache.poi.hssf.record.chart.UnitsRecord;
+import org.apache.poi.hssf.record.chart.ValueRangeRecord;
+import org.apache.poi.ss.formula.ptg.Area3DPtg;
+import org.apache.poi.ss.formula.ptg.AreaPtgBase;
+import org.apache.poi.ss.formula.ptg.Ptg;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.ss.util.CellRangeAddressBase;
+
+/**
+ * Has methods for construction of a chart object.
+ *
+ * @author Glen Stampoultzis (glens at apache.org)
+ */
+public final class HSSFChart {
+ private HSSFSheet sheet; // sheet given to the constructor
+ private ChartRecord chartRecord; // backs the chart X/Y/width/height accessors
+
+ private LegendRecord legendRecord; // filled in by getSheetCharts when a LegendRecord is met; null otherwise
+ @SuppressWarnings("unused")
+ private ChartTitleFormatRecord chartTitleFormat; // recorded by getSheetCharts, not read in the visible code
+ private SeriesTextRecord chartTitleText; // source of getChartTitle/setChartTitle; null if the chart has no title record
+ private List<ValueRangeRecord> valueRanges = new ArrayList<>(); // in the order the records were encountered
+
+ private HSSFChartType type = HSSFChartType.Unknown; // stays Unknown unless a record with a matching sid is found
+
+ private List<HSSFSeries> series = new ArrayList<>(); // one entry per SeriesRecord, in record order
+
+ public enum HSSFChartType { // chart kinds, each tied to a record sid through getSid()
+ Area {
+ @Override
+ public short getSid() {
+ return 0x101A;
+ }
+ },
+ Bar {
+ @Override
+ public short getSid() {
+ return 0x1017;
+ }
+ },
+ Line {
+ @Override
+ public short getSid() {
+ return 0x1018;
+ }
+ },
+ Pie {
+ @Override
+ public short getSid() {
+ return 0x1019;
+ }
+ },
+ Scatter {
+ @Override
+ public short getSid() {
+ return 0x101B;
+ }
+ },
+ Unknown { // default for a chart whose kind was not recognised
+ @Override
+ public short getSid() {
+ return 0; // placeholder value; getSheetCharts skips Unknown when matching sids
+ }
+ };
+
+ public abstract short getSid(); // sid that getSheetCharts compares against Record.getSid()
+ }
+
+ private HSSFChart(HSSFSheet sheet, ChartRecord chartRecord) { // private: in the visible code only getSheetCharts creates instances
+ this.chartRecord = chartRecord;
+ this.sheet = sheet;
+ }
+
+ /**
+ * Creates a bar chart. API needs some work. :)
+ * <p>
+ * Builds a list of records, mostly through this class's
+ * {@code create*} helpers, appending them in exactly the order written
+ * below and ending with {@code EOFRecord.instance}. The finished list
+ * is passed to {@code parentSheet.insertChartRecords(...)} and then
+ * {@code workbook.insertChartRecord()} is called.
+ * <p>
+ * NOTE: Does not yet work... checking it in just so others
+ * can take a look.
+ *
+ * @param workbook the workbook on which {@code insertChartRecord()} is invoked
+ * @param parentSheet the sheet that receives the generated record list
+ */
+ public void createBarChart( HSSFWorkbook workbook, HSSFSheet parentSheet )
+ {
+
+ List<Record> records = new ArrayList<>();
+ records.add( createMSDrawingObjectRecord() );
+ records.add( createOBJRecord() );
+ records.add( createBOFRecord() );
+ records.add(new HeaderRecord(""));
+ records.add(new FooterRecord(""));
+ records.add( createHCenterRecord() );
+ records.add( createVCenterRecord() );
+ records.add( createPrintSetupRecord() );
+ // unknown 33
+ records.add( createFontBasisRecord1() );
+ records.add( createFontBasisRecord2() );
+ records.add(new ProtectRecord(false));
+ records.add( createUnitsRecord() );
+ records.add( createChartRecord( 0, 0, 30434904, 19031616 ) );
+ records.add( createBeginRecord() ); // Begin/End records are added in matching pairs from here on
+ records.add( createSCLRecord( (short) 1, (short) 1 ) );
+ records.add( createPlotGrowthRecord( 65536, 65536 ) );
+ records.add( createFrameRecord1() );
+ records.add( createBeginRecord() );
+ records.add( createLineFormatRecord(true) );
+ records.add( createAreaFormatRecord1() );
+ records.add( createEndRecord() );
+ records.add( createSeriesRecord() );
+ records.add( createBeginRecord() );
+ records.add( createTitleLinkedDataRecord() );
+ records.add( createValuesLinkedDataRecord() );
+ records.add( createCategoriesLinkedDataRecord() );
+ records.add( createDataFormatRecord() );
+ // records.add(createBeginRecord());
+ // unknown
+ // records.add(createEndRecord());
+ records.add( createSeriesToChartGroupRecord() );
+ records.add( createEndRecord() );
+ records.add( createSheetPropsRecord() );
+ records.add( createDefaultTextRecord( DefaultDataLabelTextPropertiesRecord.CATEGORY_DATA_TYPE_ALL_TEXT_CHARACTERISTIC ) );
+ records.add( createAllTextRecord() );
+ records.add( createBeginRecord() );
+ // unknown
+ records.add( createFontIndexRecord( 5 ) );
+ records.add( createDirectLinkRecord() );
+ records.add( createEndRecord() );
+ records.add( createDefaultTextRecord( (short) 3 ) ); // eek, undocumented text type
+ records.add( createUnknownTextRecord() );
+ records.add( createBeginRecord() );
+ records.add( createFontIndexRecord( (short) 6 ) );
+ records.add( createDirectLinkRecord() );
+ records.add( createEndRecord() );
+
+ records.add( createAxisUsedRecord( (short) 1 ) );
+ createAxisRecords( records ); // helper receives the list itself rather than returning a record
+
+ records.add( createEndRecord() ); // closes the Begin added right after the chart record
+ records.add( createDimensionsRecord() );
+ records.add( createSeriesIndexRecord(2) );
+ records.add( createSeriesIndexRecord(1) );
+ records.add( createSeriesIndexRecord(3) );
+ records.add(EOFRecord.instance);
+
+
+
+ parentSheet.insertChartRecords( records );
+ workbook.insertChartRecord();
+ }
+
+ /**
+ * Returns all the charts for the given sheet.
+ * <p>
+ * The sheet's records are walked once, in order. Each
+ * {@link ChartRecord} starts a new chart, and the records that follow
+ * are attributed to that chart until the next {@link ChartRecord}.
+ * Apart from being checked for these two types, records that appear
+ * before the first {@link ChartRecord} are ignored.
+ * <ul>
+ * <li>{@link SeriesRecord} - starts a new series of the current chart</li>
+ * <li>{@link LinkedDataRecord} - passed to the most recent series of
+ * the current chart, if there is one</li>
+ * <li>{@link SeriesTextRecord} - becomes the title of the chart's latest
+ * series while no legend has been seen for the chart and it already has
+ * a series; otherwise it becomes the chart title</li>
+ * <li>{@link LegendRecord}, {@link ChartTitleFormatRecord},
+ * {@link ValueRangeRecord} - stored on the current chart</li>
+ * <li>any other {@link Record} - its sid is compared with the sids of
+ * {@link HSSFChartType} to determine the chart type</li>
+ * </ul>
+ *
+ * NOTE: You won't be able to do very much with
+ * these charts yet, as this is very limited support
+ *
+ * @param sheet the sheet whose records are scanned
+ * @return the charts found, in record order; an empty array if there are none
+ */
+ public static HSSFChart[] getSheetCharts(HSSFSheet sheet) {
+ List<HSSFChart> charts = new ArrayList<>();
+ HSSFChart lastChart = null;
+ HSSFSeries lastSeries = null;
+ // Find records of interest
+ List<RecordBase> records = sheet.getSheet().getRecords();
+ for(RecordBase r : records) {
+
+ if(r instanceof ChartRecord) {
+ lastSeries = null; // a series never carries over from the previous chart
+ lastChart = new HSSFChart(sheet,(ChartRecord)r);
+ charts.add(lastChart);
+ } else if (r instanceof LinkedDataRecord) {
+ LinkedDataRecord linkedDataRecord = (LinkedDataRecord) r;
+ if (lastSeries != null) {
+ lastSeries.insertData(linkedDataRecord);
+ }
+ }
+
+ if (lastChart == null) {
+ continue; // nothing to attach the record to yet
+ }
+
+ if (r instanceof LegendRecord) {
+ lastChart.legendRecord = (LegendRecord)r;
+ } else if(r instanceof SeriesRecord) {
+ HSSFSeries series = new HSSFSeries( (SeriesRecord)r );
+ lastChart.series.add(series);
+ lastSeries = series;
+ } else if(r instanceof ChartTitleFormatRecord) {
+ lastChart.chartTitleFormat = (ChartTitleFormatRecord)r;
+ } else if(r instanceof SeriesTextRecord) {
+ // Applies to a series, unless we've seen a legend already
+ SeriesTextRecord str = (SeriesTextRecord)r;
+ if(lastChart.legendRecord == null && lastChart.series.size() > 0) {
+ HSSFSeries series = lastChart.series.get(lastChart.series.size()-1);
+ series.seriesTitleText = str;
+ } else {
+ lastChart.chartTitleText = str;
+ }
+ } else if(r instanceof ValueRangeRecord){
+ lastChart.valueRanges.add((ValueRangeRecord)r);
+ } else if (r instanceof Record) { // also reached by ChartRecord and LinkedDataRecord instances
+ Record record = (Record) r;
+ for (HSSFChartType type : HSSFChartType.values()) {
+ if (type == HSSFChartType.Unknown) {
+ continue; // Unknown is only a default, never a match target
+ }
+ if (record.getSid() == type.getSid()) {
+ lastChart.type = type; // a later matching record overwrites an earlier one
+ break;
+ }
+ }
+ }
+ }
+
+ return charts.toArray( new HSSFChart[charts.size()] );
+ }
+
+ /** Returns the X offset of the chart, as stored in its {@link ChartRecord} */
+ public int getChartX() { return chartRecord.getX(); }
+ /** Returns the Y offset of the chart, as stored in its {@link ChartRecord} */
+ public int getChartY() { return chartRecord.getY(); }
+ /** Returns the width of the chart, as stored in its {@link ChartRecord} */
+ public int getChartWidth() { return chartRecord.getWidth(); }
+ /** Returns the height of the chart, as stored in its {@link ChartRecord} */
+ public int getChartHeight() { return chartRecord.getHeight(); }
+
+ /** Sets the X offset of the chart on its {@link ChartRecord} */
+ public void setChartX(int x) { chartRecord.setX(x); }
+ /** Sets the Y offset of the chart on its {@link ChartRecord} */
+ public void setChartY(int y) { chartRecord.setY(y); }
+ /** Sets the width of the chart on its {@link ChartRecord} */
+ public void setChartWidth(int width) { chartRecord.setWidth(width); }
+ /** Sets the height of the chart on its {@link ChartRecord} */
+ public void setChartHeight(int height) { chartRecord.setHeight(height); }
+
+ /**
+ * Returns the series of the chart
+ */
+ public HSSFSeries[] getSeries() {
+ return series.toArray(new HSSFSeries[series.size()]);
+ }
+
+ /**
+  * Returns the chart's title.
+  *
+  * @return the title text, or {@code null} if the chart has no title record
+  */
+ public String getChartTitle() {
+     return (chartTitleText == null) ? null : chartTitleText.getText();
+ }
+
+ /**
+  * Changes the chart's title, but only if the chart already had one.
+  * TODO - add in the records if not
+  *
+  * @param title the new title text
+  * @throws IllegalStateException if the chart has no title record to update
+  */
+ public void setChartTitle(String title) {
+     if (chartTitleText == null) {
+         throw new IllegalStateException("No chart title found to change");
+     }
+     chartTitleText.setText(title);
+ }
+
+ /**
+  * Set value range (basic Axis Options).
+  * <p>
+  * If the chart has no value range for the requested axis (index out of
+  * range, or a {@code null} entry), the call does nothing.
+  *
+  * @param axisIndex 0 - primary axis, 1 - secondary axis
+  * @param minimum minimum value; Double.NaN - automatic; null - no change
+  * @param maximum maximum value; Double.NaN - automatic; null - no change
+  * @param majorUnit major unit value; Double.NaN - automatic; null - no change
+  * @param minorUnit minor unit value; Double.NaN - automatic; null - no change
+  */
+ public void setValueRange( int axisIndex, Double minimum, Double maximum, Double majorUnit, Double minorUnit){
+     // An axis that does not exist is treated like a null entry: nothing to
+     // update. Without this guard get() would throw IndexOutOfBoundsException.
+     if (axisIndex < 0 || axisIndex >= valueRanges.size()) {
+         return;
+     }
+     ValueRangeRecord valueRange = valueRanges.get( axisIndex );
+     if (valueRange == null) {
+         return;
+     }
+     if (minimum != null) {
+         valueRange.setAutomaticMinimum(minimum.isNaN());
+         valueRange.setMinimumAxisValue(minimum);
+     }
+     if (maximum != null) {
+         valueRange.setAutomaticMaximum(maximum.isNaN());
+         valueRange.setMaximumAxisValue(maximum);
+     }
+     if (majorUnit != null) {
+         valueRange.setAutomaticMajor(majorUnit.isNaN());
+         valueRange.setMajorIncrement(majorUnit);
+     }
+     if (minorUnit != null) {
+         valueRange.setAutomaticMinor(minorUnit.isNaN());
+         valueRange.setMinorIncrement(minorUnit);
+     }
+ }
+
+ /**
+  * Builds a {@link SeriesIndexRecord} carrying the given index.
+  *
+  * @param index the index to store; narrowed to a {@code short}
+  * @return the new record
+  */
+ private SeriesIndexRecord createSeriesIndexRecord(int index) {
+     SeriesIndexRecord indexRecord = new SeriesIndexRecord();
+     indexRecord.setIndex((short) index);
+     return indexRecord;
+ }
+
+ /**
+  * Builds a {@link DimensionsRecord} spanning rows 0 to 31 and columns 0 to 1.
+  *
+  * @return the new record
+  */
+ private DimensionsRecord createDimensionsRecord() {
+     DimensionsRecord dimensions = new DimensionsRecord();
+     dimensions.setFirstRow(0);
+     dimensions.setLastRow(31);
+     dimensions.setFirstCol((short) 0);
+     dimensions.setLastCol((short) 1);
+     return dimensions;
+ }
+
+ /**
+  * Builds an {@link HCenterRecord} with horizontal centering switched off.
+  *
+  * @return the new record
+  */
+ private HCenterRecord createHCenterRecord() {
+     HCenterRecord hCenter = new HCenterRecord();
+     hCenter.setHCenter(false);
+     return hCenter;
+ }
+
+ /**
+  * Builds a {@link VCenterRecord} with vertical centering switched off.
+  *
+  * @return the new record
+  */
+ private VCenterRecord createVCenterRecord() {
+     VCenterRecord vCenter = new VCenterRecord();
+     vCenter.setVCenter(false);
+     return vCenter;
+ }
+
+ /**
+  * Builds a {@link PrintSetupRecord} populated with a fixed set of values.
+  *
+  * @return the new record
+  */
+ private PrintSetupRecord createPrintSetupRecord() {
+     PrintSetupRecord printSetup = new PrintSetupRecord();
+     printSetup.setPaperSize((short) 0);
+     printSetup.setScale((short) 18);
+     printSetup.setPageStart((short) 1);
+     printSetup.setFitWidth((short) 1);
+     printSetup.setFitHeight((short) 1);
+     printSetup.setLeftToRight(false);
+     printSetup.setLandscape(false);
+     printSetup.setValidSettings(true);
+     printSetup.setNoColor(false);
+     printSetup.setDraft(false);
+     printSetup.setNotes(false);
+     printSetup.setNoOrientation(false);
+     printSetup.setUsePage(false);
+     printSetup.setHResolution((short) 0);
+     printSetup.setVResolution((short) 0);
+     printSetup.setHeaderMargin(0.5);
+     printSetup.setFooterMargin(0.5);
+     // unusual value; the reason for 15 copies is not documented
+     printSetup.setCopies((short) 15);
+     return printSetup;
+ }
+
+ /**
+  * Builds a {@link FontBasisRecord} with fixed basis values that points at
+  * font table index 5.
+  *
+  * @return the new record
+  */
+ private FontBasisRecord createFontBasisRecord1() {
+     FontBasisRecord fontBasis = new FontBasisRecord();
+     fontBasis.setXBasis((short) 9120);
+     fontBasis.setYBasis((short) 5640);
+     fontBasis.setHeightBasis((short) 200);
+     fontBasis.setScale((short) 0);
+     fontBasis.setIndexToFontTable((short) 5);
+     return fontBasis;
+ }
+
+ /**
+  * Builds the same record as {@link #createFontBasisRecord1()} but pointing
+  * at font table index 6.
+  *
+  * @return the new record
+  */
+ private FontBasisRecord createFontBasisRecord2() {
+     FontBasisRecord fontBasis = createFontBasisRecord1();
+     fontBasis.setIndexToFontTable((short) 6);
+     return fontBasis;
+ }
+
+ /**
+  * Builds a {@link BOFRecord} populated with fixed version, type, build and
+  * history values.
+  *
+  * @return the new record
+  */
+ private BOFRecord createBOFRecord() {
+     BOFRecord bof = new BOFRecord();
+     bof.setVersion((short) 600);
+     bof.setType((short) 20);
+     bof.setBuild((short) 0x1CFE);
+     bof.setBuildYear((short) 1997);
+     bof.setHistoryBitMask(0x40C9);
+     bof.setRequiredVersion(106);
+     return bof;
+ }
+
+ /**
+ * Builds an {@link UnknownRecord} with sid 0x005D wrapping a fixed,
+ * hard-coded payload.
+ * NOTE(review): going by the method name this is an OBJ record kept in raw
+ * form; the meaning of the individual bytes is not documented here.
+ *
+ * @return the new record
+ */
+ private UnknownRecord createOBJRecord()
+ {
+ byte[] data = {
+ (byte) 0x15, (byte) 0x00, (byte) 0x12, (byte) 0x00, (byte) 0x05, (byte) 0x00, (byte) 0x02, (byte) 0x00, (byte) 0x11, (byte) 0x60, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0xB8, (byte) 0x03,
+ (byte) 0x87, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
+ };
+
+ return new UnknownRecord( (short) 0x005D, data );
+ }
+
+ /**
+ * Builds an {@link UnknownRecord} with sid 0x00EC wrapping a fixed,
+ * hard-coded payload.
+ *
+ * @return the new record
+ */
+ private UnknownRecord createMSDrawingObjectRecord()
+ {
+ // Since we haven't created this object yet we'll just put in the raw
+ // form for the moment.
+
+ byte[] data = {
+ (byte)0x0F, (byte)0x00, (byte)0x02, (byte)0xF0, (byte)0xC0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0x00, (byte)0x08, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x0F, (byte)0x00, (byte)0x03, (byte)0xF0, (byte)0xA8, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x28, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x01, (byte)0x00, (byte)0x09, (byte)0xF0, (byte)0x10, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x02, (byte)0x00, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x05, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x70, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x92, (byte)0x0C, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x0A, (byte)0x00, (byte)0x00, (byte)0x93, (byte)0x00, (byte)0x0B, (byte)0xF0, (byte)0x36, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x7F, (byte)0x00, (byte)0x04, (byte)0x01, (byte)0x04, (byte)0x01, (byte)0xBF, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x81, (byte)0x01, (byte)0x4E, (byte)0x00,
+ (byte)0x00, (byte)0x08, (byte)0x83, (byte)0x01, (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xBF, (byte)0x01, (byte)0x10, (byte)0x00, (byte)0x11, (byte)0x00, (byte)0xC0, (byte)0x01,
+ (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xFF, (byte)0x01, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x3F, (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x00,
+ (byte)0xBF, (byte)0x03, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0xF0, (byte)0x12, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
+ (byte)0x04, (byte)0x00, (byte)0xC0, (byte)0x02, (byte)0x0A, (byte)0x00, (byte)0xF4, (byte)0x00, (byte)0x0E, (byte)0x00, (byte)0x66, (byte)0x01, (byte)0x20, (byte)0x00, (byte)0xE9, (byte)0x00,
+ (byte)0x00, (byte)0x00, (byte)0x11, (byte)0xF0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00
+ };
+
+ return new UnknownRecord((short)0x00EC, data);
+ }
+
+ /**
+ * Appends a fixed sequence of axis-related records to the given list:
+ * an axis parent, a category/X axis, a value axis, the plot area with its
+ * frame, and a chart format holding a bar record and a legend.
+ * Nested sections are delimited by Begin/End records; the four trailing
+ * End records close the sections that are still open at that point.
+ *
+ * @param records the list the records are appended to, in order
+ */
+ private void createAxisRecords( List<Record> records )
+ {
+ records.add( createAxisParentRecord() );
+ records.add( createBeginRecord() );
+ // category (X) axis section
+ records.add( createAxisRecord( AxisRecord.AXIS_TYPE_CATEGORY_OR_X_AXIS ) );
+ records.add( createBeginRecord() );
+ records.add( createCategorySeriesAxisRecord() );
+ records.add( createAxisOptionsRecord() );
+ records.add( createTickRecord1() );
+ records.add( createEndRecord() );
+ // value axis section
+ records.add( createAxisRecord( AxisRecord.AXIS_TYPE_VALUE_AXIS ) );
+ records.add( createBeginRecord() );
+ records.add( createValueRangeRecord() );
+ records.add( createTickRecord2() );
+ records.add( createAxisLineFormatRecord( AxisLineFormatRecord.AXIS_TYPE_MAJOR_GRID_LINE ) );
+ records.add( createLineFormatRecord(false) );
+ records.add( createEndRecord() );
+ // plot area and its frame
+ records.add( createPlotAreaRecord() );
+ records.add( createFrameRecord2() );
+ records.add( createBeginRecord() );
+ records.add( createLineFormatRecord2() );
+ records.add( createAreaFormatRecord2() );
+ records.add( createEndRecord() );
+ // chart format: bar record, then the legend with its text
+ records.add( createChartFormatRecord() );
+ records.add( createBeginRecord() );
+ records.add( createBarRecord() );
+ // unknown 1022
+ records.add( createLegendRecord() );
+ records.add( createBeginRecord() );
+ // unknown 104f
+ records.add( createTextRecord() );
+ records.add( createBeginRecord() );
+ // unknown 104f
+ records.add( createLinkedDataRecord() );
+ // close text, legend, chart format and axis parent sections
+ records.add( createEndRecord() );
+ records.add( createEndRecord() );
+ records.add( createEndRecord() );
+ records.add( createEndRecord() );
+ }
+
+ /**
+  * Builds a {@link LinkedDataRecord} of link type "title or text" with a
+  * direct reference and no formula.
+  *
+  * @return the new record
+  */
+ private LinkedDataRecord createLinkedDataRecord() {
+     LinkedDataRecord link = new LinkedDataRecord();
+     link.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
+     link.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
+     link.setCustomNumberFormat(false);
+     link.setIndexNumberFmtRecord((short) 0);
+     link.setFormulaOfLink(null);
+     return link;
+ }
+
+ /**
+  * Builds a centred {@link TextRecord} with a fixed set of values; neither
+  * key nor value is shown.
+  *
+  * @return the new record
+  */
+ private TextRecord createTextRecord() {
+     TextRecord text = new TextRecord();
+     text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
+     text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
+     text.setDisplayMode((short) 1);
+     text.setRgbColor(0x00000000);
+     text.setX(-37);
+     text.setY(-60);
+     text.setWidth(0);
+     text.setHeight(0);
+     text.setAutoColor(true);
+     text.setShowKey(false);
+     text.setShowValue(false);
+     text.setVertical(false);
+     text.setAutoGeneratedText(true);
+     text.setGenerated(true);
+     text.setAutoLabelDeleted(false);
+     text.setAutoBackground(true);
+     text.setRotation((short) 0);
+     text.setShowCategoryLabelAsPercentage(false);
+     text.setShowValueAsPercentage(false);
+     text.setShowBubbleSizes(false);
+     text.setShowLabel(false);
+     text.setIndexOfColorValue((short) 77);
+     text.setDataLabelPlacement((short) 0);
+     text.setTextRotation((short) 0);
+     return text;
+ }
+
+ /**
+  * Builds a {@link LegendRecord} of type {@code TYPE_RIGHT} with medium
+  * spacing, fixed position and size values, and automatic positioning.
+  *
+  * @return the new record
+  */
+ private LegendRecord createLegendRecord() {
+     LegendRecord legend = new LegendRecord();
+     legend.setXAxisUpperLeft(3542);
+     legend.setYAxisUpperLeft(1566);
+     legend.setXSize(437);
+     legend.setYSize(213);
+     legend.setType(LegendRecord.TYPE_RIGHT);
+     legend.setSpacing(LegendRecord.SPACING_MEDIUM);
+     legend.setAutoPosition(true);
+     legend.setAutoSeries(true);
+     legend.setAutoXPositioning(true);
+     legend.setAutoYPositioning(true);
+     legend.setVertical(true);
+     legend.setDataTable(false);
+     return legend;
+ }
+
+ /**
+  * Builds a {@link BarRecord} with bar space 0, category space 150 and all
+  * flags switched off.
+  *
+  * @return the new record
+  */
+ private BarRecord createBarRecord() {
+     BarRecord bar = new BarRecord();
+     bar.setBarSpace((short) 0);
+     bar.setCategorySpace((short) 150);
+     bar.setHorizontal(false);
+     bar.setStacked(false);
+     bar.setDisplayAsPercentage(false);
+     bar.setShadow(false);
+     return bar;
+ }
+
+ /**
+  * Builds a {@link ChartFormatRecord} with zero position and size and the
+  * vary-display-pattern flag switched off.
+  *
+  * @return the new record
+  */
+ private ChartFormatRecord createChartFormatRecord() {
+     ChartFormatRecord chartFormat = new ChartFormatRecord();
+     chartFormat.setXPosition(0);
+     chartFormat.setYPosition(0);
+     chartFormat.setWidth(0);
+     chartFormat.setHeight(0);
+     chartFormat.setVaryDisplayPattern(false);
+     return chartFormat;
+ }
+
+ /**
+  * Builds a default-constructed {@link PlotAreaRecord}.
+  *
+  * @return the new record
+  */
+ private PlotAreaRecord createPlotAreaRecord() {
+     PlotAreaRecord plotArea = new PlotAreaRecord();
+     return plotArea;
+ }
+
+ /**
+  * Builds an {@link AxisLineFormatRecord} for the given axis type.
+  *
+  * @param format the value stored as the record's axis type
+  * @return the new record
+  */
+ private AxisLineFormatRecord createAxisLineFormatRecord(short format) {
+     AxisLineFormatRecord axisLineFormat = new AxisLineFormatRecord();
+     axisLineFormat.setAxisType(format);
+     return axisLineFormat;
+ }
+
+ /**
+  * Builds a {@link ValueRangeRecord} with all numeric values at zero and
+  * every "automatic" flag switched on.
+  *
+  * @return the new record
+  */
+ private ValueRangeRecord createValueRangeRecord() {
+     ValueRangeRecord valueRange = new ValueRangeRecord();
+     valueRange.setMinimumAxisValue(0.0);
+     valueRange.setMaximumAxisValue(0.0);
+     valueRange.setMajorIncrement(0);
+     valueRange.setMinorIncrement(0);
+     valueRange.setCategoryAxisCross(0);
+     valueRange.setAutomaticMinimum(true);
+     valueRange.setAutomaticMaximum(true);
+     valueRange.setAutomaticMajor(true);
+     valueRange.setAutomaticMinor(true);
+     valueRange.setAutomaticCategoryCrossing(true);
+     valueRange.setLogarithmicScale(false);
+     valueRange.setValuesInReverse(false);
+     valueRange.setCrossCategoryAxisAtMaximum(false);
+     // the purpose of this flag is not documented here
+     valueRange.setReserved(true);
+     return valueRange;
+ }
+
+ /**
+ * Builds a {@link TickRecord} populated with a fixed set of values
+ * (the "zero3" field is set to 45).
+ *
+ * @return the new record
+ */
+ private TickRecord createTickRecord1()
+ {
+ TickRecord r = new TickRecord();
+ r.setMajorTickType( (byte) 2 );
+ r.setMinorTickType( (byte) 0 );
+ r.setLabelPosition( (byte) 3 );
+ r.setBackground( (byte) 1 );
+ r.setLabelColorRgb( 0 );
+ r.setZero1( (short) 0 );
+ r.setZero2( (short) 0 );
+ r.setZero3( (short) 45 );
+ r.setAutorotate( true );
+ r.setAutoTextBackground( true );
+ r.setRotation( (short) 0 );
+ r.setAutorotate( true ); // NOTE(review): second setAutorotate(true) call in this method - redundant, or was another setter intended? confirm
+ r.setTickColor( (short) 77 );
+ return r;
+ }
+
+ /**
+  * Builds the same record as {@link #createTickRecord1()} but with the
+  * "zero3" field reset to 0.
+  *
+  * @return the new record
+  */
+ private TickRecord createTickRecord2() {
+     TickRecord tick = createTickRecord1();
+     tick.setZero3((short) 0);
+     return tick;
+ }
+
+ /**
+  * Builds an {@link AxisOptionsRecord} with fixed category/unit values and
+  * every "default" flag switched on.
+  *
+  * @return the new record
+  */
+ private AxisOptionsRecord createAxisOptionsRecord() {
+     AxisOptionsRecord axisOptions = new AxisOptionsRecord();
+     axisOptions.setMinimumCategory((short) -28644);
+     axisOptions.setMaximumCategory((short) -28715);
+     axisOptions.setMajorUnitValue((short) 2);
+     axisOptions.setMajorUnit((short) 0);
+     axisOptions.setMinorUnitValue((short) 1);
+     axisOptions.setMinorUnit((short) 0);
+     axisOptions.setBaseUnit((short) 0);
+     axisOptions.setCrossingPoint((short) -28644);
+     axisOptions.setDefaultMinimum(true);
+     axisOptions.setDefaultMaximum(true);
+     axisOptions.setDefaultMajor(true);
+     axisOptions.setDefaultMinorUnit(true);
+     axisOptions.setIsDate(true);
+     axisOptions.setDefaultBase(true);
+     axisOptions.setDefaultCross(true);
+     axisOptions.setDefaultDateSettings(true);
+     return axisOptions;
+ }
+
+ /**
+  * Builds a {@link CategorySeriesAxisRecord} with crossing point, label
+  * frequency and tick mark frequency all set to 1.
+  *
+  * @return the new record
+  */
+ private CategorySeriesAxisRecord createCategorySeriesAxisRecord() {
+     CategorySeriesAxisRecord categoryAxis = new CategorySeriesAxisRecord();
+     categoryAxis.setCrossingPoint((short) 1);
+     categoryAxis.setLabelFrequency((short) 1);
+     categoryAxis.setTickMarkFrequency((short) 1);
+     categoryAxis.setValueAxisCrossing(true);
+     categoryAxis.setCrossesFarRight(false);
+     categoryAxis.setReversed(false);
+     return categoryAxis;
+ }
+
+ /**
+  * Builds an {@link AxisRecord} of the given axis type.
+  *
+  * @param axisType the value stored as the record's axis type
+  * @return the new record
+  */
+ private AxisRecord createAxisRecord(short axisType) {
+     AxisRecord axis = new AxisRecord();
+     axis.setAxisType(axisType);
+     return axis;
+ }
+
+ /**
+  * Builds an {@link AxisParentRecord} of type {@code AXIS_TYPE_MAIN} with
+  * fixed position and size values.
+  *
+  * @return the new record
+  */
+ private AxisParentRecord createAxisParentRecord() {
+     AxisParentRecord axisParent = new AxisParentRecord();
+     axisParent.setAxisType(AxisParentRecord.AXIS_TYPE_MAIN);
+     axisParent.setX(479);
+     axisParent.setY(221);
+     axisParent.setWidth(2995);
+     axisParent.setHeight(2902);
+     return axisParent;
+ }
+
+ /**
+  * Builds an {@link AxisUsedRecord} holding the given axis count.
+  *
+  * @param numAxis the value stored as the number of axes
+  * @return the new record
+  */
+ private AxisUsedRecord createAxisUsedRecord(short numAxis) {
+     AxisUsedRecord axisUsed = new AxisUsedRecord();
+     axisUsed.setNumAxis(numAxis);
+     return axisUsed;
+ }
+
+ /**
+  * Builds a {@link LinkedDataRecord} of link type "title or text" with a
+  * direct reference and no formula.
+  *
+  * @return the new record
+  */
+ private LinkedDataRecord createDirectLinkRecord() {
+     LinkedDataRecord directLink = new LinkedDataRecord();
+     directLink.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
+     directLink.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
+     directLink.setCustomNumberFormat(false);
+     directLink.setIndexNumberFmtRecord((short) 0);
+     directLink.setFormulaOfLink(null);
+     return directLink;
+ }
+
+ /**
+  * Builds a {@link FontIndexRecord} for the given font index.
+  *
+  * @param index the font index; narrowed to a {@code short}
+  * @return the new record
+  */
+ private FontIndexRecord createFontIndexRecord(int index) {
+     FontIndexRecord fontIndex = new FontIndexRecord();
+     fontIndex.setFontIndex((short) index);
+     return fontIndex;
+ }
+
+ /**
+  * Builds a centred, transparent {@link TextRecord} with a fixed set of
+  * values; the value is shown, the key is not.
+  *
+  * @return the new record
+  */
+ private TextRecord createAllTextRecord() {
+     TextRecord text = new TextRecord();
+     text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
+     text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
+     text.setDisplayMode(TextRecord.DISPLAY_MODE_TRANSPARENT);
+     text.setRgbColor(0);
+     text.setX(-37);
+     text.setY(-60);
+     text.setWidth(0);
+     text.setHeight(0);
+     text.setAutoColor(true);
+     text.setShowKey(false);
+     text.setShowValue(true);
+     text.setVertical(false);
+     text.setAutoGeneratedText(true);
+     text.setGenerated(true);
+     text.setAutoLabelDeleted(false);
+     text.setAutoBackground(true);
+     text.setRotation((short) 0);
+     text.setShowCategoryLabelAsPercentage(false);
+     text.setShowValueAsPercentage(false);
+     text.setShowBubbleSizes(false);
+     text.setShowLabel(false);
+     text.setIndexOfColorValue((short) 77);
+     text.setDataLabelPlacement((short) 0);
+     text.setTextRotation((short) 0);
+     return text;
+ }
+
+ /**
+  * Builds a centred, transparent {@link TextRecord} with a fixed set of
+  * values; neither key nor value is shown and the data label placement is
+  * set to 11088.
+  *
+  * @return the new record
+  */
+ private TextRecord createUnknownTextRecord() {
+     TextRecord text = new TextRecord();
+     text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
+     text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
+     text.setDisplayMode(TextRecord.DISPLAY_MODE_TRANSPARENT);
+     text.setRgbColor(0);
+     text.setX(-37);
+     text.setY(-60);
+     text.setWidth(0);
+     text.setHeight(0);
+     text.setAutoColor(true);
+     text.setShowKey(false);
+     text.setShowValue(false);
+     text.setVertical(false);
+     text.setAutoGeneratedText(true);
+     text.setGenerated(true);
+     text.setAutoLabelDeleted(false);
+     text.setAutoBackground(true);
+     text.setRotation((short) 0);
+     text.setShowCategoryLabelAsPercentage(false);
+     text.setShowValueAsPercentage(false);
+     text.setShowBubbleSizes(false);
+     text.setShowLabel(false);
+     text.setIndexOfColorValue((short) 77);
+     text.setDataLabelPlacement((short) 11088);
+     text.setTextRotation((short) 0);
+     return text;
+ }
+
+ /**
+  * Builds a {@link DefaultDataLabelTextPropertiesRecord} for the given
+  * category data type.
+  *
+  * @param categoryDataType the value stored as the record's category data type
+  * @return the new record
+  */
+ private DefaultDataLabelTextPropertiesRecord createDefaultTextRecord(short categoryDataType) {
+     DefaultDataLabelTextPropertiesRecord defaultText = new DefaultDataLabelTextPropertiesRecord();
+     defaultText.setCategoryDataType(categoryDataType);
+     return defaultText;
+ }
+
+ /**
+  * Builds a {@link SheetPropertiesRecord} with a fixed combination of flags.
+  *
+  * @return the new record
+  */
+ private SheetPropertiesRecord createSheetPropsRecord() {
+     SheetPropertiesRecord sheetProps = new SheetPropertiesRecord();
+     sheetProps.setChartTypeManuallyFormatted(false);
+     sheetProps.setPlotVisibleOnly(true);
+     sheetProps.setDoNotSizeWithWindow(false);
+     sheetProps.setDefaultPlotDimensions(true);
+     sheetProps.setAutoPlotArea(false);
+     return sheetProps;
+ }
+
+ /**
+  * Builds a default-constructed {@link SeriesToChartGroupRecord}.
+  *
+  * @return the new record
+  */
+ private SeriesToChartGroupRecord createSeriesToChartGroupRecord() {
+     SeriesToChartGroupRecord seriesToChartGroup = new SeriesToChartGroupRecord();
+     return seriesToChartGroup;
+ }
+
+ /**
+  * Builds a {@link DataFormatRecord} with point number -1, series index and
+  * series number 0, and Excel 4 colours switched off.
+  *
+  * @return the new record
+  */
+ private DataFormatRecord createDataFormatRecord() {
+     DataFormatRecord dataFormat = new DataFormatRecord();
+     dataFormat.setPointNumber((short) -1);
+     dataFormat.setSeriesIndex((short) 0);
+     dataFormat.setSeriesNumber((short) 0);
+     dataFormat.setUseExcel4Colors(false);
+     return dataFormat;
+ }
+
+ /**
+  * Builds a {@link LinkedDataRecord} of link type "categories" that
+  * references a worksheet area through a single {@link Area3DPtg}.
+  *
+  * @return the new record
+  */
+ private LinkedDataRecord createCategoriesLinkedDataRecord() {
+     LinkedDataRecord categoriesLink = new LinkedDataRecord();
+     categoriesLink.setLinkType(LinkedDataRecord.LINK_TYPE_CATEGORIES);
+     categoriesLink.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_WORKSHEET);
+     categoriesLink.setCustomNumberFormat(false);
+     categoriesLink.setIndexNumberFmtRecord((short) 0);
+     Area3DPtg categoryArea = new Area3DPtg(0, 31, 1, 1, false, false, false, false, 0);
+     categoriesLink.setFormulaOfLink(new Ptg[] { categoryArea });
+     return categoriesLink;
+ }
+
+ /**
+  * Builds a {@link LinkedDataRecord} of link type "values" that references
+  * a worksheet area through a single {@link Area3DPtg}.
+  *
+  * @return the new record
+  */
+ private LinkedDataRecord createValuesLinkedDataRecord() {
+     LinkedDataRecord valuesLink = new LinkedDataRecord();
+     valuesLink.setLinkType(LinkedDataRecord.LINK_TYPE_VALUES);
+     valuesLink.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_WORKSHEET);
+     valuesLink.setCustomNumberFormat(false);
+     valuesLink.setIndexNumberFmtRecord((short) 0);
+     Area3DPtg valueArea = new Area3DPtg(0, 31, 0, 0, false, false, false, false, 0);
+     valuesLink.setFormulaOfLink(new Ptg[] { valueArea });
+     return valuesLink;
+ }
+
+ /**
+  * Builds a {@link LinkedDataRecord} of link type "title or text" with a
+  * direct reference and no formula.
+  *
+  * @return the new record
+  */
+ private LinkedDataRecord createTitleLinkedDataRecord() {
+     LinkedDataRecord titleLink = new LinkedDataRecord();
+     titleLink.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
+     titleLink.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
+     titleLink.setCustomNumberFormat(false);
+     titleLink.setIndexNumberFmtRecord((short) 0);
+     titleLink.setFormulaOfLink(null);
+     return titleLink;
+ }
+
+ /**
+  * Builds a numeric {@link SeriesRecord} with 32 categories, 31 values and
+  * no bubble values.
+  *
+  * @return the new record
+  */
+ private SeriesRecord createSeriesRecord() {
+     SeriesRecord seriesRecord = new SeriesRecord();
+     seriesRecord.setCategoryDataType(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC);
+     seriesRecord.setValuesDataType(SeriesRecord.VALUES_DATA_TYPE_NUMERIC);
+     seriesRecord.setNumCategories((short) 32);
+     seriesRecord.setNumValues((short) 31);
+     seriesRecord.setBubbleSeriesType(SeriesRecord.BUBBLE_SERIES_TYPE_NUMERIC);
+     seriesRecord.setNumBubbleValues((short) 0);
+     return seriesRecord;
+ }
+
+ /**
+  * Builds a new {@link EndRecord}.
+  *
+  * @return the new record
+  */
+ private EndRecord createEndRecord() {
+     EndRecord end = new EndRecord();
+     return end;
+ }
+
+ /**
+  * Builds an automatic {@link AreaFormatRecord} with a white foreground and
+  * a black background.
+  *
+  * @return the new record
+  */
+ private AreaFormatRecord createAreaFormatRecord1() {
+     AreaFormatRecord areaFormat = new AreaFormatRecord();
+     areaFormat.setForegroundColor(0x00FFFFFF); // RGB Color
+     areaFormat.setBackgroundColor(0x00000000); // RGB Color
+     areaFormat.setPattern((short) 1); // TODO: Add Pattern constants to record
+     areaFormat.setAutomatic(true);
+     areaFormat.setInvert(false);
+     areaFormat.setForecolorIndex((short) 78);
+     areaFormat.setBackcolorIndex((short) 77);
+     return areaFormat;
+ }
+
+ /**
+  * Builds a non-automatic {@link AreaFormatRecord} with foreground colour
+  * 0x00c0c0c0 and a black background.
+  *
+  * @return the new record
+  */
+ private AreaFormatRecord createAreaFormatRecord2() {
+     AreaFormatRecord areaFormat = new AreaFormatRecord();
+     areaFormat.setForegroundColor(0x00c0c0c0);
+     areaFormat.setBackgroundColor(0x00000000);
+     areaFormat.setPattern((short) 1);
+     areaFormat.setAutomatic(false);
+     areaFormat.setInvert(false);
+     areaFormat.setForecolorIndex((short) 22);
+     areaFormat.setBackcolorIndex((short) 79);
+     return areaFormat;
+ }
+
+ /**
+  * Builds an automatic, solid {@link LineFormatRecord} with line colour 0
+  * and weight -1.
+  *
+  * @param drawTicks the value stored as the record's draw-ticks flag
+  * @return the new record
+  */
+ private LineFormatRecord createLineFormatRecord(boolean drawTicks) {
+     LineFormatRecord lineFormat = new LineFormatRecord();
+     lineFormat.setLineColor(0);
+     lineFormat.setLinePattern(LineFormatRecord.LINE_PATTERN_SOLID);
+     lineFormat.setWeight((short) -1);
+     lineFormat.setAuto(true);
+     lineFormat.setDrawTicks(drawTicks);
+     // palette index; the actual colour is not documented here
+     lineFormat.setColourPaletteIndex((short) 77);
+     return lineFormat;
+ }
+
+ /**
+  * Builds a non-automatic {@link LineFormatRecord} with line colour
+  * 0x00808080, pattern 0 and weight 0.
+  *
+  * @return the new record
+  */
+ private LineFormatRecord createLineFormatRecord2() {
+     LineFormatRecord lineFormat = new LineFormatRecord();
+     lineFormat.setLineColor(0x00808080);
+     lineFormat.setLinePattern((short) 0);
+     lineFormat.setWeight((short) 0);
+     lineFormat.setAuto(false);
+     lineFormat.setDrawTicks(false);
+     lineFormat.setUnknown(false);
+     lineFormat.setColourPaletteIndex((short) 23);
+     return lineFormat;
+ }
+
+ /**
+  * Builds a regular-border {@link FrameRecord} with auto-size off and
+  * auto-position on.
+  *
+  * @return the new record
+  */
+ private FrameRecord createFrameRecord1() {
+     FrameRecord frame = new FrameRecord();
+     frame.setBorderType(FrameRecord.BORDER_TYPE_REGULAR);
+     frame.setAutoSize(false);
+     frame.setAutoPosition(true);
+     return frame;
+ }
+
+ /**
+  * Builds a regular-border {@link FrameRecord} with both auto-size and
+  * auto-position on.
+  *
+  * @return the new record
+  */
+ private FrameRecord createFrameRecord2() {
+     FrameRecord frame = new FrameRecord();
+     frame.setBorderType(FrameRecord.BORDER_TYPE_REGULAR);
+     frame.setAutoSize(true);
+     frame.setAutoPosition(true);
+     return frame;
+ }
+
+ /**
+  * Builds a {@link PlotGrowthRecord} with the given scales.
+  *
+  * @param horizScale the value stored as the horizontal scale
+  * @param vertScale the value stored as the vertical scale
+  * @return the new record
+  */
+ private PlotGrowthRecord createPlotGrowthRecord(int horizScale, int vertScale) {
+     PlotGrowthRecord plotGrowth = new PlotGrowthRecord();
+     plotGrowth.setHorizontalScale(horizScale);
+     plotGrowth.setVerticalScale(vertScale);
+     return plotGrowth;
+ }
+
+ /**
+  * Builds an {@link SCLRecord} with the given numerator and denominator.
+  *
+  * @param numerator the value stored as the numerator
+  * @param denominator the value stored as the denominator
+  * @return the new record
+  */
+ private SCLRecord createSCLRecord(short numerator, short denominator) {
+     SCLRecord scl = new SCLRecord();
+     scl.setDenominator(denominator);
+     scl.setNumerator(numerator);
+     return scl;
+ }
+
+ /**
+  * Builds a new {@link BeginRecord}.
+  *
+  * @return the new record
+  */
+ private BeginRecord createBeginRecord() {
+     BeginRecord begin = new BeginRecord();
+     return begin;
+ }
+
+ /**
+  * Builds a {@link ChartRecord} with the given position and size.
+  *
+  * @param x the X value to store
+  * @param y the Y value to store
+  * @param width the width to store
+  * @param height the height to store
+  * @return the new record
+  */
+ private ChartRecord createChartRecord(int x, int y, int width, int height) {
+     ChartRecord chart = new ChartRecord();
+     chart.setX(x);
+     chart.setY(y);
+     chart.setWidth(width);
+     chart.setHeight(height);
+     return chart;
+ }
+
+ /**
+  * Builds a {@link UnitsRecord} with units set to 0.
+  *
+  * @return the new record
+  */
+ private UnitsRecord createUnitsRecord() {
+     UnitsRecord units = new UnitsRecord();
+     units.setUnits((short) 0);
+     return units;
+ }
+
+
+ /**
+  * A series in a chart, together with the title and linked-data records
+  * that have been attached to it.
+  */
+ public static class HSSFSeries {
+     private SeriesRecord series;
+     private SeriesTextRecord seriesTitleText;
+     private LinkedDataRecord dataName;
+     private LinkedDataRecord dataValues;
+     private LinkedDataRecord dataCategoryLabels;
+     private LinkedDataRecord dataSecondaryCategoryLabels;
+
+     /* package */ HSSFSeries(SeriesRecord series) {
+         this.series = series;
+     }
+
+     /**
+      * Stores the given linked-data record in the slot selected by its link type.
+      *
+      * @throws IllegalStateException if the link type is none of the four known ones
+      */
+     /* package */ void insertData(LinkedDataRecord data) {
+         switch (data.getLinkType()) {
+             case LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT:
+                 dataName = data;
+                 break;
+             case LinkedDataRecord.LINK_TYPE_VALUES:
+                 dataValues = data;
+                 break;
+             case LinkedDataRecord.LINK_TYPE_CATEGORIES:
+                 dataCategoryLabels = data;
+                 break;
+             case LinkedDataRecord.LINK_TYPE_SECONDARY_CATEGORIES:
+                 dataSecondaryCategoryLabels = data;
+                 break;
+             default:
+                 throw new IllegalStateException("Invalid link type: " + data.getLinkType());
+         }
+     }
+
+     /* package */ void setSeriesTitleText(SeriesTextRecord seriesTitleText) {
+         this.seriesTitleText = seriesTitleText;
+     }
+
+     /**
+      * @return the number of values reported by the series record
+      */
+     public short getNumValues() {
+         return series.getNumValues();
+     }
+
+     /**
+      * See {@link SeriesRecord}
+      *
+      * @return the values data type reported by the series record
+      */
+     public short getValueType() {
+         return series.getValuesDataType();
+     }
+
+     /**
+      * @return the series' title, or {@code null} if it has no title record
+      */
+     public String getSeriesTitle() {
+         return (seriesTitleText == null) ? null : seriesTitleText.getText();
+     }
+
+     /**
+      * Changes the series' title, but only if the series already had one.
+      * TODO - add in the records if not
+      *
+      * @throws IllegalStateException if the series has no title record to update
+      */
+     public void setSeriesTitle(String title) {
+         if (seriesTitleText == null) {
+             throw new IllegalStateException("No series title found to change");
+         }
+         seriesTitleText.setText(title);
+     }
+
+     /**
+      * @return record with data names
+      */
+     public LinkedDataRecord getDataName() {
+         return dataName;
+     }
+
+     /**
+      * @return record with data values
+      */
+     public LinkedDataRecord getDataValues() {
+         return dataValues;
+     }
+
+     /**
+      * @return record with data category labels
+      */
+     public LinkedDataRecord getDataCategoryLabels() {
+         return dataCategoryLabels;
+     }
+
+     /**
+      * @return record with data secondary category labels
+      */
+     public LinkedDataRecord getDataSecondaryCategoryLabels() {
+         return dataSecondaryCategoryLabels;
+     }
+
+     /**
+      * @return record with series
+      */
+     public SeriesRecord getSeries() {
+         return series;
+     }
+
+     /**
+      * Reads the cell range from the last area token of the record's formula.
+      *
+      * @return {@code null} if no record is given; otherwise the range of the
+      *         last area token, or the range (0, 0, 0, 0) if there is none
+      */
+     private CellRangeAddressBase getCellRange(LinkedDataRecord linkedDataRecord) {
+         if (linkedDataRecord == null) {
+             return null;
+         }
+
+         // Later area tokens win over earlier ones, so only the last matters.
+         AreaPtgBase lastArea = null;
+         for (Ptg token : linkedDataRecord.getFormulaOfLink()) {
+             if (token instanceof AreaPtgBase) {
+                 lastArea = (AreaPtgBase) token;
+             }
+         }
+
+         if (lastArea == null) {
+             return new CellRangeAddress(0, 0, 0, 0);
+         }
+         return new CellRangeAddress(
+                 lastArea.getFirstRow(), lastArea.getLastRow(),
+                 lastArea.getFirstColumn(), lastArea.getLastColumn());
+     }
+
+     public CellRangeAddressBase getValuesCellRange() {
+         return getCellRange(dataValues);
+     }
+
+     public CellRangeAddressBase getCategoryLabelsCellRange() {
+         return getCellRange(dataCategoryLabels);
+     }
+
+     /**
+      * Points every area token of the record's formula at the given range and
+      * replaces the formula with just those area tokens.
+      *
+      * @return {@code null} if no record is given; otherwise the number of
+      *         cells in the range (rows times columns)
+      */
+     private Integer setVerticalCellRange(LinkedDataRecord linkedDataRecord,
+             CellRangeAddressBase range) {
+         if (linkedDataRecord == null) {
+             return null;
+         }
+
+         List<Ptg> areaTokens = new ArrayList<>();
+
+         int rows = (range.getLastRow() - range.getFirstRow()) + 1;
+         int cols = (range.getLastColumn() - range.getFirstColumn()) + 1;
+
+         for (Ptg token : linkedDataRecord.getFormulaOfLink()) {
+             if (!(token instanceof AreaPtgBase)) {
+                 continue;
+             }
+             AreaPtgBase area = (AreaPtgBase) token;
+
+             area.setFirstRow(range.getFirstRow());
+             area.setLastRow(range.getLastRow());
+
+             area.setFirstColumn(range.getFirstColumn());
+             area.setLastColumn(range.getLastColumn());
+             areaTokens.add(area);
+         }
+
+         Ptg[] newFormula = areaTokens.toArray(new Ptg[areaTokens.size()]);
+         linkedDataRecord.setFormulaOfLink(newFormula);
+
+         return rows * cols;
+     }
+
+     public void setValuesCellRange(CellRangeAddressBase range) {
+         Integer cellCount = setVerticalCellRange(dataValues, range);
+         if (cellCount != null) {
+             series.setNumValues(cellCount.shortValue());
+         }
+     }
+
+     public void setCategoryLabelsCellRange(CellRangeAddressBase range) {
+         Integer cellCount = setVerticalCellRange(dataCategoryLabels, range);
+         if (cellCount != null) {
+             series.setNumCategories(cellCount.shortValue());
+         }
+     }
+ }
+
+ /**
+ * Adds a new series to this chart by cloning the records of the chart's
+ * first series and inserting the clones into the sheet's record list.
+ * <p>
+ * The sheet records are scanned while tracking the Begin/End nesting depth.
+ * The first series found after this chart's {@link ChartRecord} (its
+ * {@link SeriesRecord} up to the matching {@link EndRecord}) becomes the
+ * template. Cloned {@link DataFormatRecord}s get their series index and
+ * number set to the count of series already present in the chart.
+ *
+ * @return the new series, or {@code null} if the chart has no series to
+ * use as a template
+ * @throws Exception declared by the signature; NOTE(review): nothing in
+ * this body visibly throws a checked exception except possibly the
+ * clone() calls - confirm
+ */
+ public HSSFSeries createSeries() throws Exception {
+ ArrayList<RecordBase> seriesTemplate = new ArrayList<>();
+ boolean seriesTemplateFilled = false;
+
+ // idx is incremented before each record is inspected, so while a record
+ // is being processed it holds that record's position plus one
+ int idx = 0;
+ int deep = 0;
+ int chartRecordIdx = -1;
+ int chartDeep = -1;
+ int lastSeriesDeep = -1;
+ int endSeriesRecordIdx = -1;
+ int seriesIdx = 0;
+ final List<RecordBase> records = sheet.getSheet().getRecords();
+
+ /* store first series as template and find last series index */
+ for(final RecordBase record : records) {
+
+ idx++;
+
+ if (record instanceof BeginRecord) {
+ deep++;
+ } else if (record instanceof EndRecord) {
+ deep--;
+
+ // back at the depth where the current series started: the series ends here
+ if (lastSeriesDeep == deep) {
+ lastSeriesDeep = -1;
+ endSeriesRecordIdx = idx;
+ if (!seriesTemplateFilled) {
+ seriesTemplate.add(record);
+ seriesTemplateFilled = true;
+ }
+ }
+
+ // back at the depth where this chart started: stop scanning
+ if (chartDeep == deep) {
+ break;
+ }
+ }
+
+ if (record instanceof ChartRecord) {
+ if (record == chartRecord) {
+ chartRecordIdx = idx;
+ chartDeep = deep;
+ }
+ } else if (record instanceof SeriesRecord) {
+ // only series seen after this chart's record are counted
+ if (chartRecordIdx != -1) {
+ seriesIdx++;
+ lastSeriesDeep = deep;
+ }
+ }
+
+ // while inside the first series, collect its records as the template
+ if (lastSeriesDeep != -1 && !seriesTemplateFilled) {
+ seriesTemplate.add(record) ;
+ }
+ }
+
+ /* check if a series was found */
+ if (endSeriesRecordIdx == -1) {
+ return null;
+ }
+
+ /* next index in the records list where the new series can be inserted */
+ // NOTE(review): endSeriesRecordIdx is already one past the series' End
+ // record (see idx above), so the +1 skips one further record - confirm intended
+ idx = endSeriesRecordIdx + 1;
+
+ HSSFSeries newSeries = null;
+
+ /* duplicate record of the template series */
+ ArrayList<RecordBase> clonedRecords = new ArrayList<>();
+ for(final RecordBase record : seriesTemplate) {
+
+ Record newRecord = null;
+
+ if (record instanceof BeginRecord) {
+ newRecord = new BeginRecord();
+ } else if (record instanceof EndRecord) {
+ newRecord = new EndRecord();
+ } else if (record instanceof SeriesRecord) {
+ SeriesRecord seriesRecord = (SeriesRecord) ((SeriesRecord)record).clone();
+ newSeries = new HSSFSeries(seriesRecord);
+ newRecord = seriesRecord;
+ } else if (record instanceof LinkedDataRecord) {
+ LinkedDataRecord linkedDataRecord = ((LinkedDataRecord)record).clone();
+ if (newSeries != null) {
+ newSeries.insertData(linkedDataRecord);
+ }
+ newRecord = linkedDataRecord;
+ } else if (record instanceof DataFormatRecord) {
+ DataFormatRecord dataFormatRecord = ((DataFormatRecord)record).clone();
+
+ // seriesIdx is the number of existing series, used as the new series' index
+ dataFormatRecord.setSeriesIndex((short)seriesIdx) ;
+ dataFormatRecord.setSeriesNumber((short)seriesIdx) ;
+
+ newRecord = dataFormatRecord;
+ } else if (record instanceof SeriesTextRecord) {
+ SeriesTextRecord seriesTextRecord = (SeriesTextRecord) ((SeriesTextRecord)record).clone();
+ if (newSeries != null) {
+ newSeries.setSeriesTitleText(seriesTextRecord);
+ }
+ newRecord = seriesTextRecord;
+ } else if (record instanceof Record) {
+ newRecord = (Record) ((Record)record).clone();
+ }
+
+ // template entries that are not Record instances are dropped
+ if (newRecord != null)
+ {
+ clonedRecords.add(newRecord);
+ }
+ }
+
+ /* check if a user model series object was created */
+ if (newSeries == null)
+ {
+ return null;
+ }
+
+ /* transfer series to record list */
+ for(final RecordBase record : clonedRecords) {
+ records.add(idx++, record);
+ }
+
+ return newSeries;
+ }
+
+ /**
+ * Removes the records of the given series from the sheet's record list.
+ * <p>
+ * The sheet records are scanned while tracking the Begin/End nesting depth.
+ * Once this chart's {@link ChartRecord} has been seen, every record from the
+ * matching {@link SeriesRecord} up to and including its closing
+ * {@link EndRecord} is deleted. {@link DataFormatRecord}s that are kept get
+ * their series index and number rewritten to the running count of kept series.
+ *
+ * @param remSeries the series to remove; matched by identity of its series record
+ * @return {@code true} if the closing End record of the series was reached
+ * and removed, {@code false} otherwise
+ */
+ public boolean removeSeries(HSSFSeries remSeries) {
+ int deep = 0;
+ int chartDeep = -1;
+ int lastSeriesDeep = -1;
+ int seriesIdx = -1;
+ boolean removeSeries = false;
+ boolean chartEntered = false;
+ boolean result = false;
+ final List<RecordBase> records = sheet.getSheet().getRecords();
+
+ /* walk the records, deleting those of the given series and renumbering the data formats that are kept */
+ Iterator<RecordBase> iter = records.iterator();
+ while (iter.hasNext()) {
+ RecordBase record = iter.next();
+
+ if (record instanceof BeginRecord) {
+ deep++;
+ } else if (record instanceof EndRecord) {
+ deep--;
+
+ // back at the depth where the series to remove started: it ends here
+ if (lastSeriesDeep == deep) {
+ lastSeriesDeep = -1;
+
+ if (removeSeries) {
+ removeSeries = false;
+ result = true;
+ iter.remove();
+ }
+ }
+
+ // back at the depth where this chart started: stop scanning
+ if (chartDeep == deep) {
+ break;
+ }
+ }
+
+ if (record instanceof ChartRecord) {
+ if (record == chartRecord) {
+ chartDeep = deep;
+ chartEntered = true;
+ }
+ } else if (record instanceof SeriesRecord) {
+ if (chartEntered) {
+ if (remSeries.series == record) {
+ lastSeriesDeep = deep;
+ removeSeries = true;
+ } else {
+ // a series that stays: advance the index used for renumbering
+ seriesIdx++;
+ }
+ }
+ } else if (record instanceof DataFormatRecord) {
+ if (chartEntered && !removeSeries) {
+ DataFormatRecord dataFormatRecord = (DataFormatRecord) record;
+ dataFormatRecord.setSeriesIndex((short) seriesIdx);
+ dataFormatRecord.setSeriesNumber((short) seriesIdx);
+ }
+ }
+
+ // still inside the series being removed: drop the current record
+ if (removeSeries) {
+ iter.remove();
+ }
+ }
+
+ return result;
+ }
+
+ /**
+  * Returns the chart type recorded for this chart.
+  *
+  * @return the current value of the {@code type} field
+  */
+ public HSSFChartType getType() {
+     final HSSFChartType chartType = type;
+     return chartType;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import org.apache.poi.ss.usermodel.Shape;
+
+/**
+ * A collection of embedded object informations and content
+ */
+public class EmbeddedData {
+ private String filename;
+ private byte[] embeddedData;
+ private Shape shape;
+ private String contentType = "binary/octet-stream";
+
+ public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
+ setFilename(filename);
+ setEmbeddedData(embeddedData);
+ setContentType(contentType);
+ }
+
+ /**
+ * @return the filename
+ */
+ public String getFilename() {
+ return filename;
+ }
+
+ /**
+ * Sets the filename
+ *
+ * @param filename the filename
+ */
+ public void setFilename(String filename) {
+ if (filename == null) {
+ this.filename = "unknown.bin";
+ } else {
+ this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
+ }
+ }
+
+ /**
+ * @return the embedded object byte array
+ */
+ public byte[] getEmbeddedData() {
+ return embeddedData;
+ }
+
+ /**
+ * Sets the embedded object as byte array
+ *
+ * @param embeddedData the embedded object byte array
+ */
+ public void setEmbeddedData(byte[] embeddedData) {
+ this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
+ }
+
+ /**
+ * @return the shape which links to the embedded object
+ */
+ public Shape getShape() {
+ return shape;
+ }
+
+ /**
+ * Sets the shape which links to the embedded object
+ *
+ * @param shape the shape
+ */
+ public void setShape(Shape shape) {
+ this.shape = shape;
+ }
+
+ /**
+ * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream}
+ */
+ public String getContentType() {
+ return contentType;
+ }
+
+ /**
+ * Sets the content-/mime-type
+ *
+ * @param contentType the content-type
+ */
+ public void setContentType(String contentType) {
+ this.contentType = contentType;
+ }
+}
\ No newline at end of file
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.hpsf.ClassID;
+import org.apache.poi.hpsf.ClassIDPredefined;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Drawing;
+import org.apache.poi.ss.usermodel.ObjectData;
+import org.apache.poi.ss.usermodel.Picture;
+import org.apache.poi.ss.usermodel.PictureData;
+import org.apache.poi.ss.usermodel.Shape;
+import org.apache.poi.ss.usermodel.ShapeContainer;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.util.Beta;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LocaleUtil;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * This extractor class tries to identify various embedded documents within Excel files
+ * and provide them via a common interface, i.e. the EmbeddedData instances
+ */
+@Beta
+public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
+ private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
+ //arbitrarily selected; may need to increase
+ private static final int MAX_RECORD_LENGTH = 1_000_000;
+
+ // contentType
+ private static final String CONTENT_TYPE_BYTES = "binary/octet-stream";
+ private static final String CONTENT_TYPE_PDF = "application/pdf";
+ private static final String CONTENT_TYPE_DOC = "application/msword";
+ private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel";
+
+ /**
+ * Provides the extractors which are tried - in this order - by the {@code extractOne} methods.
+ *
+ * @return the list of known extractors, if you provide custom extractors, override this method
+ */
+ @Override
+ public Iterator<EmbeddedExtractor> iterator() {
+ // FsExtractor accepts every directory node, hence it comes last
+ return Arrays.<EmbeddedExtractor>asList(
+ new Ole10Extractor(),
+ new PdfExtractor(),
+ new BiffExtractor(),
+ new OOXMLExtractor(),
+ new FsExtractor()
+ ).iterator();
+ }
+
+ /**
+ * Extracts the embedded object of a directory node with the first extractor
+ * of {@link #iterator()} which accepts the node.
+ *
+ * @param src the directory node of the embedding
+ * @return the extracted data or {@code null} if no extractor accepted the node
+ * @throws IOException if the accepting extractor fails to read the node
+ */
+ public EmbeddedData extractOne(DirectoryNode src) throws IOException {
+ final Iterator<EmbeddedExtractor> candidates = iterator();
+ while (candidates.hasNext()) {
+ final EmbeddedExtractor candidate = candidates.next();
+ if (!candidate.canExtract(src)) {
+ continue;
+ }
+ return candidate.extract(src);
+ }
+ return null;
+ }
+
+ /**
+ * Extracts the object embedded in a picture with the first extractor
+ * of {@link #iterator()} which accepts the picture.
+ *
+ * @param src the picture to inspect
+ * @return the extracted data or {@code null} if no extractor accepted the picture
+ * or the accepting extractor found nothing
+ * @throws IOException if the accepting extractor fails to read the picture
+ */
+ public EmbeddedData extractOne(Picture src) throws IOException {
+ final Iterator<EmbeddedExtractor> candidates = iterator();
+ while (candidates.hasNext()) {
+ final EmbeddedExtractor candidate = candidates.next();
+ if (!candidate.canExtract(src)) {
+ continue;
+ }
+ return candidate.extract(src);
+ }
+ return null;
+ }
+
+ /**
+ * Collects all embedded objects reachable from the drawing of a sheet.
+ *
+ * @param sheet the sheet to inspect
+ * @return the embedded objects, an empty list if the sheet has no drawing patriarch
+ * @throws IOException if an embedding can't be read
+ */
+ public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
+ final Drawing<?> drawing = sheet.getDrawingPatriarch();
+ if (drawing != null) {
+ final List<EmbeddedData> found = new ArrayList<>();
+ extractAll(drawing, found);
+ return found;
+ }
+ return Collections.emptyList();
+ }
+
+ /**
+ * Walks the shapes of a container, extracts the embedded object of each
+ * {@link ObjectData} / {@link Picture} shape and appends it to the given list.
+ * Nested shape containers are processed recursively.
+ *
+ * @param parent the container whose shapes are inspected
+ * @param embeddings the list the found embeddings are added to
+ * @throws IOException if the extraction of a picture embedding fails; failures
+ * while reading an {@link ObjectData} are only logged
+ */
+ protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
+ for (Shape shape : parent) {
+ EmbeddedData data = null;
+ if (shape instanceof ObjectData) {
+ ObjectData od = (ObjectData)shape;
+ try {
+ if (od.hasDirectoryEntry()) {
+ data = extractOne((DirectoryNode)od.getDirectory());
+ } else {
+ data = new EmbeddedData(od.getFileName(), od.getObjectData(), od.getContentType());
+ }
+ } catch (Exception e) {
+ // best effort: a broken OLE embedding is skipped, the remaining shapes are still processed
+ LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
+ }
+ } else if (shape instanceof Picture) {
+ data = extractOne((Picture)shape);
+ } else if (shape instanceof ShapeContainer) {
+ // the recursion adds its findings itself, data stays null for the container
+ extractAll((ShapeContainer<?>)shape, embeddings);
+ }
+
+ if (data == null) {
+ continue;
+ }
+
+ data.setShape(shape);
+ String filename = data.getFilename();
+ // keep the extension of the extracted name (including the dot), ".bin" if there is none
+ String extension = (filename == null || filename.lastIndexOf('.') == -1) ? ".bin" : filename.substring(filename.lastIndexOf('.'));
+
+ // try to find an alternative name
+ if (filename == null || filename.isEmpty() || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
+ filename = shape.getShapeName();
+ if (filename != null) {
+ filename += extension;
+ }
+ }
+ // default to dummy name
+ if (filename == null || filename.isEmpty()) {
+ // the current list size serves as running number
+ filename = "picture_" + embeddings.size() + extension;
+ }
+ filename = filename.trim();
+ data.setFilename(filename);
+
+ embeddings.add(data);
+ }
+ }
+
+
+ /**
+ * Checks if this extractor is able to handle the given directory node.
+ * This base implementation accepts nothing.
+ *
+ * @param source the directory node of the embedding
+ * @return {@code false} always
+ */
+ public boolean canExtract(DirectoryNode source) {
+ return false;
+ }
+
+ /**
+ * Checks if this extractor is able to handle the given picture.
+ * This base implementation accepts nothing.
+ *
+ * @param source the picture to inspect
+ * @return {@code false} always
+ */
+ public boolean canExtract(Picture source) {
+ return false;
+ }
+
+ protected EmbeddedData extract(DirectoryNode dn) throws IOException {
+ assert(canExtract(dn));
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
+ try (POIFSFileSystem dest = new POIFSFileSystem()) {
+ copyNodes(dn, dest.getRoot());
+ // start with a reasonable big size
+ dest.writeFilesystem(bos);
+ }
+
+ return new EmbeddedData(dn.getName(), bos.toByteArray(), CONTENT_TYPE_BYTES);
+ }
+
+ /**
+ * Extracts the object embedded in the given picture.
+ * This base implementation extracts nothing.
+ *
+ * @param source the picture to inspect
+ * @return {@code null} always
+ * @throws IOException not thrown by this base implementation
+ */
+ protected EmbeddedData extract(Picture source) throws IOException {
+ return null;
+ }
+
+ /**
+ * Extractor for OLE 1.0 packages, i.e. directory nodes whose storage class id
+ * resolves to {@link ClassIDPredefined#OLE_V1_PACKAGE}.
+ */
+ public static class Ole10Extractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ final ClassIDPredefined predefined = ClassIDPredefined.lookup(dn.getStorageClsid());
+ return ClassIDPredefined.OLE_V1_PACKAGE == predefined;
+ }
+
+ /**
+ * @return the filename and data buffer of the Ole10Native record, typed as {@code binary/octet-stream}
+ * @throws IOException if the node can't be read; a {@link Ole10NativeException} is wrapped
+ */
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ // TODO: inspect the CompObj record for more details, i.e. the content type
+ try {
+ final Ole10Native nativeRecord = Ole10Native.createFromEmbeddedOleObject(dn);
+ final String name = nativeRecord.getFileName();
+ final byte[] payload = nativeRecord.getDataBuffer();
+ return new EmbeddedData(name, payload, CONTENT_TYPE_BYTES);
+ } catch (Ole10NativeException cause) {
+ throw new IOException(cause);
+ }
+ }
+ }
+
+ /**
+ * Extractor for embedded PDF documents, which are either stored in a {@code CONTENTS}
+ * stream of a directory node or hidden inside the bytes of an EMF picture.
+ */
+ static class PdfExtractor extends EmbeddedExtractor {
+ static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
+
+ /**
+ * @return {@code true} if the node carries the PDF class id or contains a {@code CONTENTS} entry
+ */
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ ClassID clsId = dn.getStorageClsid();
+ return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS"));
+ }
+
+ /**
+ * @return the bytes of the {@code CONTENTS} stream, named after the node plus {@code .pdf}
+ * @throws IOException if the {@code CONTENTS} stream can't be read
+ */
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ try(ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ InputStream is = dn.createDocumentInputStream("CONTENTS")) {
+ IOUtils.copy(is, bos);
+ return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF);
+ }
+ }
+
+ /**
+ * @return {@code true} for every EMF picture; whether it really contains a PDF
+ * is only determined by {@link #extract(Picture)}
+ */
+ @Override
+ public boolean canExtract(Picture source) {
+ PictureData pd = source.getPictureData();
+ return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
+ }
+
+ /**
+ * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
+ * If an embedded stream is inside an EMF picture, this method extracts the payload,
+ * i.e. the bytes from the first {@code %PDF-} up to the following {@code %%EOF} marker.
+ *
+ * @param source the picture to inspect
+ * @return the embedded data in an EMF picture or null if none is found
+ * @throws IOException if the payload exceeds the allowed record length
+ */
+ @Override
+ protected EmbeddedData extract(Picture source) throws IOException {
+ // check for emf+ embedded pdf (poor mans style :( )
+ // Mac Excel 2011 embeds pdf files with this method.
+ PictureData pd = source.getPictureData();
+ if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
+ return null;
+ }
+
+ // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
+ byte[] pictureBytes = pd.getData();
+ int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
+ if (idxStart == -1) {
+ return null;
+ }
+
+ int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
+ if (idxEnd == -1) {
+ return null;
+ }
+
+ // copy up to the end of the 5 byte "%%EOF" marker plus one trailing byte (the line break);
+ // if the marker is the very last content of the picture there is no trailing byte,
+ // so the length is capped to the available bytes instead of overrunning the array
+ int pictureBytesLen = Math.min(idxEnd-idxStart+6, pictureBytes.length-idxStart);
+ byte[] pdfBytes = IOUtils.safelyAllocate(pictureBytesLen, MAX_RECORD_LENGTH);
+ System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
+
+ // an unnamed shape must not abort the extraction, it simply results in the bare extension
+ String shapeName = source.getShapeName();
+ String filename = (shapeName == null) ? "" : shapeName.trim();
+ if (!endsWithIgnoreCase(filename, ".pdf")) {
+ filename += ".pdf";
+ }
+ return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF);
+ }
+ }
+
+ /**
+ * Extractor for embedded OOXML documents, which are stored in a {@code package} stream
+ * of the directory node.
+ */
+ static class OOXMLExtractor extends EmbeddedExtractor {
+ /**
+ * @return {@code true} if the node contains a {@code package} entry
+ */
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ return dn.hasEntry("package");
+ }
+
+ /**
+ * Reads the {@code package} stream. Content type and file extension are taken from the
+ * predefined class id of the node; if either is unknown, the data is treated as plain zip.
+ *
+ * @return the bytes of the {@code package} stream, named after the node plus the extension
+ * @throws IOException if the {@code package} stream can't be read
+ */
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+
+ ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
+
+ String contentType = null;
+ String ext = null;
+
+ if (clsId != null) {
+ contentType = clsId.getContentType();
+ ext = clsId.getFileExtension();
+ }
+
+ if (contentType == null || ext == null) {
+ contentType = "application/zip";
+ ext = ".zip";
+ }
+
+ // try-with-resources closes the stream even if reading fails half-way
+ final byte[] data;
+ try (DocumentInputStream dis = dn.createDocumentInputStream("package")) {
+ data = IOUtils.toByteArray(dis);
+ }
+
+ return new EmbeddedData(dn.getName()+ext, data, contentType);
+ }
+ }
+
+ /**
+ * Extractor for embedded binary Excel and Word documents. The document kind is
+ * recognized by the predefined class id of the node or by its main stream name.
+ */
+ static class BiffExtractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ if (canExtractExcel(dn)) {
+ return true;
+ }
+ return canExtractWord(dn);
+ }
+
+ /**
+ * @return {@code true} if the node has an Excel V7/V8 class id or a {@code Workbook} entry
+ */
+ protected boolean canExtractExcel(DirectoryNode dn) {
+ final ClassIDPredefined predefined = ClassIDPredefined.lookup(dn.getStorageClsid());
+ if (predefined == ClassIDPredefined.EXCEL_V7 || predefined == ClassIDPredefined.EXCEL_V8) {
+ return true;
+ }
+ return dn.hasEntry("Workbook");
+ }
+
+ /**
+ * @return {@code true} if the node has a Word V7/V8 class id or a {@code WordDocument} entry
+ */
+ protected boolean canExtractWord(DirectoryNode dn) {
+ final ClassIDPredefined predefined = ClassIDPredefined.lookup(dn.getStorageClsid());
+ if (predefined == ClassIDPredefined.WORD_V7 || predefined == ClassIDPredefined.WORD_V8) {
+ return true;
+ }
+ return dn.hasEntry("WordDocument");
+ }
+
+ /**
+ * Copies the node like the base implementation and afterwards adjusts
+ * the filename extension and the content type to the detected document kind.
+ */
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ final EmbeddedData copied = super.extract(dn);
+ final boolean excel = canExtractExcel(dn);
+ if (excel) {
+ copied.setFilename(dn.getName() + ".xls");
+ copied.setContentType(CONTENT_TYPE_XLS);
+ return copied;
+ }
+ if (canExtractWord(dn)) {
+ copied.setFilename(dn.getName() + ".doc");
+ copied.setContentType(CONTENT_TYPE_DOC);
+ }
+ return copied;
+ }
+ }
+
+ /**
+ * Catch-all extractor: accepts every directory node and returns it
+ * as a copied OLE filesystem with the extension {@code .ole}.
+ */
+ static class FsExtractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ return true;
+ }
+
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ final EmbeddedData copied = super.extract(dn);
+ final String oleName = dn.getName() + ".ole";
+ copied.setFilename(oleName);
+ // TODO: read the content type from CompObj stream
+ return copied;
+ }
+ }
+
+ protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
+ for (Entry e : src) {
+ if (e instanceof DirectoryNode) {
+ DirectoryNode srcDir = (DirectoryNode)e;
+ DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
+ destDir.setStorageClsid(srcDir.getStorageClsid());
+ copyNodes(srcDir, destDir);
+ } else {
+ try (InputStream is = src.createDocumentInputStream(e)) {
+ dest.createDocument(e.getName(), is);
+ }
+ }
+ }
+ }
+
+
+
+ /**
+ * Knuth-Morris-Pratt Algorithm for Pattern Matching
+ * Finds the first occurrence of the pattern in the text.
+ */
+ private static int indexOf(byte[] data, int offset, byte[] pattern) {
+ int[] failure = computeFailure(pattern);
+
+ int j = 0;
+ if (data.length == 0) {
+ return -1;
+ }
+
+ for (int i = offset; i < data.length; i++) {
+ while (j > 0 && pattern[j] != data[i]) {
+ j = failure[j - 1];
+ }
+ if (pattern[j] == data[i]) { j++; }
+ if (j == pattern.length) {
+ return i - pattern.length + 1;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Computes the failure function using a boot-strapping process,
+ * where the pattern is matched against itself.
+ */
+ private static int[] computeFailure(byte[] pattern) {
+ int[] failure = new int[pattern.length];
+
+ int j = 0;
+ for (int i = 1; i < pattern.length; i++) {
+ while (j > 0 && pattern[j] != pattern[i]) {
+ j = failure[j - 1];
+ }
+ if (pattern[j] == pattern[i]) {
+ j++;
+ }
+ failure[i] = j;
+ }
+
+ return failure;
+ }
+
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ss.usermodel;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.OldFileFormatException;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.Removal;
+
+/**
+ * Factory for creating the appropriate kind of Workbook
+ * (be it {@link HSSFWorkbook} or XSSFWorkbook),
+ * by auto-detecting from the supplied input.
+ */
+public class WorkbookFactory {
+ /**
+ * Creates a Workbook from the given NPOIFSFileSystem, without using a password.
+ * Delegates to the password-aware variant with a {@code null} password.
+ *
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use.</p>
+ *
+ * @param fs The {@link NPOIFSFileSystem} to read the document from
+ *
+ * @return The created workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ */
+ public static Workbook create(NPOIFSFileSystem fs) throws IOException {
+ return create(fs, null);
+ }
+
+ /**
+ * Creates a Workbook from the given NPOIFSFileSystem, which may
+ * be password protected. The document is read starting at the root
+ * directory of the filesystem.
+ *
+ * @param fs The {@link NPOIFSFileSystem} to read the document from
+ * @param password The password that should be used or null if no password is necessary.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ */
+ private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException {
+ return create(fs.getRoot(), password);
+ }
+
+
+ /**
+ * Creates a Workbook from the given directory node, without using a password.
+ * Delegates to the password-aware variant with a {@code null} password.
+ *
+ * @param root The {@link DirectoryNode} to start reading the document from
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ */
+ public static Workbook create(final DirectoryNode root) throws IOException {
+ return create(root, null);
+ }
+
+
+ /**
+ * Creates a Workbook from the given directory node, which may
+ * be password protected. If the node contains the encryption entry of an
+ * encrypted OOXML file, the decrypted content is handed to the XSSF factory,
+ * otherwise the node is handed to the HSSF factory.
+ *
+ * @param root The {@link DirectoryNode} to start reading the document from
+ * @param password The password that should be used or null if no password is necessary.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ */
+ public static Workbook create(final DirectoryNode root, String password) throws IOException {
+ // Encrypted OOXML files go inside OLE2 containers, is this one?
+ final boolean encryptedOoxml = root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
+ if (encryptedOoxml) {
+ InputStream decrypted = null;
+ try {
+ decrypted = DocumentFactoryHelper.getDecryptedStream(root, password);
+ return createXSSFWorkbook(decrypted);
+ } finally {
+ IOUtils.closeQuietly(decrypted);
+ }
+ }
+
+ // If we get here, it isn't an encrypted XLSX file
+ // So, treat it as a regular HSSF XLS one
+ final boolean passwordSet = (password != null);
+ if (passwordSet) {
+ Biff8EncryptionKey.setCurrentUserPassword(password);
+ }
+ try {
+ return createHSSFWorkbook(root);
+ } finally {
+ // don't leave the password behind for later documents
+ if (passwordSet) {
+ Biff8EncryptionKey.setCurrentUserPassword(null);
+ }
+ }
+ }
+
+ /**
+ * Creates a XSSFWorkbook from the given OOXML Package.
+ * As the WorkbookFactory is located in the POI module, which doesn't know about the OOXML formats,
+ * this can be only achieved by using an Object reference to the OPCPackage.
+ * The reference is forwarded to the XSSF workbook factory.
+ *
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use.</p>
+ *
+ * @param pkg The {@code OPCPackage} opened for reading data.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ *
+ * @deprecated use XSSFWorkbookFactory.create
+ */
+ @Deprecated
+ @Removal(version = "4.2.0")
+ public static Workbook create(Object pkg) throws IOException {
+ return createXSSFWorkbook(pkg);
+ }
+
+ /**
+ * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+ * the given InputStream, without using a password.
+ *
+ * <p>Your input stream MUST either support mark/reset, or
+ * be wrapped as a {@link BufferedInputStream}!
+ * Note that using an {@link InputStream} has a higher memory footprint
+ * than using a {@link File}.</p>
+ *
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use. Note also that loading
+ * from an InputStream requires more memory than loading
+ * from a File, so prefer {@link #create(File)} where possible.</p>
+ *
+ * @param inp The {@link InputStream} to read data from.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ * @throws EncryptedDocumentException If the Workbook given is password protected
+ */
+ public static Workbook create(InputStream inp) throws IOException, EncryptedDocumentException {
+ return create(inp, null);
+ }
+
+ /**
+ * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+ * the given InputStream, which may be password protected.
+ * The kind of workbook is chosen by the file magic found at the start of the stream.
+ *
+ * <p>Your input stream MUST either support mark/reset, or
+ * be wrapped as a {@link BufferedInputStream}!
+ * Note that using an {@link InputStream} has a higher memory footprint
+ * than using a {@link File}.</p>
+ *
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use. Note also that loading
+ * from an InputStream requires more memory than loading
+ * from a File, so prefer {@link #create(File)} where possible.</p>
+ *
+ * @param inp The {@link InputStream} to read data from.
+ * @param password The password that should be used or null if no password is necessary.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data or
+ * if the stream is neither an OLE2 nor an OOXML stream
+ * @throws EncryptedDocumentException If the wrong password is given for a protected file
+ */
+ public static Workbook create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
+ final InputStream prepared = FileMagic.prepareToCheckMagic(inp);
+
+ switch (FileMagic.valueOf(prepared)) {
+ case OOXML:
+ return createXSSFWorkbook(prepared);
+ case OLE2:
+ return create(new NPOIFSFileSystem(prepared), password);
+ default:
+ throw new IOException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
+ }
+ }
+
+ /**
+ * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+ * the given File, which must exist and be readable.
+ * Delegates to the password-aware variant with a {@code null} password.
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use.</p>
+ *
+ * @param file The file to read data from.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ * @throws EncryptedDocumentException If the Workbook given is password protected
+ */
+ public static Workbook create(File file) throws IOException, EncryptedDocumentException {
+ return create(file, null);
+ }
+
+ /**
+ * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+ * the given File, which must exist and be readable, and
+ * may be password protected. The file is not opened in read-only mode.
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use.</p>
+ *
+ * @param file The file to read data from.
+ * @param password The password that should be used or null if no password is necessary.
+ *
+ * @return The created Workbook
+ *
+ * @throws IOException if an error occurs while reading the data
+ * @throws EncryptedDocumentException If the wrong password is given for a protected file
+ */
+ public static Workbook create(File file, String password) throws IOException, EncryptedDocumentException {
+ return create(file, password, false);
+ }
+
+ /**
+ * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+ * the given File, which must exist and be readable, and
+ * may be password protected.
+ * The file is first opened as OLE2 filesystem; if it turns out to be an
+ * OOXML file instead, it is handed to the XSSF factory.
+ * <p>Note that in order to properly release resources the
+ * Workbook should be closed after use.</p>
+ *
+ * @param file The file to read data from.
+ * @param password The password that should be used or null if no password is necessary.
+ * @param readOnly If the Workbook should be opened in read-only mode to avoid writing back
+ * changes when the document is closed.
+ *
+ * @return The created Workbook
+ *
+ * @throws FileNotFoundException if the file doesn't exist
+ * @throws IOException if an error occurs while reading the data
+ * @throws EncryptedDocumentException If the wrong password is given for a protected file
+ */
+ public static Workbook create(File file, String password, boolean readOnly) throws IOException, EncryptedDocumentException {
+ if (!file.exists()) {
+ throw new FileNotFoundException(file.toString());
+ }
+
+ NPOIFSFileSystem fs = null;
+ try {
+ fs = new NPOIFSFileSystem(file, readOnly);
+ return create(fs, password);
+ } catch(OfficeXmlFileException e) {
+ // not an OLE2 file but a plain OOXML one - release the filesystem and retry via XSSF
+ IOUtils.closeQuietly(fs);
+ return createXSSFWorkbook(file, readOnly);
+ } catch(IOException | RuntimeException e) {
+ // no workbook took ownership of the filesystem, so release the file handle
+ // for checked and unchecked failures alike
+ IOUtils.closeQuietly(fs);
+ throw e;
+ }
+ }
+
+ /**
+ * Creates a workbook via the reflectively loaded factory class
+ * {@code org.apache.poi.hssf.usermodel.HSSFWorkbookFactory}.
+ *
+ * @param args the arguments forwarded to the factory's {@code createWorkbook} method
+ * @return the created workbook
+ */
+ private static Workbook createHSSFWorkbook(Object... args) throws IOException, EncryptedDocumentException {
+ return createWorkbook("org.apache.poi.hssf.usermodel.HSSFWorkbookFactory", args);
+ }
+
+ /**
+ * Creates a workbook via the reflectively loaded factory class
+ * {@code org.apache.poi.xssf.usermodel.XSSFWorkbookFactory}.
+ *
+ * @param args the arguments forwarded to the factory's {@code createWorkbook} method
+ * @return the created workbook
+ */
+ private static Workbook createXSSFWorkbook(Object... args) throws IOException, EncryptedDocumentException {
+ return createWorkbook("org.apache.poi.xssf.usermodel.XSSFWorkbookFactory", args);
+ }
+
+ /**
+ * Loads the given factory class via the context class loader of the current thread
+ * and invokes its static {@code createWorkbook} method with the given arguments.
+ * The method is looked up by the runtime classes of the arguments.
+ *
+ * @param factoryClass the fully qualified name of the factory class
+ * @param args the arguments of the {@code createWorkbook} method
+ * @return the workbook returned by the factory
+ * @throws IOException if the factory throws it, or as wrapper around any failure
+ * of the reflective lookup / invocation
+ * @throws EncryptedDocumentException if the factory throws it
+ */
+ private static Workbook createWorkbook(String factoryClass, Object args[]) throws IOException, EncryptedDocumentException {
+ try {
+ Class<?> clazz = Thread.currentThread().getContextClassLoader().loadClass(factoryClass);
+ Class<?> argsClz[] = new Class<?>[args.length];
+ int i=0;
+ for (Object o : args) {
+ Class<?> c = o.getClass();
+ // map the runtime class to the parameter type used for the method lookup:
+ // boxed booleans are looked up as primitive, stream subclasses as InputStream
+ if (Boolean.class.isAssignableFrom(c)) {
+ c = boolean.class;
+ } else if (InputStream.class.isAssignableFrom(c)) {
+ c = InputStream.class;
+ }
+ argsClz[i++] = c;
+ }
+ Method m = clazz.getMethod("createWorkbook", argsClz);
+ // null receiver, i.e. createWorkbook is invoked as a static method
+ return (Workbook)m.invoke(null, args);
+ } catch (InvocationTargetException e) {
+ // unwrap the exception thrown by the factory and rethrow it in its original type
+ Throwable t = e.getCause();
+ if (t instanceof IOException) {
+ throw (IOException)t;
+ } else if (t instanceof EncryptedDocumentException) {
+ throw (EncryptedDocumentException)t;
+ } else if (t instanceof OldFileFormatException) {
+ throw (OldFileFormatException)t;
+ } else if (t instanceof RuntimeException) {
+ throw (RuntimeException)t;
+ } else {
+ throw new IOException(t.getMessage(), t);
+ }
+ } catch (Exception e) {
+ // everything else (class / method not found, null argument, ...) is reported as IOException
+ throw new IOException(e);
+ }
+ }
+
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.Closeable;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.xmlbeans.impl.common.SystemCache;
-
-/**
- * This holds the common functionality for all POI OOXML Document classes.
- */
-public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable {
- public static final String DOCUMENT_CREATOR = "Apache POI";
-
- // OLE embeddings relation name
- public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
-
- // Embedded OPC documents relation name
- public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package";
-
- /** The OPC Package */
- private OPCPackage pkg;
-
- /**
- * The properties of the OPC package, opened as needed
- */
- private POIXMLProperties properties;
-
- protected POIXMLDocument(OPCPackage pkg) {
- super(pkg);
- init(pkg);
- }
-
- protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) {
- super(pkg, coreDocumentRel);
- init(pkg);
- }
-
- private void init(OPCPackage p) {
- this.pkg = p;
-
- // Workaround for XMLBEANS-512 - ensure that when we parse
- // the file, we start with a fresh XML Parser each time,
- // and avoid the risk of getting a SaxHandler that's in error
- SystemCache.get().setSaxLoader(null);
- }
-
- /**
- * Wrapper to open a package, which works around shortcomings in java's this() constructor calls
- *
- * @param path the path to the document
- * @return the new OPCPackage
- *
- * @exception IOException if there was a problem opening the document
- */
- public static OPCPackage openPackage(String path) throws IOException {
- try {
- return OPCPackage.open(path);
- } catch (InvalidFormatException e) {
- throw new IOException(e.toString(), e);
- }
- }
-
- /**
- * Get the assigned OPCPackage
- *
- * @return the assigned OPCPackage
- */
- public OPCPackage getPackage() {
- return this.pkg;
- }
-
- protected PackagePart getCorePart() {
- return getPackagePart();
- }
-
- /**
- * Retrieves all the PackageParts which are defined as relationships of the base document with the
- * specified content type.
- *
- * @param contentType the content type
- *
- * @return all the base document PackageParts which match the content type
- *
- * @throws InvalidFormatException when the relationships or the parts contain errors
- *
- * @see org.apache.poi.xssf.usermodel.XSSFRelation
- * @see org.apache.poi.xslf.usermodel.XSLFRelation
- * @see org.apache.poi.xwpf.usermodel.XWPFRelation
- * @see org.apache.poi.xdgf.usermodel.XDGFRelation
- */
- protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
- PackageRelationshipCollection partsC =
- getPackagePart().getRelationshipsByType(contentType);
-
- PackagePart[] parts = new PackagePart[partsC.size()];
- int count = 0;
- for (PackageRelationship rel : partsC) {
- parts[count] = getPackagePart().getRelatedPart(rel);
- count++;
- }
- return parts;
- }
-
- /**
- * Get the document properties. This gives you access to the
- * core ooxml properties, and the extended ooxml properties.
- *
- * @return the document properties
- */
- public POIXMLProperties getProperties() {
- if(properties == null) {
- try {
- properties = new POIXMLProperties(pkg);
- } catch (Exception e){
- throw new POIXMLException(e);
- }
- }
- return properties;
- }
-
- /**
- * Get the document's embedded files.
- *
- * @return the document's embedded files
- *
- * @throws OpenXML4JException if the embedded parts can't be determined
- */
- public abstract List<PackagePart> getAllEmbedds() throws OpenXML4JException;
-
- protected final void load(POIXMLFactory factory) throws IOException {
- Map<PackagePart, POIXMLDocumentPart> context = new HashMap<>();
- try {
- read(factory, context);
- } catch (OpenXML4JException e){
- throw new POIXMLException(e);
- }
- onDocumentRead();
- context.clear();
- }
-
- /**
- * Closes the underlying {@link OPCPackage} from which this
- * document was read, if there is one
- *
- * <p>Once this has been called, no further
- * operations, updates or reads should be performed on the
- * document.
- *
- * @throws IOException for writable packages, if an IO exception occur during the saving process.
- */
- @Override
- public void close() throws IOException {
- if (pkg != null) {
- if (pkg.getPackageAccess() == PackageAccess.READ) {
- pkg.revert();
- } else {
- pkg.close();
- }
- pkg = null;
- }
- }
-
- /**
- * Write out this document to an Outputstream.
- *
- * Note - if the Document was opened from a {@link File} rather
- * than an {@link InputStream}, you <b>must</b> write out to
- * a different file, overwriting via an OutputStream isn't possible.
- *
- * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive
- * or has a high cost/latency associated with each written byte,
- * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream}
- * to improve write performance.
- *
- * @param stream - the java OutputStream you wish to write the file to
- *
- * @exception IOException if anything can't be written.
- */
- @SuppressWarnings("resource")
- public final void write(OutputStream stream) throws IOException {
- OPCPackage p = getPackage();
- if(p == null) {
- throw new IOException("Cannot write data, document seems to have been closed already");
- }
-
- //force all children to commit their changes into the underlying OOXML Package
- // TODO Shouldn't they be committing to the new one instead?
- Set<PackagePart> context = new HashSet<>();
- onSave(context);
- context.clear();
-
- //save extended and custom properties
- getProperties().commit();
-
- p.save(stream);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagePartName;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
-import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.util.Internal;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.xddf.usermodel.chart.XDDFChart;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-
-/**
- * Represents an entry of a OOXML package.
- * <p>
- * Each POIXMLDocumentPart keeps a reference to the underlying a {@link org.apache.poi.openxml4j.opc.PackagePart}.
- * </p>
- */
-public class POIXMLDocumentPart {
- private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class);
-
- private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT;
- private PackagePart packagePart;
- private POIXMLDocumentPart parent;
- private Map<String, RelationPart> relations = new LinkedHashMap<>();
- private boolean isCommited = false;
-
- /**
- * to check whether embedded part is already committed
- *
- * @return return true if embedded part is committed
- */
- public boolean isCommited() {
- return isCommited;
- }
-
- /**
- * setter method to set embedded part is committed
- *
- * @param isCommited boolean value
- */
- public void setCommited(boolean isCommited) {
- this.isCommited = isCommited;
- }
-
- /**
- * The RelationPart is a cached relationship between the document, which contains the RelationPart,
- * and one of its referenced child document parts.
- * The child document parts may only belong to one parent, but it's often referenced by other
- * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it.
- */
- public static class RelationPart {
- private final PackageRelationship relationship;
- private final POIXMLDocumentPart documentPart;
-
- RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) {
- this.relationship = relationship;
- this.documentPart = documentPart;
- }
-
- /**
- * @return the cached relationship, which uniquely identifies this child document part within the parent
- */
- public PackageRelationship getRelationship() {
- return relationship;
- }
-
- /**
- * @param <T> the cast of the caller to a document sub class
- * @return the child document part
- */
- @SuppressWarnings("unchecked")
- public <T extends POIXMLDocumentPart> T getDocumentPart() {
- return (T) documentPart;
- }
- }
-
- /**
- * Counter that provides the amount of incoming relations from other parts
- * to this part.
- */
- private int relationCounter;
-
- int incrementRelationCounter() {
- relationCounter++;
- return relationCounter;
- }
-
- int decrementRelationCounter() {
- relationCounter--;
- return relationCounter;
- }
-
- int getRelationCounter() {
- return relationCounter;
- }
-
- /**
- * Construct POIXMLDocumentPart representing a "core document" package part.
- *
- * @param pkg the OPCPackage containing this document
- */
- public POIXMLDocumentPart(OPCPackage pkg) {
- this(pkg, PackageRelationshipTypes.CORE_DOCUMENT);
- }
-
- /**
- * Construct POIXMLDocumentPart representing a custom "core document" package part.
- *
- * @param pkg the OPCPackage containing this document
- * @param coreDocumentRel the relation type of this document
- */
- public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) {
- this(getPartFromOPCPackage(pkg, coreDocumentRel));
- this.coreDocumentRel = coreDocumentRel;
- }
-
- /**
- * Creates new POIXMLDocumentPart - called by client code to create new parts from scratch.
- *
- * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)
- */
- public POIXMLDocumentPart() {
- }
-
- /**
- * Creates an POIXMLDocumentPart representing the given package part and relationship.
- * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
- *
- * @param part - The package part that holds xml data representing this sheet.
- * @see #read(POIXMLFactory, java.util.Map)
- * @since POI 3.14-Beta1
- */
- public POIXMLDocumentPart(PackagePart part) {
- this(null, part);
- }
-
- /**
- * Creates an POIXMLDocumentPart representing the given package part, relationship and parent
- * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
- *
- * @param parent - Parent part
- * @param part - The package part that holds xml data representing this sheet.
- * @see #read(POIXMLFactory, java.util.Map)
- * @since POI 3.14-Beta1
- */
- public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
- this.packagePart = part;
- this.parent = parent;
- }
-
- /**
- * When you open something like a theme, call this to
- * re-base the XML Document onto the core child of the
- * current core document
- *
- * @param pkg the package to be rebased
- * @throws InvalidFormatException if there was an error in the core document relation
- * @throws IllegalStateException if there are more than one core document relations
- */
- protected final void rebase(OPCPackage pkg) throws InvalidFormatException {
- PackageRelationshipCollection cores =
- packagePart.getRelationshipsByType(coreDocumentRel);
- if (cores.size() != 1) {
- throw new IllegalStateException(
- "Tried to rebase using " + coreDocumentRel +
- " but found " + cores.size() + " parts of the right type"
- );
- }
- packagePart = packagePart.getRelatedPart(cores.getRelationship(0));
- }
-
- /**
- * Provides access to the underlying PackagePart
- *
- * @return the underlying PackagePart
- */
- public final PackagePart getPackagePart() {
- return packagePart;
- }
-
- /**
- * Returns the list of child relations for this POIXMLDocumentPart
- *
- * @return child relations
- */
- public final List<POIXMLDocumentPart> getRelations() {
- List<POIXMLDocumentPart> l = new ArrayList<>();
- for (RelationPart rp : relations.values()) {
- l.add(rp.getDocumentPart());
- }
- return Collections.unmodifiableList(l);
- }
-
- /**
- * Returns the list of child relations for this POIXMLDocumentPart
- *
- * @return child relations
- */
- public final List<RelationPart> getRelationParts() {
- List<RelationPart> l = new ArrayList<>(relations.values());
- return Collections.unmodifiableList(l);
- }
-
- /**
- * Returns the target {@link POIXMLDocumentPart}, where a
- * {@link PackageRelationship} is set from the {@link PackagePart} of this
- * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target
- * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()}
- * matching the given parameter value.
- *
- * @param id The relation id to look for
- * @return the target part of the relation, or null, if none exists
- */
- public final POIXMLDocumentPart getRelationById(String id) {
- RelationPart rp = getRelationPartById(id);
- return (rp == null) ? null : rp.getDocumentPart();
- }
-
- /**
- * Returns the target {@link RelationPart}, where a
- * {@link PackageRelationship} is set from the {@link PackagePart} of this
- * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target
- * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()}
- * matching the given parameter value.
- *
- * @param id The relation id to look for
- * @return the target relation part, or null, if none exists
- * @since 4.0.0
- */
- public final RelationPart getRelationPartById(String id) {
- return relations.get(id);
- }
-
- /**
- * Returns the first {@link PackageRelationship#getId()} of the
- * {@link PackageRelationship}, that sources from the {@link PackagePart} of
- * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given
- * parameter value.<p>
- * <p>
- * There can be multiple references to the given {@link POIXMLDocumentPart}
- * and only the first in the order of creation is returned.
- *
- * @param part The {@link POIXMLDocumentPart} for which the according
- * relation-id shall be found.
- * @return The value of the {@link PackageRelationship#getId()} or null, if
- * parts are not related.
- */
- public final String getRelationId(POIXMLDocumentPart part) {
- for (RelationPart rp : relations.values()) {
- if (rp.getDocumentPart() == part) {
- return rp.getRelationship().getId();
- }
- }
- return null;
- }
-
- /**
- * Add a new child POIXMLDocumentPart
- *
- * @param relId the preferred relation id, when null the next free relation id will be used
- * @param relationshipType the package relationship type
- * @param part the child to add
- * @return the new RelationPart
- * @since 3.14-Beta1
- */
- public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) {
- PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart());
- if (pr == null) {
- PackagePartName ppn = part.getPackagePart().getPartName();
- String relType = relationshipType.getRelation();
- pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId);
- }
- addRelation(pr, part);
- return new RelationPart(pr, part);
- }
-
- /**
- * Add a new child POIXMLDocumentPart
- *
- * @param pr the relationship of the child
- * @param part the child to add
- */
- private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) {
- relations.put(pr.getId(), new RelationPart(pr, part));
- part.incrementRelationCounter();
-
- }
-
- /**
- * Remove the relation to the specified part in this package and remove the
- * part, if it is no longer needed.<p>
- * <p>
- * If there are multiple relationships to the same part, this will only
- * remove the first relationship in the order of creation. The removal
- * via the part id ({@link #removeRelation(String)} is preferred.
- *
- * @param part the part which relation is to be removed from this document
- */
- protected final void removeRelation(POIXMLDocumentPart part) {
- removeRelation(part, true);
- }
-
- /**
- * Remove the relation to the specified part in this package and remove the
- * part, if it is no longer needed and flag is set to true.<p>
- * <p>
- * If there are multiple relationships to the same part, this will only
- * remove the first relationship in the order of creation. The removal
- * via the part id ({@link #removeRelation(String, boolean)} is preferred.
- *
- * @param part The related part, to which the relation shall be removed.
- * @param removeUnusedParts true, if the part shall be removed from the package if not
- * needed any longer.
- * @return true, if the relation was removed
- */
- protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) {
- String id = getRelationId(part);
- return removeRelation(id, removeUnusedParts);
- }
-
- /**
- * Remove the relation to the specified part in this package and remove the
- * part, if it is no longer needed.<p>
- * <p>
- * If there are multiple relationships to the same part, this will only
- * remove the first relationship in the order of creation. The removal
- * via the part id ({@link #removeRelation(String)} is preferred.
- *
- * @param partId the part id which relation is to be removed from this document
- * @since 4.0.0
- */
- protected final void removeRelation(String partId) {
- removeRelation(partId, true);
- }
-
- /**
- * Remove the relation to the specified part in this package and remove the
- * part, if it is no longer needed and flag is set to true.<p>
- *
- * @param partId The related part id, to which the relation shall be removed.
- * @param removeUnusedParts true, if the part shall be removed from the package if not
- * needed any longer.
- * @return true, if the relation was removed
- * @since 4.0.0
- */
- private final boolean removeRelation(String partId, boolean removeUnusedParts) {
- RelationPart rp = relations.get(partId);
- if (rp == null) {
- // part is not related with this POIXMLDocumentPart
- return false;
- }
- POIXMLDocumentPart part = rp.getDocumentPart();
- /* decrement usage counter */
- part.decrementRelationCounter();
- /* remove packagepart relationship */
- getPackagePart().removeRelationship(partId);
- /* remove POIXMLDocument from relations */
- relations.remove(partId);
-
- if (removeUnusedParts) {
- /* if last relation to target part was removed, delete according target part */
- if (part.getRelationCounter() == 0) {
- try {
- part.onDocumentRemove();
- } catch (IOException e) {
- throw new POIXMLException(e);
- }
- getPackagePart().getPackage().removePart(part.getPackagePart());
- }
- }
- return true;
- }
-
-
- /**
- * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent.
- *
- * @return the parent POIXMLDocumentPart or <code>null</code> for the root element.
- */
- public final POIXMLDocumentPart getParent() {
- return parent;
- }
-
- @Override
- public String toString() {
- return packagePart == null ? "" : packagePart.toString();
- }
-
- /**
- * Save the content in the underlying package part.
- * Default implementation is empty meaning that the package part is left unmodified.
- * <p>
- * Sub-classes should override and add logic to marshal the "model" into Ooxml4J.
- * <p>
- * For example, the code saving a generic XML entry may look as follows:
- * <pre>
- * protected void commit() throws IOException {
- * PackagePart part = getPackagePart();
- * OutputStream out = part.getOutputStream();
- * XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
- * bean.save(out, DEFAULT_XML_OPTIONS);
- * out.close();
- * }
- * </pre>
- *
- * @throws IOException a subclass may throw an IOException if the changes can't be committed
- */
- protected void commit() throws IOException {
-
- }
-
- /**
- * Save changes in the underlying OOXML package.
- * Recursively fires {@link #commit()} for each package part
- *
- * @param alreadySaved context set containing already visited nodes
- * @throws IOException a related part may throw an IOException if the changes can't be saved
- */
- protected final void onSave(Set<PackagePart> alreadySaved) throws IOException {
- //if part is already committed then return
- if (this.isCommited) {
- return;
- }
-
- // this usually clears out previous content in the part...
- prepareForCommit();
-
- commit();
- alreadySaved.add(this.getPackagePart());
- for (RelationPart rp : relations.values()) {
- POIXMLDocumentPart p = rp.getDocumentPart();
- if (!alreadySaved.contains(p.getPackagePart())) {
- p.onSave(alreadySaved);
- }
- }
- }
-
- /**
- * Ensure that a memory based package part does not have lingering data from previous
- * commit() calls.
- * <p>
- * Note: This is overwritten for some objects, as *PictureData seem to store the actual content
- * in the part directly without keeping a copy like all others therefore we need to handle them differently.
- */
- protected void prepareForCommit() {
- PackagePart part = this.getPackagePart();
- if (part != null) {
- part.clear();
- }
- }
-
- /**
- * Create a new child POIXMLDocumentPart
- *
- * @param descriptor the part descriptor
- * @param factory the factory that will create an instance of the requested relation
- * @return the created child POIXMLDocumentPart
- * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
- * equivalent part names and package implementers shall neither
- * create nor recognize packages with equivalent part names.
- */
- public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) {
- return createRelationship(descriptor, factory, -1, false).getDocumentPart();
- }
-
- /**
- * Create a new child POIXMLDocumentPart
- *
- * @param descriptor the part descriptor
- * @param factory the factory that will create an instance of the requested relation
- * @param idx part number
- * @return the created child POIXMLDocumentPart
- * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
- * equivalent part names and package implementers shall neither
- * create nor recognize packages with equivalent part names.
- */
- public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) {
- return createRelationship(descriptor, factory, idx, false).getDocumentPart();
- }
-
- /**
- * Identifies the next available part number for a part of the given type,
- * if possible, otherwise -1 if none are available.
- * The found (valid) index can then be safely given to
- * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or
- * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)}
- * without naming clashes.
- * If parts with other types are already claiming a name for this relationship
- * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace
- * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered
- * when finding the next spare number.
- *
- * @param descriptor The relationship type to find the part number for
- * @param minIdx The minimum free index to assign, use -1 for any
- * @return The next free part number, or -1 if none available
- */
- protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) {
- OPCPackage pkg = packagePart.getPackage();
-
- try {
- String name = descriptor.getDefaultFileName();
- if (name.equals(descriptor.getFileName(9999))) {
- // Non-index based, check if default is free
- PackagePartName ppName = PackagingURIHelper.createPartName(name);
- if (pkg.containPart(ppName)) {
- // Default name already taken, not index based, nothing free
- return -1;
- } else {
- // Default name free
- return 0;
- }
- }
-
- // Default to searching from 1, unless they asked for 0+
- int idx = (minIdx < 0) ? 1 : minIdx;
- int maxIdx = minIdx + pkg.getParts().size();
- while (idx <= maxIdx) {
- name = descriptor.getFileName(idx);
- PackagePartName ppName = PackagingURIHelper.createPartName(name);
- if (!pkg.containPart(ppName)) {
- return idx;
- }
- idx++;
- }
- } catch (InvalidFormatException e) {
- // Give a general wrapped exception for the problem
- throw new POIXMLException(e);
- }
- return -1;
- }
-
- /**
- * Create a new child POIXMLDocumentPart
- *
- * @param descriptor the part descriptor
- * @param factory the factory that will create an instance of the requested relation
- * @param idx part number
- * @param noRelation if true, then no relationship is added.
- * @return the created child POIXMLDocumentPart
- * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
- * equivalent part names and package implementers shall neither
- * create nor recognize packages with equivalent part names.
- */
- public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) {
- try {
- PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx));
- PackageRelationship rel = null;
- PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType());
- if (!noRelation) {
- /* only add to relations, if according relationship is being created. */
- rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation());
- }
- POIXMLDocumentPart doc = factory.newDocumentPart(descriptor);
- doc.packagePart = part;
- doc.parent = this;
- if (!noRelation) {
- /* only add to relations, if according relationship is being created. */
- addRelation(rel, doc);
- }
-
- return new RelationPart(rel, doc);
- } catch (PartAlreadyExistsException pae) {
- // Return the specific exception so the user knows
- // that the name is already taken
- throw pae;
- } catch (Exception e) {
- // Give a general wrapped exception for the problem
- throw new POIXMLException(e);
- }
- }
-
- /**
- * Iterate through the underlying PackagePart and create child POIXMLFactory instances
- * using the specified factory
- *
- * @param factory the factory object that creates POIXMLFactory instances
- * @param context context map containing already visited noted keyed by targetURI
- * @throws OpenXML4JException thrown when a related part can't be read
- */
- protected void read(POIXMLFactory factory, Map<PackagePart, POIXMLDocumentPart> context) throws OpenXML4JException {
- PackagePart pp = getPackagePart();
- // add mapping a second time, in case of initial caller hasn't done so
- POIXMLDocumentPart otherChild = context.put(pp, this);
- if (otherChild != null && otherChild != this) {
- throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!");
- }
-
- if (!pp.hasRelationships()) return;
-
- PackageRelationshipCollection rels = packagePart.getRelationships();
- List<POIXMLDocumentPart> readLater = new ArrayList<>();
-
- // scan breadth-first, so parent-relations are hopefully the shallowest element
- for (PackageRelationship rel : rels) {
- if (rel.getTargetMode() == TargetMode.INTERNAL) {
- URI uri = rel.getTargetURI();
-
- // check for internal references (e.g. '#Sheet1!A1')
- PackagePartName relName;
- if (uri.getRawFragment() != null) {
- relName = PackagingURIHelper.createPartName(uri.getPath());
- } else {
- relName = PackagingURIHelper.createPartName(uri);
- }
-
- final PackagePart p = packagePart.getPackage().getPart(relName);
- if (p == null) {
- logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI());
- continue;
- }
-
- POIXMLDocumentPart childPart = context.get(p);
- if (childPart == null) {
- childPart = factory.createDocumentPart(this, p);
- //here we are checking if part if embedded and excel then set it to chart class
- //so that at the time to writing we can also write updated embedded part
- if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) {
- ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart);
- }
- childPart.parent = this;
- // already add child to context, so other children can reference it
- context.put(p, childPart);
- readLater.add(childPart);
- }
-
- addRelation(rel, childPart);
- }
- }
-
- for (POIXMLDocumentPart childPart : readLater) {
- childPart.read(factory, context);
- }
- }
-
- /**
- * Get the PackagePart that is the target of a relationship from this Part.
- *
- * @param rel The relationship
- * @return The target part
- * @throws InvalidFormatException thrown if the related part has is erroneous
- */
- protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException {
- return getPackagePart().getRelatedPart(rel);
- }
-
-
- /**
- * Fired when a new package part is created
- *
- * @throws IOException a subclass may throw an IOException on document creation
- */
- protected void onDocumentCreate() throws IOException {
-
- }
-
- /**
- * Fired when a package part is read
- *
- * @throws IOException a subclass may throw an IOException when a document is read
- */
- protected void onDocumentRead() throws IOException {
-
- }
-
- /**
- * Fired when a package part is about to be removed from the package
- *
- * @throws IOException a subclass may throw an IOException when a document is removed
- */
- protected void onDocumentRemove() throws IOException {
-
- }
-
- /**
- * Internal method, do not use!
- * <p>
- * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()}
- * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed.
- *
- * @param part the part which is to be read
- * @throws IOException if the part can't be read
- */
- @Internal
- @Deprecated
- public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException {
- part.onDocumentRead();
- }
-
- /**
- * Retrieves the core document part
- *
- * @since POI 3.14-Beta1
- */
- private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) {
- PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0);
-
- if (coreRel != null) {
- PackagePart pp = pkg.getPart(coreRel);
- if (pp == null) {
- throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found.");
- }
- return pp;
- }
-
- coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0);
- if (coreRel != null) {
- throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
- }
-
- throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-/**
- * Indicates a generic OOXML error.
- *
- * @author Yegor Kozlov
- */
-@SuppressWarnings("serial")
-public final class POIXMLException extends RuntimeException{
- /**
- * Create a new <code>POIXMLException</code> with no
- * detail mesage.
- */
- public POIXMLException() {
- super();
- }
-
- /**
- * Create a new <code>POIXMLException</code> with
- * the <code>String</code> specified as an error message.
- *
- * @param msg The error message for the exception.
- */
- public POIXMLException(String msg) {
- super(msg);
- }
-
- /**
- * Create a new <code>POIXMLException</code> with
- * the <code>String</code> specified as an error message and the cause.
- *
- * @param msg The error message for the exception.
- * @param cause the cause (which is saved for later retrieval by the
- * {@link #getCause()} method). (A <tt>null</tt> value is
- * permitted, and indicates that the cause is nonexistent or
- * unknown.)
- */
- public POIXMLException(String msg, Throwable cause) {
- super(msg, cause);
- }
-
- /**
- * Create a new <code>POIXMLException</code> with
- * the specified cause.
- *
- * @param cause the cause (which is saved for later retrieval by the
- * {@link #getCause()} method). (A <tt>null</tt> value is
- * permitted, and indicates that the cause is nonexistent or
- * unknown.)
- */
- public POIXMLException(Throwable cause) {
- super(cause);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.lang.reflect.InvocationTargetException;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-/**
- * Defines a factory API that enables sub-classes to create instances of <code>POIXMLDocumentPart</code>
- */
-public abstract class POIXMLFactory {
- private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class);
-
- private static final Class<?>[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class};
- private static final Class<?>[] ORPHAN_PART = {PackagePart.class};
-
- /**
- * Create a POIXMLDocumentPart from existing package part and relation. This method is called
- * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document
- *
- * @param parent parent part
- * @param part the PackagePart representing the created instance
- * @return A new instance of a POIXMLDocumentPart.
- *
- * @since by POI 3.14-Beta1
- */
- public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
- PackageRelationship rel = getPackageRelationship(parent, part);
- POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType());
-
- if (descriptor == null || descriptor.getRelationClass() == null) {
- LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType());
- return new POIXMLDocumentPart(parent, part);
- }
-
- Class<? extends POIXMLDocumentPart> cls = descriptor.getRelationClass();
- try {
- try {
- return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part});
- } catch (NoSuchMethodException e) {
- return createDocumentPart(cls, ORPHAN_PART, new Object[]{part});
- }
- } catch (Exception e) {
- throw new POIXMLException((e.getCause() != null ? e.getCause() : e).getMessage(), e);
- }
- }
-
- /**
- * Need to delegate instantiation to sub class because of constructor visibility
- *
- * @param cls the document class to be instantiated
- * @param classes the classes of the constructor arguments
- * @param values the values of the constructor arguments
- * @return the new document / part
- * @throws SecurityException thrown if the object can't be instantiated
- * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments
- * @throws InstantiationException thrown if the object can't be instantiated
- * @throws IllegalAccessException thrown if the object can't be instantiated
- * @throws InvocationTargetException thrown if the object can't be instantiated
- *
- * @since POI 3.14-Beta1
- */
- protected abstract POIXMLDocumentPart createDocumentPart
- (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
- throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException;
-
- /**
- * returns the descriptor for the given relationship type
- *
- * @param relationshipType the relationship type of the descriptor
- * @return the descriptor or null if type is unknown
- *
- * @since POI 3.14-Beta1
- */
- protected abstract POIXMLRelation getDescriptor(String relationshipType);
-
- /**
- * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts
- * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc.
- *
- * @param descriptor describes the object to create
- * @return A new instance of a POIXMLDocumentPart.
- */
- public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) {
- Class<? extends POIXMLDocumentPart> cls = descriptor.getRelationClass();
- try {
- return createDocumentPart(cls, null, null);
- } catch (Exception e) {
- throw new POIXMLException(e);
- }
- }
-
- /**
- * Retrieves the package relationship of the child part within the parent
- *
- * @param parent the parent to search for the part
- * @param part the part to look for
- *
- * @return the relationship
- *
- * @throws POIXMLException if the relations are erroneous or the part is not related
- *
- * @since POI 3.14-Beta1
- */
- protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) {
- try {
- String partName = part.getPartName().getName();
- for (PackageRelationship pr : parent.getPackagePart().getRelationships()) {
- String packName = pr.getTargetURI().toASCIIString();
- if (packName.equalsIgnoreCase(partName)) {
- return pr;
- }
- }
- } catch (InvalidFormatException e) {
- throw new POIXMLException("error while determining package relations", e);
- }
-
- throw new POIXMLException("package part isn't a child of the parent document.");
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Date;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.ContentTypes;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagePartName;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
-import org.apache.poi.openxml4j.opc.StreamHelper;
-import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
-import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
-
-/**
- * Wrapper around the three different kinds of OOXML properties
- * and metadata a document can have (Core, Extended and Custom),
- * as well Thumbnails.
- */
-public class POIXMLProperties {
- private OPCPackage pkg;
- private CoreProperties core;
- private ExtendedProperties ext;
- private CustomProperties cust;
-
- private PackagePart extPart;
- private PackagePart custPart;
-
-
- private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE;
- private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE;
- static {
- NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance();
- NEW_EXT_INSTANCE.addNewProperties();
-
- NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance();
- NEW_CUST_INSTANCE.addNewProperties();
- }
-
- public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException {
- this.pkg = docPackage;
-
- // Core properties
- core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() );
-
- // Extended properties
- PackageRelationshipCollection extRel =
- pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES);
- if(extRel.size() == 1) {
- extPart = pkg.getPart( extRel.getRelationship(0));
- org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse(
- extPart.getInputStream(), DEFAULT_XML_OPTIONS
- );
- ext = new ExtendedProperties(props);
- } else {
- extPart = null;
- ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy());
- }
-
- // Custom properties
- PackageRelationshipCollection custRel =
- pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES);
- if(custRel.size() == 1) {
- custPart = pkg.getPart( custRel.getRelationship(0));
- org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse(
- custPart.getInputStream(), DEFAULT_XML_OPTIONS
- );
- cust = new CustomProperties(props);
- } else {
- custPart = null;
- cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy());
- }
- }
-
- /**
- * Returns the core document properties
- *
- * @return the core document properties
- */
- public CoreProperties getCoreProperties() {
- return core;
- }
-
- /**
- * Returns the extended document properties
- *
- * @return the extended document properties
- */
- public ExtendedProperties getExtendedProperties() {
- return ext;
- }
-
- /**
- * Returns the custom document properties
- *
- * @return the custom document properties
- */
- public CustomProperties getCustomProperties() {
- return cust;
- }
-
- /**
- * Returns the {@link PackagePart} for the Document
- * Thumbnail, or <code>null</code> if there isn't one
- *
- * @return The Document Thumbnail part or null
- */
- protected PackagePart getThumbnailPart() {
- PackageRelationshipCollection rels =
- pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL);
- if(rels.size() == 1) {
- return pkg.getPart(rels.getRelationship(0));
- }
- return null;
- }
- /**
- * Returns the name of the Document thumbnail, eg
- * <code>thumbnail.jpeg</code>, or <code>null</code> if there
- * isn't one.
- *
- * @return The thumbnail filename, or null
- */
- public String getThumbnailFilename() {
- PackagePart tPart = getThumbnailPart();
- if (tPart == null) return null;
- String name = tPart.getPartName().getName();
- return name.substring(name.lastIndexOf('/'));
- }
- /**
- * Returns the Document thumbnail image data, or {@code null} if there isn't one.
- *
- * @return The thumbnail data, or null
- *
- * @throws IOException if the thumbnail can't be read
- */
- public InputStream getThumbnailImage() throws IOException {
- PackagePart tPart = getThumbnailPart();
- if (tPart == null) return null;
- return tPart.getInputStream();
- }
-
- /**
- * Sets the Thumbnail for the document, replacing any existing one.
- *
- * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg}
- * @param imageData The inputstream to read the thumbnail image from
- *
- * @throws IOException if the thumbnail can't be written
- */
- public void setThumbnail(String filename, InputStream imageData) throws IOException {
- PackagePart tPart = getThumbnailPart();
- if (tPart == null) {
- // New thumbnail
- pkg.addThumbnail(filename, imageData);
- } else {
- // Change existing
- String newType = ContentTypes.getContentTypeFromFileExtension(filename);
- if (! newType.equals(tPart.getContentType())) {
- throw new IllegalArgumentException("Can't set a Thumbnail of type " +
- newType + " when existing one is of a different type " +
- tPart.getContentType());
- }
- StreamHelper.copyStream(imageData, tPart.getOutputStream());
- }
- }
-
- /**
- * Commit changes to the underlying OPC package
- *
- * @throws IOException if the properties can't be saved
- * @throws POIXMLException if the properties are erroneous
- */
- public void commit() throws IOException{
-
- if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){
- try {
- PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml");
- pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
- extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml");
- } catch (InvalidFormatException e){
- throw new POIXMLException(e);
- }
- }
- if(custPart == null && !NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){
- try {
- PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml");
- pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties");
- custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml");
- } catch (InvalidFormatException e){
- throw new POIXMLException(e);
- }
- }
- if(extPart != null){
- OutputStream out = extPart.getOutputStream();
- if (extPart.getSize() > 0) {
- extPart.clear();
- }
- ext.props.save(out, DEFAULT_XML_OPTIONS);
- out.close();
- }
- if(custPart != null){
- OutputStream out = custPart.getOutputStream();
- cust.props.save(out, DEFAULT_XML_OPTIONS);
- out.close();
- }
- }
-
- /**
- * The core document properties
- */
- public static class CoreProperties {
- private PackagePropertiesPart part;
- private CoreProperties(PackagePropertiesPart part) {
- this.part = part;
- }
-
- public String getCategory() {
- return part.getCategoryProperty().getValue();
- }
- public void setCategory(String category) {
- part.setCategoryProperty(category);
- }
- public String getContentStatus() {
- return part.getContentStatusProperty().getValue();
- }
- public void setContentStatus(String contentStatus) {
- part.setContentStatusProperty(contentStatus);
- }
- public String getContentType() {
- return part.getContentTypeProperty().getValue();
- }
- public void setContentType(String contentType) {
- part.setContentTypeProperty(contentType);
- }
- public Date getCreated() {
- return part.getCreatedProperty().getValue();
- }
- public void setCreated(Nullable<Date> date) {
- part.setCreatedProperty(date);
- }
- public void setCreated(String date) {
- part.setCreatedProperty(date);
- }
- public String getCreator() {
- return part.getCreatorProperty().getValue();
- }
- public void setCreator(String creator) {
- part.setCreatorProperty(creator);
- }
- public String getDescription() {
- return part.getDescriptionProperty().getValue();
- }
- public void setDescription(String description) {
- part.setDescriptionProperty(description);
- }
- public String getIdentifier() {
- return part.getIdentifierProperty().getValue();
- }
- public void setIdentifier(String identifier) {
- part.setIdentifierProperty(identifier);
- }
- public String getKeywords() {
- return part.getKeywordsProperty().getValue();
- }
- public void setKeywords(String keywords) {
- part.setKeywordsProperty(keywords);
- }
- public Date getLastPrinted() {
- return part.getLastPrintedProperty().getValue();
- }
- public void setLastPrinted(Nullable<Date> date) {
- part.setLastPrintedProperty(date);
- }
- public void setLastPrinted(String date) {
- part.setLastPrintedProperty(date);
- }
- /** @since POI 3.15 beta 3 */
- public String getLastModifiedByUser() {
- return part.getLastModifiedByProperty().getValue();
- }
- /** @since POI 3.15 beta 3 */
- public void setLastModifiedByUser(String user) {
- part.setLastModifiedByProperty(user);
- }
- public Date getModified() {
- return part.getModifiedProperty().getValue();
- }
- public void setModified(Nullable<Date> date) {
- part.setModifiedProperty(date);
- }
- public void setModified(String date) {
- part.setModifiedProperty(date);
- }
- public String getSubject() {
- return part.getSubjectProperty().getValue();
- }
- public void setSubjectProperty(String subject) {
- part.setSubjectProperty(subject);
- }
- public void setTitle(String title) {
- part.setTitleProperty(title);
- }
- public String getTitle() {
- return part.getTitleProperty().getValue();
- }
- public String getRevision() {
- return part.getRevisionProperty().getValue();
- }
- public void setRevision(String revision) {
- try {
- Long.valueOf(revision);
- part.setRevisionProperty(revision);
- }
- catch (NumberFormatException e) {}
- }
-
- public PackagePropertiesPart getUnderlyingProperties() {
- return part;
- }
- }
-
- /**
- * Extended document properties
- */
- public static class ExtendedProperties {
- private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props;
- private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) {
- this.props = props;
- }
-
- public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() {
- return props.getProperties();
- }
-
- public String getTemplate() {
- if (props.getProperties().isSetTemplate()) {
- return props.getProperties().getTemplate();
- }
- return null;
- }
- public String getManager() {
- if (props.getProperties().isSetManager()) {
- return props.getProperties().getManager();
- }
- return null;
- }
- public String getCompany() {
- if (props.getProperties().isSetCompany()) {
- return props.getProperties().getCompany();
- }
- return null;
- }
- public String getPresentationFormat() {
- if (props.getProperties().isSetPresentationFormat()) {
- return props.getProperties().getPresentationFormat();
- }
- return null;
- }
- public String getApplication() {
- if (props.getProperties().isSetApplication()) {
- return props.getProperties().getApplication();
- }
- return null;
- }
- public String getAppVersion() {
- if (props.getProperties().isSetAppVersion()) {
- return props.getProperties().getAppVersion();
- }
- return null;
- }
-
- public int getPages() {
- if (props.getProperties().isSetPages()) {
- return props.getProperties().getPages();
- }
- return -1;
- }
- public int getWords() {
- if (props.getProperties().isSetWords()) {
- return props.getProperties().getWords();
- }
- return -1;
- }
- public int getCharacters() {
- if (props.getProperties().isSetCharacters()) {
- return props.getProperties().getCharacters();
- }
- return -1;
- }
- public int getCharactersWithSpaces() {
- if (props.getProperties().isSetCharactersWithSpaces()) {
- return props.getProperties().getCharactersWithSpaces();
- }
- return -1;
- }
- public int getLines() {
- if (props.getProperties().isSetLines()) {
- return props.getProperties().getLines();
- }
- return -1;
- }
- public int getParagraphs() {
- if (props.getProperties().isSetParagraphs()) {
- return props.getProperties().getParagraphs();
- }
- return -1;
- }
- public int getSlides() {
- if (props.getProperties().isSetSlides()) {
- return props.getProperties().getSlides();
- }
- return -1;
- }
- public int getNotes() {
- if (props.getProperties().isSetNotes()) {
- return props.getProperties().getNotes();
- }
- return -1;
- }
- public int getTotalTime() {
- if (props.getProperties().isSetTotalTime()) {
- return props.getProperties().getTotalTime();
- }
- return -1;
- }
- public int getHiddenSlides() {
- if (props.getProperties().isSetHiddenSlides()) {
- return props.getProperties().getHiddenSlides();
- }
- return -1;
- }
- public int getMMClips() {
- if (props.getProperties().isSetMMClips()) {
- return props.getProperties().getMMClips();
- }
- return -1;
- }
-
- public String getHyperlinkBase() {
- if (props.getProperties().isSetHyperlinkBase()) {
- return props.getProperties().getHyperlinkBase();
- }
- return null;
- }
- }
-
- /**
- * Custom document properties
- */
- public static class CustomProperties {
- /**
- * Each custom property element contains an fmtid attribute
- * with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}).
- */
- public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}";
-
- private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props;
- private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) {
- this.props = props;
- }
-
- public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() {
- return props.getProperties();
- }
-
- /**
- * Add a new property
- *
- * @param name the property name
- * @throws IllegalArgumentException if a property with this name already exists
- */
- private CTProperty add(String name) {
- if(contains(name)) {
- throw new IllegalArgumentException("A property with this name " +
- "already exists in the custom properties");
- }
-
- CTProperty p = props.getProperties().addNewProperty();
- int pid = nextPid();
- p.setPid(pid);
- p.setFmtid(FORMAT_ID);
- p.setName(name);
- return p;
- }
-
- /**
- * Add a new string property
- *
- * @param name the property name
- * @param value the property value
- *
- * @throws IllegalArgumentException if a property with this name already exists
- */
- public void addProperty(String name, String value){
- CTProperty p = add(name);
- p.setLpwstr(value);
- }
-
- /**
- * Add a new double property
- *
- * @param name the property name
- * @param value the property value
- *
- * @throws IllegalArgumentException if a property with this name already exists
- */
- public void addProperty(String name, double value){
- CTProperty p = add(name);
- p.setR8(value);
- }
-
- /**
- * Add a new integer property
- *
- * @param name the property name
- * @param value the property value
- *
- * @throws IllegalArgumentException if a property with this name already exists
- */
- public void addProperty(String name, int value){
- CTProperty p = add(name);
- p.setI4(value);
- }
-
- /**
- * Add a new boolean property
- *
- * @param name the property name
- * @param value the property value
- *
- * @throws IllegalArgumentException if a property with this name already exists
- */
- public void addProperty(String name, boolean value){
- CTProperty p = add(name);
- p.setBool(value);
- }
-
- /**
- * Generate next id that uniquely relates a custom property
- *
- * @return next property id starting with 2
- */
- protected int nextPid() {
- int propid = 1;
- for(CTProperty p : props.getProperties().getPropertyArray()){
- if(p.getPid() > propid) propid = p.getPid();
- }
- return propid + 1;
- }
-
- /**
- * Check if a property with this name already exists in the collection of custom properties
- *
- * @param name the name to check
- * @return whether a property with the given name exists in the custom properties
- */
- public boolean contains(String name) {
- for(CTProperty p : props.getProperties().getPropertyArray()){
- if(p.getName().equals(name)) return true;
- }
- return false;
- }
-
- /**
- * Retrieve the custom property with this name, or null if none exists.
- *
- * You will need to test the various isSetX methods to work out
- * what the type of the property is, before fetching the
- * appropriate value for it.
- *
- * @param name the name of the property to fetch
- *
- * @return the custom property with this name, or null if none exists
- */
- public CTProperty getProperty(String name) {
- for(CTProperty p : props.getProperties().getPropertyArray()){
- if(p.getName().equals(name)) {
- return p;
- }
- }
- return null;
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import java.math.BigDecimal;
-import java.text.DateFormat;
-import java.text.DateFormatSymbols;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Locale;
-
-import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.util.LocaleUtil;
-import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
-
-/**
- * A {@link POITextExtractor} for returning the textual
- * content of the OOXML file properties, eg author
- * and title.
- */
-public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
-
- private final DateFormat dateFormat;
-
- /**
- * Creates a new POIXMLPropertiesTextExtractor for the given open document.
- *
- * @param doc the given open document
- */
- public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
- super(doc);
- DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
- dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
- dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
- }
-
- /**
- * Creates a new POIXMLPropertiesTextExtractor, for the
- * same file that another TextExtractor is already
- * working on.
- *
- * @param otherExtractor the extractor referencing the given file
- */
- public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
- this(otherExtractor.getDocument());
- }
-
- private void appendIfPresent(StringBuilder text, String thing, boolean value) {
- appendIfPresent(text, thing, Boolean.toString(value));
- }
-
- private void appendIfPresent(StringBuilder text, String thing, int value) {
- appendIfPresent(text, thing, Integer.toString(value));
- }
-
- private void appendIfPresent(StringBuilder text, String thing, Date value) {
- if (value == null) {
- return;
- }
- appendIfPresent(text, thing, dateFormat.format(value));
- }
-
- private void appendIfPresent(StringBuilder text, String thing, String value) {
- if (value == null) {
- return;
- }
- text.append(thing);
- text.append(" = ");
- text.append(value);
- text.append("\n");
- }
-
- /**
- * Returns the core document properties, eg author
- *
- * @return the core document properties
- */
- @SuppressWarnings("resource")
- public String getCorePropertiesText() {
- POIXMLDocument document = getDocument();
- if (document == null) { // event based extractor does not have a document
- return "";
- }
-
- StringBuilder text = new StringBuilder(64);
- PackagePropertiesPart props =
- document.getProperties().getCoreProperties().getUnderlyingProperties();
-
- appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
- appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
- appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
- appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
- appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
- appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
- appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
- appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
- appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
- appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
- appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
- appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
- appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
- appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
- appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
- appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
- appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
- appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
- appendIfPresent(text, "Title", props.getTitleProperty().getValue());
- appendIfPresent(text, "Version", props.getVersionProperty().getValue());
-
- return text.toString();
- }
-
- /**
- * Returns the extended document properties, eg application
- *
- * @return the extended document properties
- */
- @SuppressWarnings("resource")
- public String getExtendedPropertiesText() {
- POIXMLDocument document = getDocument();
- if (document == null) { // event based extractor does not have a document
- return "";
- }
-
- StringBuilder text = new StringBuilder(64);
- org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
- props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
-
- appendIfPresent(text, "Application", props.getApplication());
- appendIfPresent(text, "AppVersion", props.getAppVersion());
- appendIfPresent(text, "Characters", props.getCharacters());
- appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
- appendIfPresent(text, "Company", props.getCompany());
- appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
- appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
- appendIfPresent(text, "Lines", props.getLines());
- appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
- appendIfPresent(text, "Manager", props.getManager());
- appendIfPresent(text, "Pages", props.getPages());
- appendIfPresent(text, "Paragraphs", props.getParagraphs());
- appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
- appendIfPresent(text, "Template", props.getTemplate());
- appendIfPresent(text, "TotalTime", props.getTotalTime());
-
- return text.toString();
- }
-
- /**
- * Returns the custom document properties, if there are any
- *
- * @return the custom document properties
- */
- @SuppressWarnings({"resource"})
- public String getCustomPropertiesText() {
- POIXMLDocument document = getDocument();
- if (document == null) { // event based extractor does not have a document
- return "";
- }
-
- StringBuilder text = new StringBuilder();
- org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
- props = document.getProperties().getCustomProperties().getUnderlyingProperties();
-
- for (CTProperty property : props.getPropertyArray()) {
- String val = "(not implemented!)";
-
- if (property.isSetLpwstr()) {
- val = property.getLpwstr();
- } else if (property.isSetLpstr()) {
- val = property.getLpstr();
- } else if (property.isSetDate()) {
- val = property.getDate().toString();
- } else if (property.isSetFiletime()) {
- val = property.getFiletime().toString();
- } else if (property.isSetBool()) {
- val = Boolean.toString(property.getBool());
- }
-
- // Integers
- else if (property.isSetI1()) {
- val = Integer.toString(property.getI1());
- } else if (property.isSetI2()) {
- val = Integer.toString(property.getI2());
- } else if (property.isSetI4()) {
- val = Integer.toString(property.getI4());
- } else if (property.isSetI8()) {
- val = Long.toString(property.getI8());
- } else if (property.isSetInt()) {
- val = Integer.toString(property.getInt());
- }
-
- // Unsigned Integers
- else if (property.isSetUi1()) {
- val = Integer.toString(property.getUi1());
- } else if (property.isSetUi2()) {
- val = Integer.toString(property.getUi2());
- } else if (property.isSetUi4()) {
- val = Long.toString(property.getUi4());
- } else if (property.isSetUi8()) {
- val = property.getUi8().toString();
- } else if (property.isSetUint()) {
- val = Long.toString(property.getUint());
- }
-
- // Reals
- else if (property.isSetR4()) {
- val = Float.toString(property.getR4());
- } else if (property.isSetR8()) {
- val = Double.toString(property.getR8());
- } else if (property.isSetDecimal()) {
- BigDecimal d = property.getDecimal();
- if (d == null) {
- val = null;
- } else {
- val = d.toPlainString();
- }
- }
-
- /*else if (property.isSetArray()) {
- // TODO Fetch the array values and output
- }
- else if (property.isSetVector()) {
- // TODO Fetch the vector values and output
- }
-
- else if (property.isSetBlob() || property.isSetOblob()) {
- // TODO Decode, if possible
- }
- else if (property.isSetStream() || property.isSetOstream() ||
- property.isSetVstream()) {
- // TODO Decode, if possible
- }
- else if (property.isSetStorage() || property.isSetOstorage()) {
- // TODO Decode, if possible
- }*/
-
- text.append(property.getName()).append(" = ").append(val).append("\n");
- }
-
- return text.toString();
- }
-
- @Override
- public String getText() {
- try {
- return
- getCorePropertiesText() +
- getExtendedPropertiesText() +
- getCustomPropertiesText();
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
- throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagePartName;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-/**
- * Represents a descriptor of a OOXML relation.
- */
-public abstract class POIXMLRelation {
-
- private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class);
-
- /**
- * Describes the content stored in a part.
- */
- private String _type;
-
- /**
- * The kind of connection between a source part and a target part in a package.
- */
- private String _relation;
-
- /**
- * The path component of a pack URI.
- */
- private String _defaultName;
-
- /**
- * Defines what object is used to construct instances of this relationship
- */
- private Class<? extends POIXMLDocumentPart> _cls;
-
- /**
- * Instantiates a POIXMLRelation.
- *
- * @param type content type
- * @param rel relationship
- * @param defaultName default item name
- * @param cls defines what object is used to construct instances of this relationship
- */
- public POIXMLRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
- _type = type;
- _relation = rel;
- _defaultName = defaultName;
- _cls = cls;
- }
-
- /**
- * Instantiates a POIXMLRelation.
- *
- * @param type content type
- * @param rel relationship
- * @param defaultName default item name
- */
- public POIXMLRelation(String type, String rel, String defaultName) {
- this(type, rel, defaultName, null);
- }
- /**
- * Return the content type. Content types define a media type, a subtype, and an
- * optional set of parameters, as defined in RFC 2616.
- *
- * @return the content type
- */
- public String getContentType() {
- return _type;
- }
-
- /**
- * Return the relationship, the kind of connection between a source part and a target part in a package.
- * Relationships make the connections between parts directly discoverable without looking at the content
- * in the parts, and without altering the parts themselves.
- *
- * @return the relationship
- */
- public String getRelation() {
- return _relation;
- }
-
- /**
- * Return the default part name. Part names are used to refer to a part in the context of a
- * package, typically as part of a URI.
- *
- * @return the default part name
- */
- public String getDefaultFileName() {
- return _defaultName;
- }
-
- /**
- * Returns the filename for the nth one of these, e.g. /xl/comments4.xml
- *
- * @param index the suffix for the document type
- * @return the filename including the suffix
- */
- public String getFileName(int index) {
- if(! _defaultName.contains("#")) {
- // Generic filename in all cases
- return getDefaultFileName();
- }
- return _defaultName.replace("#", Integer.toString(index));
- }
-
- /**
- * Returns the index of the filename within the package for the given part.
- * e.g. 4 for /xl/comments4.xml
- *
- * @param part the part to read the suffix from
- * @return the suffix
- */
- public Integer getFileNameIndex(POIXMLDocumentPart part) {
- String regex = _defaultName.replace("#", "(\\d+)");
- return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1"));
- }
-
- /**
- * Return type of the object used to construct instances of this relationship
- *
- * @return the class of the object used to construct instances of this relation
- */
- public Class<? extends POIXMLDocumentPart> getRelationClass(){
- return _cls;
- }
-
- /**
- * Fetches the InputStream to read the contents, based
- * of the specified core part, for which we are defined
- * as a suitable relationship
- *
- * @since 3.16-beta3
- */
- public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
- PackageRelationshipCollection prc =
- corePart.getRelationshipsByType(getRelation());
- Iterator<PackageRelationship> it = prc.iterator();
- if(it.hasNext()) {
- PackageRelationship rel = it.next();
- PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- PackagePart part = corePart.getPackage().getPart(relName);
- return part.getInputStream();
- }
- log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
- return null;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import java.io.IOException;
-
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.POIXMLProperties.CustomProperties;
-import org.apache.poi.POIXMLProperties.ExtendedProperties;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.util.ZipSecureFile;
-
-public abstract class POIXMLTextExtractor extends POITextExtractor {
- /** The POIXMLDocument that's open */
- private final POIXMLDocument _document;
-
- /**
- * Creates a new text extractor for the given document
- *
- * @param document the document to extract from
- */
- public POIXMLTextExtractor(POIXMLDocument document) {
- _document = document;
- }
-
- /**
- * Returns the core document properties
- *
- * @return the core document properties
- */
- public CoreProperties getCoreProperties() {
- return _document.getProperties().getCoreProperties();
- }
- /**
- * Returns the extended document properties
- *
- * @return the extended document properties
- */
- public ExtendedProperties getExtendedProperties() {
- return _document.getProperties().getExtendedProperties();
- }
- /**
- * Returns the custom document properties
- *
- * @return the custom document properties
- */
- public CustomProperties getCustomProperties() {
- return _document.getProperties().getCustomProperties();
- }
-
- /**
- * Returns opened document
- *
- * @return the opened document
- */
- @Override
- public final POIXMLDocument getDocument() {
- return _document;
- }
-
- /**
- * Returns the opened OPCPackage that contains the document
- *
- * @return the opened OPCPackage
- */
- public OPCPackage getPackage() {
- return _document.getPackage();
- }
-
- /**
- * Returns an OOXML properties text extractor for the
- * document properties metadata, such as title and author.
- */
- @Override
- public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
- return new POIXMLPropertiesTextExtractor(_document);
- }
-
- @Override
- public void close() throws IOException {
- // e.g. XSSFEventBaseExcelExtractor passes a null-document
- if(_document != null) {
- @SuppressWarnings("resource")
- OPCPackage pkg = _document.getPackage();
- if(pkg != null) {
- // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor!
- pkg.revert();
- }
- }
- super.close();
- }
-
- protected void checkMaxTextSize(CharSequence text, String string) {
- if(string == null) {
- return;
- }
-
- int size = text.length() + string.length();
- if(size > ZipSecureFile.getMaxTextSize()) {
- throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. "
- + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
- + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. "
- + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize());
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.io.StringReader;
-import java.lang.ref.WeakReference;
-import java.net.URL;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import javax.xml.stream.XMLStreamReader;
-
-import org.apache.poi.openxml4j.opc.PackageNamespaces;
-import org.apache.poi.util.DocumentHelper;
-import org.apache.poi.util.Removal;
-import org.apache.xmlbeans.SchemaType;
-import org.apache.xmlbeans.SchemaTypeLoader;
-import org.apache.xmlbeans.XmlBeans;
-import org.apache.xmlbeans.XmlException;
-import org.apache.xmlbeans.XmlObject;
-import org.apache.xmlbeans.XmlOptions;
-import org.apache.xmlbeans.xml.stream.XMLInputStream;
-import org.apache.xmlbeans.xml.stream.XMLStreamException;
-import org.w3c.dom.Document;
-import org.w3c.dom.Node;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-@SuppressWarnings("deprecation")
-public class POIXMLTypeLoader {
-
- private static ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<>();
-
- // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
- // These constants should be common to all of POI and easy to use by other applications such as Tika
- private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office";
- private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel";
- private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word";
- private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
-
- public static final XmlOptions DEFAULT_XML_OPTIONS;
- static {
- DEFAULT_XML_OPTIONS = new XmlOptions();
- DEFAULT_XML_OPTIONS.setSaveOuter();
- DEFAULT_XML_OPTIONS.setUseDefaultNamespace();
- DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces();
- DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8");
- // Piccolo is disabled for POI builts, i.e. JAXP is used for parsing
- // so only user code using XmlObject/XmlToken.Factory.parse
- // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :))
- // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096);
-
- // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350
- // Update: disabled again for now as it caused strange NPEs and other problems
- // when reading properties in separate workbooks in multiple threads
- // DEFAULT_XML_OPTIONS.setUnsynchronized();
-
- Map<String, String> map = new HashMap<>();
- map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a");
- map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c");
- map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp");
- map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve");
- map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m");
- map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r");
- map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt");
- map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p");
- map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w");
- map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne");
- map.put(MS_OFFICE_URN, "o");
- map.put(MS_EXCEL_URN, "x");
- map.put(MS_WORD_URN, "w10");
- map.put(MS_VML_URN, "v");
- DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map));
- }
-
- private static XmlOptions getXmlOptions(XmlOptions options) {
- return options == null ? DEFAULT_XML_OPTIONS : options;
- }
-
- private static SchemaTypeLoader getTypeLoader(SchemaType type) {
- SchemaTypeLoader tl = typeLoader.get();
- if (tl == null) {
- ClassLoader cl = type.getClass().getClassLoader();
- tl = XmlBeans.typeLoaderForClassLoader(cl);
- typeLoader.set(tl);
- }
- return tl;
- }
-
- public static XmlObject newInstance(SchemaType type, XmlOptions options) {
- return getTypeLoader(type).newInstance(type, getXmlOptions(options));
- }
-
- public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
- try {
- return parse(new StringReader(xmlText), type, options);
- } catch (IOException e) {
- throw new XmlException("Unable to parse xml bean", e);
- }
- }
-
- public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException {
- try (InputStream is = new FileInputStream(file)) {
- return parse(is, type, options);
- }
- }
-
- public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException {
- try (InputStream is = file.openStream()) {
- return parse(is, type, options);
- }
- }
-
- public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
- try {
- Document doc = DocumentHelper.readDocument(jiois);
- return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
- } catch (SAXException e) {
- throw new IOException("Unable to parse xml bean", e);
- }
- }
-
- public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
- return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
- }
-
- public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
- try {
- Document doc = DocumentHelper.readDocument(new InputSource(jior));
- return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
- } catch (SAXException e) {
- throw new XmlException("Unable to parse xml bean", e);
- }
- }
-
- public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
- return getTypeLoader(type).parse(node, type, getXmlOptions(options));
- }
-
- public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
- return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
- }
-
- public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
- return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.dev;
-
-import java.io.*;
-import java.util.ArrayList;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-
-/**
- * Prints out the contents of a OOXML container.
- * Useful for seeing what parts are defined, and how
- * they're all related to each other.
- */
-public class OOXMLLister implements Closeable {
- private final OPCPackage container;
- private final PrintStream disp;
-
- public OOXMLLister(OPCPackage container) {
- this(container, System.out);
- }
- public OOXMLLister(OPCPackage container, PrintStream disp) {
- this.container = container;
- this.disp = disp;
- }
-
- /**
- * Figures out how big a given PackagePart is.
- *
- * @param part the PackagePart
- * @return the size of the PackagePart
- *
- * @throws IOException if the part can't be read
- */
- public static long getSize(PackagePart part) throws IOException {
- InputStream in = part.getInputStream();
- try {
- byte[] b = new byte[8192];
- long size = 0;
- int read = 0;
-
- while(read > -1) {
- read = in.read(b);
- if(read > 0) {
- size += read;
- }
- }
-
- return size;
- } finally {
- in.close();
- }
- }
-
- /**
- * Displays information on all the different
- * parts of the OOXML file container.
- * @throws InvalidFormatException if the package relations are invalid
- * @throws IOException if the package can't be read
- */
- public void displayParts() throws InvalidFormatException, IOException {
- ArrayList<PackagePart> parts = container.getParts();
- for (PackagePart part : parts) {
- disp.println(part.getPartName());
- disp.println("\t" + part.getContentType());
-
- if(! part.getPartName().toString().equals("/docProps/core.xml")) {
- disp.println("\t" + getSize(part) + " bytes");
- }
-
- if(! part.isRelationshipPart()) {
- disp.println("\t" + part.getRelationships().size() + " relations");
- for(PackageRelationship rel : part.getRelationships()) {
- displayRelation(rel, "\t ");
- }
- }
- }
- }
- /**
- * Displays information on all the different
- * relationships between different parts
- * of the OOXML file container.
- */
- public void displayRelations() {
- PackageRelationshipCollection rels =
- container.getRelationships();
- for (PackageRelationship rel : rels) {
- displayRelation(rel, "");
- }
- }
-
- private void displayRelation(PackageRelationship rel, String indent) {
- disp.println(indent+"Relationship:");
- disp.println(indent+"\tFrom: "+ rel.getSourceURI());
- disp.println(indent+"\tTo: " + rel.getTargetURI());
- disp.println(indent+"\tID: " + rel.getId());
- disp.println(indent+"\tMode: " + rel.getTargetMode());
- disp.println(indent+"\tType: " + rel.getRelationshipType());
- }
-
- @Override
- public void close() throws IOException {
- container.close();
- }
-
- public static void main(String[] args) throws IOException, InvalidFormatException {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println("\tjava OOXMLLister <filename>");
- System.exit(1);
- }
-
- File f = new File(args[0]);
- if(! f.exists()) {
- System.err.println("Error, file not found!");
- System.err.println("\t" + f);
- System.exit(2);
- }
-
- OOXMLLister lister = new OOXMLLister(
- OPCPackage.open(f.toString(), PackageAccess.READ)
- );
-
- try {
- lister.disp.println(f + "\n");
- lister.displayParts();
- lister.disp.println();
- lister.displayRelations();
- } finally {
- lister.close();
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.dev;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Enumeration;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipException;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipOutputStream;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Result;
-import javax.xml.transform.Source;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.poi.openxml4j.opc.internal.ZipHelper;
-import org.apache.poi.openxml4j.util.ZipSecureFile;
-import org.apache.poi.util.IOUtils;
-import org.w3c.dom.Document;
-import org.xml.sax.InputSource;
-
-/**
- * Reads a zipped OOXML file and produces a copy with the included
- * pretty-printed XML files.
- *
- * This is useful for comparing OOXML files produced by different tools as the often
- * use different formatting of the XML.
- */
-public class OOXMLPrettyPrint {
- private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
- private final DocumentBuilder documentBuilder;
-
- public OOXMLPrettyPrint() throws ParserConfigurationException {
- // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool
- ZipSecureFile.setMinInflateRatio(0.00001);
-
- documentBuilder = documentBuilderFactory.newDocumentBuilder();
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length <= 1 || args.length % 2 != 0) {
- System.err.println("Use:");
- System.err.println("\tjava OOXMLPrettyPrint [<filename> <outfilename>] ...");
- System.exit(1);
- }
-
- for(int i = 0;i < args.length;i+=2) {
- File f = new File(args[i]);
- if(! f.exists()) {
- System.err.println("Error, file not found!");
- System.err.println("\t" + f);
- System.exit(2);
- }
-
- handleFile(f, new File(args[i+1]));
- }
- System.out.println("Done.");
- }
-
- private static void handleFile(File file, File outFile) throws ZipException,
- IOException, ParserConfigurationException {
- System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);
-
- try (ZipFile zipFile = ZipHelper.openZipFile(file)) {
- try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) {
- new OOXMLPrettyPrint().handle(zipFile, out);
- }
- } finally {
- System.out.println();
- }
- }
-
- private void handle(ZipFile file, ZipOutputStream out) throws IOException {
- Enumeration<? extends ZipEntry> entries = file.entries();
- while(entries.hasMoreElements()) {
- ZipEntry entry = entries.nextElement();
-
- String name = entry.getName();
- out.putNextEntry(new ZipEntry(name));
- try {
- if(name.endsWith(".xml") || name.endsWith(".rels")) {
- Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry)));
- document.setXmlStandalone(true);
- pretty(document, out, 2);
- } else {
- System.out.println("Not pretty-printing non-XML file " + name);
- IOUtils.copy(file.getInputStream(entry), out);
- }
- } catch (Exception e) {
- throw new IOException("While handling entry " + name, e);
- } finally {
- out.closeEntry();
- }
- System.out.print(".");
- }
- }
-
- private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException {
- TransformerFactory transformerFactory = TransformerFactory.newInstance();
- Transformer transformer = transformerFactory.newTransformer();
- transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
- if (indent > 0) {
- // set properties to indent the resulting XML nicely
- transformer.setOutputProperty(OutputKeys.INDENT, "yes");
- transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
- }
- Result result = new StreamResult(outputStream);
- Source source = new DOMSource(document);
- transformer.transform(source, result);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.File;
-
-import org.apache.poi.POITextExtractor;
-
-/**
- * A command line wrapper around {@link ExtractorFactory}, useful
- * for when debugging.
- */
-public class CommandLineTextExtractor {
- public static final String DIVIDER = "=======================";
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" CommandLineTextExtractor <filename> [filename] [filename]");
- System.exit(1);
- }
-
- for (String arg : args) {
- System.out.println(DIVIDER);
-
- File f = new File(arg);
- System.out.println(f);
-
- POITextExtractor extractor =
- ExtractorFactory.createExtractor(f);
- try {
- POITextExtractor metadataExtractor =
- extractor.getMetadataTextExtractor();
-
- System.out.println(" " + DIVIDER);
- String metaData = metadataExtractor.getText();
- System.out.println(metaData);
- System.out.println(" " + DIVIDER);
- String text = extractor.getText();
- System.out.println(text);
- System.out.println(DIVIDER);
- System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
- } finally {
- extractor.close();
- }
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hsmf.MAPIMessage;
-import org.apache.poi.hsmf.datatypes.AttachmentChunks;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.poifs.crypt.Decryptor;
-import org.apache.poi.poifs.crypt.EncryptionInfo;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.FileMagic;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.NotOLE2FileException;
-import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.NotImplemented;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.util.Removal;
-import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xslf.usermodel.XSLFRelation;
-import org.apache.poi.xslf.usermodel.XSLFSlideShow;
-import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.poi.xwpf.usermodel.XWPFRelation;
-import org.apache.xmlbeans.XmlException;
-
-/**
- * Figures out the correct POITextExtractor for your supplied
- * document, and returns it.
- *
- * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
- * not present on the runtime classpath</p>
- * <p>Note 2 - rather than using this, for most cases you would be better
- * off switching to <a href="http://tika.apache.org">Apache Tika</a> instead!</p>
- */
-@SuppressWarnings("WeakerAccess")
-public class ExtractorFactory {
- private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
-
- public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
- protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
- protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
-
- /**
- * Should this thread prefer event based over usermodel based extractors?
- * (usermodel extractors tend to be more accurate, but use more memory)
- * Default is false.
- */
- public static boolean getThreadPrefersEventExtractors() {
- return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
- }
-
- /**
- * Should all threads prefer event based over usermodel based extractors?
- * (usermodel extractors tend to be more accurate, but use more memory)
- * Default is to use the thread level setting, which defaults to false.
- */
- public static Boolean getAllThreadsPreferEventExtractors() {
- return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
- }
-
- /**
- * Should this thread prefer event based over usermodel based extractors?
- * Will only be used if the All Threads setting is null.
- */
- public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
- OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
- }
-
- /**
- * Should all threads prefer event based over usermodel based extractors?
- * If set, will take preference over the Thread level setting.
- */
- public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
- OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
- }
-
- /**
- * Should this thread use event based extractors is available?
- * Checks the all-threads one first, then thread specific.
- */
- protected static boolean getPreferEventExtractor() {
- return OLE2ExtractorFactory.getPreferEventExtractor();
- }
-
- public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
- NPOIFSFileSystem fs = null;
- try {
- fs = new NPOIFSFileSystem(f);
- if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
- return (T)createEncryptedOOXMLExtractor(fs);
- }
- POITextExtractor extractor = createExtractor(fs);
- extractor.setFilesystem(fs);
- return (T)extractor;
- } catch (OfficeXmlFileException e) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
- } catch (NotOLE2FileException ne) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
- } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- throw e;
- }
- }
-
- public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
- InputStream is = FileMagic.prepareToCheckMagic(inp);
-
- FileMagic fm = FileMagic.valueOf(is);
-
- switch (fm) {
- case OLE2:
- NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
- boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
- return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
- case OOXML:
- return createExtractor(OPCPackage.open(is));
- default:
- throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
- }
- }
-
- /**
- * Tries to determine the actual type of file and produces a matching text-extractor for it.
- *
- * @param pkg An {@link OPCPackage}.
- * @return A {@link POIXMLTextExtractor} for the given file.
- * @throws IOException If an error occurs while reading the file
- * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
- * @throws XmlException If an XML parsing error occurs.
- * @throws IllegalArgumentException If no matching file type could be found.
- */
- public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
- try {
- // Check for the normal Office core document
- PackageRelationshipCollection core;
- core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
-
- // If nothing was found, try some of the other OOXML-based core types
- if (core.size() == 0) {
- // Could it be an OOXML-Strict one?
- core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
- }
- if (core.size() == 0) {
- // Could it be a visio one?
- core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
- if (core.size() == 1)
- return new XDGFVisioExtractor(pkg);
- }
-
- // Should just be a single core document, complain if not
- if (core.size() != 1) {
- throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
- }
-
- // Grab the core document part, and try to identify from that
- final PackagePart corePart = pkg.getPart(core.getRelationship(0));
- final String contentType = corePart.getContentType();
-
- // Is it XSSF?
- for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
- if ( rel.getContentType().equals( contentType ) ) {
- if (getPreferEventExtractor()) {
- return new XSSFEventBasedExcelExtractor(pkg);
- }
- return new XSSFExcelExtractor(pkg);
- }
- }
-
- // Is it XWPF?
- for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
- if ( rel.getContentType().equals( contentType ) ) {
- return new XWPFWordExtractor(pkg);
- }
- }
-
- // Is it XSLF?
- for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
- if ( rel.getContentType().equals( contentType ) ) {
- return new SlideShowExtractor(new XMLSlideShow(pkg));
- }
- }
-
- // special handling for SlideShow-Theme-files,
- if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
- return new SlideShowExtractor(new XMLSlideShow(pkg));
- }
-
- // How about xlsb?
- for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
- if (rel.getContentType().equals(contentType)) {
- return new XSSFBEventBasedExcelExtractor(pkg);
- }
- }
-
- throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
-
- } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) {
- // ensure that we close the package again if there is an error opening it, however
- // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
- pkg.revert();
- throw e;
- }
- }
-
- public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- return createExtractor(fs.getRoot());
- }
- public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- return createExtractor(fs.getRoot());
- }
- public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- return createExtractor(fs.getRoot());
- }
-
- public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
- {
- // First, check for OOXML
- for (String entryName : poifsDir.getEntryNames()) {
- if (entryName.equals("Package")) {
- OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
- return (T)createExtractor(pkg);
- }
- }
-
- // If not, ask the OLE2 code to check, with Scratchpad if possible
- return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
- }
-
- /**
- * Returns an array of text extractors, one for each of
- * the embedded documents in the file (if there are any).
- * If there are no embedded documents, you'll get back an
- * empty array. Otherwise, you'll get one open
- * {@link POITextExtractor} for each embedded file.
- *
- * @deprecated Use the method with correct "embedded"
- */
- @Deprecated
- @Removal(version="4.2")
- public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
- return getEmbeddedDocsTextExtractors(ext);
- }
-
- /**
- * Returns an array of text extractors, one for each of
- * the embedded documents in the file (if there are any).
- * If there are no embedded documents, you'll get back an
- * empty array. Otherwise, you'll get one open
- * {@link POITextExtractor} for each embedded file.
- */
- public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
- // All the embedded directories we spotted
- ArrayList<Entry> dirs = new ArrayList<>();
- // For anything else not directly held in as a POIFS directory
- ArrayList<InputStream> nonPOIFS = new ArrayList<>();
-
- // Find all the embedded directories
- DirectoryEntry root = ext.getRoot();
- if (root == null) {
- throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
- }
-
- if (ext instanceof ExcelExtractor) {
- // These are in MBD... under the root
- Iterator<Entry> it = root.getEntries();
- while (it.hasNext()) {
- Entry entry = it.next();
- if (entry.getName().startsWith("MBD")) {
- dirs.add(entry);
- }
- }
- } else if (ext instanceof WordExtractor) {
- // These are in ObjectPool -> _... under the root
- try {
- DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
- Iterator<Entry> it = op.getEntries();
- while (it.hasNext()) {
- Entry entry = it.next();
- if (entry.getName().startsWith("_")) {
- dirs.add(entry);
- }
- }
- } catch (FileNotFoundException e) {
- logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
- // ignored here
- }
- //} else if(ext instanceof PowerPointExtractor) {
- // Tricky, not stored directly in poifs
- // TODO
- } else if (ext instanceof OutlookTextExtactor) {
- // Stored in the Attachment blocks
- MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
- for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
- if (attachment.getAttachData() != null) {
- byte[] data = attachment.getAttachData().getValue();
- nonPOIFS.add( new ByteArrayInputStream(data) );
- } else if (attachment.getAttachmentDirectory() != null) {
- dirs.add(attachment.getAttachmentDirectory().getDirectory());
- }
- }
- }
-
- // Create the extractors
- if (dirs.size() == 0 && nonPOIFS.size() == 0){
- return new POITextExtractor[0];
- }
-
- ArrayList<POITextExtractor> textExtractors = new ArrayList<>();
- for (Entry dir : dirs) {
- textExtractors.add(createExtractor((DirectoryNode) dir));
- }
- for (InputStream nonPOIF : nonPOIFS) {
- try {
- textExtractors.add(createExtractor(nonPOIF));
- } catch (IllegalArgumentException e) {
- // Ignore, just means it didn't contain
- // a format we support as yet
- logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
- } catch (XmlException | OpenXML4JException e) {
- throw new IOException(e.getMessage(), e);
- }
- }
- return textExtractors.toArray(new POITextExtractor[textExtractors.size()]);
- }
-
- /**
- * Returns an array of text extractors, one for each of
- * the embedded documents in the file (if there are any).
- * If there are no embedded documents, you'll get back an
- * empty array. Otherwise, you'll get one open
- * {@link POITextExtractor} for each embedded file.
- *
- * @deprecated Use the method with correct "embedded"
- */
- @Deprecated
- @Removal(version="4.2")
- @NotImplemented
- @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
- public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
- return getEmbeddedDocsTextExtractors(ext);
- }
-
- /**
- * Returns an array of text extractors, one for each of
- * the embedded documents in the file (if there are any).
- * If there are no embedded documents, you'll get back an
- * empty array. Otherwise, you'll get one open
- * {@link POITextExtractor} for each embedded file.
- */
- @NotImplemented
- @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
- public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
- throw new IllegalStateException("Not yet supported");
- }
-
- private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
- throws IOException {
- String pass = Biff8EncryptionKey.getCurrentUserPassword();
- if (pass == null) {
- pass = Decryptor.DEFAULT_PASSWORD;
- }
-
- EncryptionInfo ei = new EncryptionInfo(fs);
- Decryptor dec = ei.getDecryptor();
- InputStream is = null;
- try {
- if (!dec.verifyPassword(pass)) {
- throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor");
- }
- is = dec.getDataStream(fs);
- return createExtractor(OPCPackage.open(is));
- } catch (IOException e) {
- throw e;
- } catch (Exception e) {
- throw new EncryptedDocumentException(e);
- } finally {
- IOUtils.closeQuietly(is);
-
- // also close the NPOIFSFileSystem here as we read all the data
- // while decrypting
- fs.close();
- }
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.xmlbeans.impl.common.SystemCache;
+
+/**
+ * Base class holding the functionality shared by the POI OOXML document classes:
+ * it keeps the {@link OPCPackage} the document was created from and gives access
+ * to the package properties.
+ */
+public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable {
+    public static final String DOCUMENT_CREATOR = "Apache POI";
+
+    /** Relation name used for OLE embeddings. */
+    public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
+
+    /** Relation name used for embedded OPC documents. */
+    public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package";
+
+    /** The OPC package of this document; reset to {@code null} by {@link #close()}. */
+    private OPCPackage pkg;
+
+    /** The properties of the OPC package, created on the first {@link #getProperties()} call. */
+    private POIXMLProperties properties;
+
+    /**
+     * Creates a document on top of the given package.
+     *
+     * @param pkg the package holding the document
+     */
+    protected POIXMLDocument(OPCPackage pkg) {
+        super(pkg);
+        init(pkg);
+    }
+
+    /**
+     * Creates a document on top of the given package, using a custom core document relation.
+     *
+     * @param pkg the package holding the document
+     * @param coreDocumentRel the relation type of the core document
+     */
+    protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) {
+        super(pkg, coreDocumentRel);
+        init(pkg);
+    }
+
+    /**
+     * Shared constructor tail: remembers the package and resets the XMLBeans SAX loader.
+     *
+     * @param opcPackage the package handed to the constructor
+     */
+    private void init(OPCPackage opcPackage) {
+        this.pkg = opcPackage;
+
+        // Workaround for XMLBEANS-512: drop the cached SAX loader so every parse
+        // starts with a fresh XML parser instead of one that may be in error.
+        SystemCache.get().setSaxLoader(null);
+    }
+
+    /**
+     * Opens the package at the given path, reporting an invalid format as an {@link IOException}.
+     * Exists to work around shortcomings in java's this() constructor calls.
+     *
+     * @param path the path to the document
+     * @return the new OPCPackage
+     *
+     * @exception IOException if there was a problem opening the document
+     */
+    public static OPCPackage openPackage(String path) throws IOException {
+        final OPCPackage opened;
+        try {
+            opened = OPCPackage.open(path);
+        } catch (InvalidFormatException ife) {
+            throw new IOException(ife.toString(), ife);
+        }
+        return opened;
+    }
+
+    /**
+     * Get the assigned OPCPackage
+     *
+     * @return the assigned OPCPackage, or {@code null} once {@link #close()} has run
+     */
+    public OPCPackage getPackage() {
+        return pkg;
+    }
+
+    /**
+     * @return the package part of this document, as returned by {@link #getPackagePart()}
+     */
+    protected PackagePart getCorePart() {
+        return getPackagePart();
+    }
+
+    /**
+     * Collects the PackageParts that the base document part is related to through
+     * relationships selected by the given value. Note that the value is handed to
+     * {@link PackagePart#getRelationshipsByType(String)}.
+     *
+     * @param contentType the value used to select the relationships
+     *
+     * @return one related part per matching relationship, in iteration order
+     *
+     * @throws InvalidFormatException when the relationships or the parts contain errors
+     *
+     * @see org.apache.poi.xssf.usermodel.XSSFRelation
+     * @see org.apache.poi.xslf.usermodel.XSLFRelation
+     * @see org.apache.poi.xwpf.usermodel.XWPFRelation
+     * @see org.apache.poi.xdgf.usermodel.XDGFRelation
+     */
+    protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
+        final PackagePart base = getPackagePart();
+        final PackageRelationshipCollection matches = base.getRelationshipsByType(contentType);
+
+        final PackagePart[] related = new PackagePart[matches.size()];
+        int idx = 0;
+        for (PackageRelationship rel : matches) {
+            related[idx++] = base.getRelatedPart(rel);
+        }
+        return related;
+    }
+
+    /**
+     * Get the document properties. This gives you access to the
+     * core ooxml properties, and the extended ooxml properties.
+     * The properties object is created on first use and reused afterwards.
+     *
+     * @return the document properties
+     * @throws POIXMLException wrapping any exception raised while creating the properties
+     */
+    public POIXMLProperties getProperties() {
+        if (properties != null) {
+            return properties;
+        }
+        try {
+            properties = new POIXMLProperties(pkg);
+        } catch (Exception e) {
+            throw new POIXMLException(e);
+        }
+        return properties;
+    }
+
+    /**
+     * Get the document's embedded files.
+     *
+     * @return the document's embedded files
+     *
+     * @throws OpenXML4JException if the embedded parts can't be determined
+     */
+    public abstract List<PackagePart> getAllEmbedds() throws OpenXML4JException;
+
+    /**
+     * Reads the document through the given factory and then calls {@code onDocumentRead()}.
+     *
+     * @param factory the factory passed on to {@code read}
+     * @throws IOException declared for callers; OpenXML4J failures are rethrown as {@link POIXMLException}
+     */
+    protected final void load(POIXMLFactory factory) throws IOException {
+        final Map<PackagePart, POIXMLDocumentPart> visited = new HashMap<>();
+        try {
+            read(factory, visited);
+        } catch (OpenXML4JException e) {
+            throw new POIXMLException(e);
+        }
+        onDocumentRead();
+        visited.clear();
+    }
+
+    /**
+     * Closes the underlying {@link OPCPackage} from which this
+     * document was read, if there is one. A package opened with
+     * {@link PackageAccess#READ} is reverted, any other package is closed.
+     *
+     * <p>Once this has been called, no further
+     * operations, updates or reads should be performed on the
+     * document.
+     *
+     * @throws IOException for writable packages, if an IO exception occur during the saving process.
+     */
+    @Override
+    public void close() throws IOException {
+        if (pkg == null) {
+            return;
+        }
+        final boolean readOnly = pkg.getPackageAccess() == PackageAccess.READ;
+        if (readOnly) {
+            pkg.revert();
+        } else {
+            pkg.close();
+        }
+        pkg = null;
+    }
+
+    /**
+     * Write out this document to an Outputstream.
+     *
+     * Note - if the Document was opened from a {@link File} rather
+     * than an {@link InputStream}, you <b>must</b> write out to
+     * a different file, overwriting via an OutputStream isn't possible.
+     *
+     * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive
+     * or has a high cost/latency associated with each written byte,
+     * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream}
+     * to improve write performance.
+     *
+     * @param stream - the java OutputStream you wish to write the file to
+     *
+     * @exception IOException if anything can't be written, or if the document has no package any more.
+     */
+    @SuppressWarnings("resource")
+    public final void write(OutputStream stream) throws IOException {
+        final OPCPackage target = getPackage();
+        if (target == null) {
+            throw new IOException("Cannot write data, document seems to have been closed already");
+        }
+
+        // force all children to commit their changes into the underlying OOXML Package
+        // TODO Shouldn't they be committing to the new one instead?
+        final Set<PackagePart> alreadySaved = new HashSet<>();
+        onSave(alreadySaved);
+        alreadySaved.clear();
+
+        // save extended and custom properties
+        getProperties().commit();
+
+        target.save(stream);
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.openxml4j.opc.TargetMode;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.xddf.usermodel.chart.XDDFChart;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+/**
+ * Represents an entry of an OOXML package.
+ * <p>
+ * Each POIXMLDocumentPart keeps a reference to the underlying {@link org.apache.poi.openxml4j.opc.PackagePart}.
+ * </p>
+ */
+public class POIXMLDocumentPart {
+ private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class);
+
+ // relation type used by rebase() to look up the core document; starts as the standard core-document type
+ private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT;
+ // the package part this document part wraps
+ private PackagePart packagePart;
+ // parent document part, assigned by the (parent, part) constructor
+ private POIXMLDocumentPart parent;
+ // child relations keyed by relationship id; LinkedHashMap keeps them in insertion order
+ private Map<String, RelationPart> relations = new LinkedHashMap<>();
+ // flag read and written through isCommited() / setCommited(boolean)
+ private boolean isCommited = false;
+
+ /**
+  * Tells whether the embedded part has already been flagged as committed.
+  *
+  * @return {@code true} if the committed flag is set
+  */
+ public boolean isCommited() {
+     return this.isCommited;
+ }
+
+ /**
+ * Sets the flag that marks the embedded part as committed; the value is
+ * reported back by {@link #isCommited()}.
+ *
+ * @param isCommited the new value of the committed flag
+ */
+ public void setCommited(boolean isCommited) {
+ this.isCommited = isCommited;
+ }
+
+ /**
+  * A cached pairing of a {@link PackageRelationship} with the child document part it points to,
+  * as seen from the document that contains this RelationPart.
+  * A child document part may only belong to one parent, but it is often referenced by other
+  * parents too, each using its own {@link PackageRelationship#getId() relationship id}.
+  */
+ public static class RelationPart {
+     private final PackageRelationship relationship;
+     private final POIXMLDocumentPart documentPart;
+
+     /**
+      * @param relationship the relationship leading to the child
+      * @param documentPart the child document part
+      */
+     RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) {
+         this.relationship = relationship;
+         this.documentPart = documentPart;
+     }
+
+     /**
+      * @return the cached relationship, which uniquely identifies this child document part within the parent
+      */
+     public PackageRelationship getRelationship() {
+         return this.relationship;
+     }
+
+     /**
+      * Returns the child, cast (unchecked) to whatever type the caller expects.
+      *
+      * @param <T> the cast of the caller to a document sub class
+      * @return the child document part
+      */
+     @SuppressWarnings("unchecked")
+     public <T extends POIXMLDocumentPart> T getDocumentPart() {
+         final T typed = (T) this.documentPart;
+         return typed;
+     }
+ }
+
+ /**
+  * Number of incoming relations from other parts to this part.
+  */
+ private int relationCounter;
+
+ /**
+  * Adds one to the incoming-relation counter.
+  *
+  * @return the counter value after the increment
+  */
+ int incrementRelationCounter() {
+     return ++relationCounter;
+ }
+
+ /**
+  * Subtracts one from the incoming-relation counter.
+  *
+  * @return the counter value after the decrement
+  */
+ int decrementRelationCounter() {
+     return --relationCounter;
+ }
+
+ /**
+  * @return the current value of the incoming-relation counter
+  */
+ int getRelationCounter() {
+     return this.relationCounter;
+ }
+
+ /**
+ * Construct POIXMLDocumentPart representing a "core document" package part.
+ * Delegates to {@link #POIXMLDocumentPart(OPCPackage, String)} with
+ * {@link PackageRelationshipTypes#CORE_DOCUMENT} as the relation type.
+ *
+ * @param pkg the OPCPackage containing this document
+ */
+ public POIXMLDocumentPart(OPCPackage pkg) {
+ this(pkg, PackageRelationshipTypes.CORE_DOCUMENT);
+ }
+
+ /**
+ * Construct POIXMLDocumentPart representing a custom "core document" package part.
+ * The part is obtained from the package through {@code getPartFromOPCPackage}
+ * and the relation type is remembered for later use by {@link #rebase(OPCPackage)}.
+ *
+ * @param pkg the OPCPackage containing this document
+ * @param coreDocumentRel the relation type of this document
+ */
+ public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) {
+ this(getPartFromOPCPackage(pkg, coreDocumentRel));
+ // overrides the field's default relation type
+ this.coreDocumentRel = coreDocumentRel;
+ }
+
+ /**
+ * Creates new POIXMLDocumentPart - called by client code to create new parts from scratch.
+ * This constructor assigns neither a package part nor a parent.
+ *
+ * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)
+ */
+ public POIXMLDocumentPart() {
+ }
+
+ /**
+ * Creates a POIXMLDocumentPart representing the given package part, without a parent.
+ * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
+ *
+ * @param part - The package part that holds xml data representing this sheet.
+ * @see #read(POIXMLFactory, java.util.Map)
+ * @since POI 3.14-Beta1
+ */
+ public POIXMLDocumentPart(PackagePart part) {
+ this(null, part);
+ }
+
+ /**
+  * Creates a POIXMLDocumentPart for the given package part and records its parent.
+  * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
+  *
+  * @param parent - Parent part
+  * @param part - The package part that holds xml data representing this sheet.
+  * @see #read(POIXMLFactory, java.util.Map)
+  * @since POI 3.14-Beta1
+  */
+ public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
+     this.parent = parent;
+     this.packagePart = part;
+ }
+
+ /**
+  * When you open something like a theme, call this to
+  * re-base the XML Document onto the core child of the
+  * current core document: the package part of this object is replaced by the
+  * single part related to it through the core document relation type.
+  *
+  * @param pkg the package to be rebased (not read by this method's body)
+  * @throws InvalidFormatException if there was an error in the core document relation
+  * @throws IllegalStateException if the number of core document relations is not exactly one
+  */
+ protected final void rebase(OPCPackage pkg) throws InvalidFormatException {
+     final PackageRelationshipCollection coreRels = packagePart.getRelationshipsByType(coreDocumentRel);
+     final int found = coreRels.size();
+     if (found != 1) {
+         throw new IllegalStateException(
+             "Tried to rebase using " + coreDocumentRel + " but found " + found + " parts of the right type");
+     }
+     packagePart = packagePart.getRelatedPart(coreRels.getRelationship(0));
+ }
+
+ /**
+  * Gives access to the PackagePart wrapped by this document part.
+  *
+  * @return the underlying PackagePart
+  */
+ public final PackagePart getPackagePart() {
+     return this.packagePart;
+ }
+
+ /**
+  * Returns the child document parts of this POIXMLDocumentPart, one entry per
+  * cached relation. The list is built anew on every call and is unmodifiable.
+  *
+  * @return child relations
+  */
+ public final List<POIXMLDocumentPart> getRelations() {
+     final List<POIXMLDocumentPart> children = new ArrayList<>();
+     for (RelationPart relation : relations.values()) {
+         final POIXMLDocumentPart child = relation.getDocumentPart();
+         children.add(child);
+     }
+     return Collections.unmodifiableList(children);
+ }
+
+ /**
+  * Returns the cached relations of this POIXMLDocumentPart as an unmodifiable
+  * copy, so later changes to the relations do not show up in the returned list.
+  *
+  * @return child relations
+  */
+ public final List<RelationPart> getRelationParts() {
+     return Collections.unmodifiableList(new ArrayList<RelationPart>(relations.values()));
+ }
+
+ /**
+  * Looks up the target {@link POIXMLDocumentPart} of the relation whose
+  * {@link PackageRelationship#getId()} equals the given id, i.e. the relation
+  * from the {@link PackagePart} of this {@link POIXMLDocumentPart} to the
+  * {@link PackagePart} of the target.
+  *
+  * @param id The relation id to look for
+  * @return the target part of the relation, or null, if none exists
+  */
+ public final POIXMLDocumentPart getRelationById(String id) {
+     final RelationPart match = getRelationPartById(id);
+     if (match == null) {
+         return null;
+     }
+     return match.getDocumentPart();
+ }
+
+ /**
+  * Looks up the cached {@link RelationPart} stored under the given
+  * {@link PackageRelationship#getId() relationship id}, i.e. the relation from the
+  * {@link PackagePart} of this {@link POIXMLDocumentPart} to the
+  * {@link PackagePart} of the target {@link POIXMLDocumentPart}.
+  *
+  * @param id The relation id to look for
+  * @return the target relation part, or null, if none exists
+  * @since 4.0.0
+  */
+ public final RelationPart getRelationPartById(String id) {
+     return this.relations.get(id);
+ }
+
+ /**
+  * Returns the first {@link PackageRelationship#getId()} of the
+  * {@link PackageRelationship}, that sources from the {@link PackagePart} of
+  * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given
+  * parameter value.
+  * <p>
+  * There can be multiple references to the given {@link POIXMLDocumentPart}
+  * and only the first in the order of creation is returned. Parts are
+  * matched by object identity.
+  *
+  * @param part The {@link POIXMLDocumentPart} for which the according
+  *             relation-id shall be found.
+  * @return The value of the {@link PackageRelationship#getId()} or null, if
+  *         parts are not related.
+  */
+ public final String getRelationId(POIXMLDocumentPart part) {
+     for (RelationPart candidate : relations.values()) {
+         final POIXMLDocumentPart target = candidate.getDocumentPart();
+         if (target != part) {
+             continue;
+         }
+         return candidate.getRelationship().getId();
+     }
+     return null;
+ }
+
+ /**
+  * Add a new child POIXMLDocumentPart. A relationship that already exists between
+  * the two package parts is reused; otherwise a new internal relationship is added
+  * to the package part of this document part.
+  *
+  * @param relId the preferred relation id, when null the next free relation id will be used
+  * @param relationshipType the package relationship type
+  * @param part the child to add
+  * @return the new RelationPart
+  * @since 3.14-Beta1
+  */
+ public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) {
+     final PackagePart childPart = part.getPackagePart();
+     PackageRelationship rel = this.packagePart.findExistingRelation(childPart);
+     if (rel == null) {
+         rel = packagePart.addRelationship(
+             childPart.getPartName(), TargetMode.INTERNAL, relationshipType.getRelation(), relId);
+     }
+     addRelation(rel, part);
+     return new RelationPart(rel, part);
+ }
+
+ /**
+ * Add a new child POIXMLDocumentPart
+ *
+ * @param pr the relationship of the child
+ * @param part the child to add
+ */
+ private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) {
+ relations.put(pr.getId(), new RelationPart(pr, part));
+ part.incrementRelationCounter();
+
+ }
+
+ /**
+ * Removes the relation from this part to the given part and, once the
+ * given part is no longer referenced, removes that part from the package
+ * as well.
+ * <p>
+ * When several relationships point to the same part, only the first one
+ * found is removed. Removing by relation id
+ * ({@link #removeRelation(String)}) is preferred.
+ *
+ * @param part the part which relation is to be removed from this document
+ */
+ protected final void removeRelation(POIXMLDocumentPart part) {
+ final boolean removeUnusedParts = true;
+ removeRelation(part, removeUnusedParts);
+ }
+
+ /**
+ * Removes the relation from this part to the given part. If the flag is
+ * set and the given part is no longer referenced afterwards, the part is
+ * removed from the package too.
+ * <p>
+ * When several relationships point to the same part, only the first one
+ * found by {@link #getRelationId(POIXMLDocumentPart)} is removed. Removing
+ * by relation id is preferred.
+ *
+ * @param part The related part, to which the relation shall be removed.
+ * @param removeUnusedParts true, if the part shall be removed from the package if not
+ * needed any longer.
+ * @return true, if the relation was removed
+ */
+ protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) {
+ // an unrelated part resolves to a null id, which the id based removal reports as false
+ return removeRelation(getRelationId(part), removeUnusedParts);
+ }
+
+ /**
+ * Removes the relation with the given id from this part and, once the
+ * related part is no longer referenced, removes that part from the package
+ * as well.
+ *
+ * @param partId the part id which relation is to be removed from this document
+ * @since 4.0.0
+ */
+ protected final void removeRelation(String partId) {
+ final boolean removeUnusedParts = true;
+ removeRelation(partId, removeUnusedParts);
+ }
+
+ /**
+ * Removes the relation with the given id from this part. If the flag is
+ * set and the related part's relation counter has dropped to zero, the part
+ * is notified via {@code onDocumentRemove()} and removed from the package.
+ *
+ * @param partId The related part id, to which the relation shall be removed.
+ * @param removeUnusedParts true, if the part shall be removed from the package if not
+ * needed any longer.
+ * @return true, if the relation was removed; false, if no relation with that id is known
+ * @since 4.0.0
+ */
+ private final boolean removeRelation(String partId, boolean removeUnusedParts) {
+ final RelationPart relation = relations.get(partId);
+ if (relation == null) {
+ // part is not related with this POIXMLDocumentPart
+ return false;
+ }
+
+ final POIXMLDocumentPart target = relation.getDocumentPart();
+ // one reference less, both in the package part and in our own bookkeeping
+ target.decrementRelationCounter();
+ getPackagePart().removeRelationship(partId);
+ relations.remove(partId);
+
+ // the last relation to the target is gone: drop the target part itself
+ if (removeUnusedParts && target.getRelationCounter() == 0) {
+ try {
+ target.onDocumentRemove();
+ } catch (IOException e) {
+ throw new POIXMLException(e);
+ }
+ getPackagePart().getPackage().removePart(target.getPackagePart());
+ }
+ return true;
+ }
+
+
+ /**
+ * Gives access to the part this part was created or read under.
+ *
+ * @return the parent POIXMLDocumentPart or {@code null} for the root element.
+ */
+ public final POIXMLDocumentPart getParent() {
+ return this.parent;
+ }
+
+ @Override
+ public String toString() {
+ // a part without a backing package part has no meaningful description
+ if (packagePart == null) {
+ return "";
+ }
+ return packagePart.toString();
+ }
+
+ /**
+ * Save the content in the underlying package part.
+ * The default implementation is empty, meaning that the package part is left unmodified.
+ * <p>
+ * Sub-classes should override and add logic to marshal the "model" into Ooxml4J.
+ * <p>
+ * For example, the code saving a generic XML entry may look as follows:
+ * <pre>
+ * protected void commit() throws IOException {
+ * PackagePart part = getPackagePart();
+ * OutputStream out = part.getOutputStream();
+ * XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
+ * bean.save(out, DEFAULT_XML_OPTIONS);
+ * out.close();
+ * }
+ * </pre>
+ *
+ * @throws IOException a subclass may throw an IOException if the changes can't be committed
+ */
+ protected void commit() throws IOException {
+ // intentionally empty - sub-classes override this hook
+ }
+
+ /**
+ * Saves this part and, recursively, every related part that has not been
+ * saved yet, by firing {@link #commit()} on each of them.
+ * Parts flagged as already committed are skipped entirely.
+ *
+ * @param alreadySaved context set containing already visited nodes
+ * @throws IOException a related part may throw an IOException if the changes can't be saved
+ */
+ protected final void onSave(Set<PackagePart> alreadySaved) throws IOException {
+ if (!this.isCommited) {
+ // this usually clears out previous content in the part...
+ prepareForCommit();
+ commit();
+
+ // mark this part before descending, then visit the unsaved children
+ alreadySaved.add(this.getPackagePart());
+ for (RelationPart relation : relations.values()) {
+ final POIXMLDocumentPart child = relation.getDocumentPart();
+ if (alreadySaved.contains(child.getPackagePart())) {
+ continue;
+ }
+ child.onSave(alreadySaved);
+ }
+ }
+ }
+
+ /**
+ * Ensure that a memory based package part does not have lingering data from previous
+ * commit() calls.
+ * <p>
+ * Note: This is overwritten for some objects, as *PictureData seem to store the actual content
+ * in the part directly without keeping a copy like all others therefore we need to handle them differently.
+ */
+ protected void prepareForCommit() {
+ PackagePart part = this.getPackagePart();
+ if (part != null) {
+ part.clear();
+ }
+ }
+
+ /**
+ * Creates a new child POIXMLDocumentPart together with its relationship,
+ * passing -1 as the part number.
+ *
+ * @param descriptor the part descriptor
+ * @param factory the factory that will create an instance of the requested relation
+ * @return the created child POIXMLDocumentPart
+ * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
+ * equivalent part names and package implementers shall neither
+ * create nor recognize packages with equivalent part names.
+ */
+ public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) {
+ final RelationPart created = createRelationship(descriptor, factory, -1, false);
+ return created.getDocumentPart();
+ }
+
+ /**
+ * Creates a new child POIXMLDocumentPart with the given part number,
+ * together with its relationship.
+ *
+ * @param descriptor the part descriptor
+ * @param factory the factory that will create an instance of the requested relation
+ * @param idx part number
+ * @return the created child POIXMLDocumentPart
+ * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
+ * equivalent part names and package implementers shall neither
+ * create nor recognize packages with equivalent part names.
+ */
+ public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) {
+ final RelationPart created = createRelationship(descriptor, factory, idx, false);
+ return created.getDocumentPart();
+ }
+
+ /**
+ * Identifies the next available part number for a part of the given type,
+ * if possible, otherwise -1 if none are available.
+ * The found (valid) index can then be safely given to
+ * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or
+ * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)}
+ * without naming clashes.
+ * If parts with other types are already claiming a name for this relationship
+ * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace
+ * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered
+ * when finding the next spare number.
+ * <p>
+ * For descriptors whose file name does not depend on the index, 0 is
+ * returned when the default name is free and -1 when it is taken.
+ *
+ * @param descriptor The relationship type to find the part number for
+ * @param minIdx The minimum free index to assign, use -1 for any
+ * @return The next free part number, or -1 if none available
+ * @throws POIXMLException if a candidate part name is not a valid part name
+ */
+ protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) {
+ OPCPackage pkg = packagePart.getPackage();
+
+ try {
+ String name = descriptor.getDefaultFileName();
+ // the name is not index based if an arbitrary index yields the default name again
+ if (name.equals(descriptor.getFileName(9999))) {
+ // Non-index based, check if default is free
+ PackagePartName ppName = PackagingURIHelper.createPartName(name);
+ // taken: nothing free (-1); free: the default name can be used (0)
+ return pkg.containPart(ppName) ? -1 : 0;
+ }
+
+ // Default to searching from 1, unless they asked for 0+
+ int idx = (minIdx < 0) ? 1 : minIdx;
+ // Bound the search relative to the real start index: with n parts in the
+ // package, the n+1 candidates idx..idx+n cannot all be taken. Basing the
+ // bound on a negative minIdx would cut the window short and could report
+ // -1 although a free index exists.
+ int maxIdx = idx + pkg.getParts().size();
+ while (idx <= maxIdx) {
+ name = descriptor.getFileName(idx);
+ PackagePartName ppName = PackagingURIHelper.createPartName(name);
+ if (!pkg.containPart(ppName)) {
+ return idx;
+ }
+ idx++;
+ }
+ } catch (InvalidFormatException e) {
+ // Give a general wrapped exception for the problem
+ throw new POIXMLException(e);
+ }
+ return -1;
+ }
+
+ /**
+ * Creates a new package part for the given descriptor and index, wraps it
+ * in a document part obtained from the factory and, unless suppressed,
+ * relates it to this part.
+ *
+ * @param descriptor the part descriptor
+ * @param factory the factory that will create an instance of the requested relation
+ * @param idx part number
+ * @param noRelation if true, then no relationship is added.
+ * @return the created child together with its relationship; the relationship is
+ * null when {@code noRelation} is true
+ * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
+ * equivalent part names and package implementers shall neither
+ * create nor recognize packages with equivalent part names.
+ */
+ public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) {
+ final boolean withRelation = !noRelation;
+ try {
+ final PackagePartName partName = PackagingURIHelper.createPartName(descriptor.getFileName(idx));
+ final PackagePart createdPart = packagePart.getPackage().createPart(partName, descriptor.getContentType());
+
+ /* the package relationship only exists, if a relation was asked for */
+ final PackageRelationship relationship = withRelation
+ ? packagePart.addRelationship(partName, TargetMode.INTERNAL, descriptor.getRelation())
+ : null;
+
+ final POIXMLDocumentPart child = factory.newDocumentPart(descriptor);
+ child.packagePart = createdPart;
+ child.parent = this;
+ if (withRelation) {
+ /* only add to relations, if according relationship is being created. */
+ addRelation(relationship, child);
+ }
+
+ return new RelationPart(relationship, child);
+ } catch (PartAlreadyExistsException pae) {
+ // Return the specific exception so the user knows
+ // that the name is already taken
+ throw pae;
+ } catch (Exception e) {
+ // Give a general wrapped exception for the problem
+ throw new POIXMLException(e);
+ }
+ }
+
+ /**
+ * Iterate through the relationships of the underlying PackagePart and create child
+ * POIXMLDocumentPart instances using the specified factory.
+ * <p>
+ * Only relationships with an internal target are followed. All children of this part
+ * are created and registered first; the newly created ones are read recursively afterwards.
+ *
+ * @param factory the factory object that creates POIXMLDocumentPart instances
+ * @param context context map containing already visited parts, keyed by their PackagePart
+ * @throws OpenXML4JException thrown when a related part can't be read
+ * @throws POIXMLException if the context already maps this part's PackagePart to a different document part
+ */
+ protected void read(POIXMLFactory factory, Map<PackagePart, POIXMLDocumentPart> context) throws OpenXML4JException {
+ PackagePart pp = getPackagePart();
+ // add mapping a second time, in case the initial caller hasn't done so
+ POIXMLDocumentPart otherChild = context.put(pp, this);
+ if (otherChild != null && otherChild != this) {
+ throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!");
+ }
+
+ // nothing to follow for a part without relationships
+ if (!pp.hasRelationships()) return;
+
+ PackageRelationshipCollection rels = packagePart.getRelationships();
+ // children created in this call; they are read only after all relations of this part are registered
+ List<POIXMLDocumentPart> readLater = new ArrayList<>();
+
+ // scan breadth-first, so parent-relations are hopefully the shallowest element
+ for (PackageRelationship rel : rels) {
+ if (rel.getTargetMode() == TargetMode.INTERNAL) {
+ URI uri = rel.getTargetURI();
+
+ // check for internal references (e.g. '#Sheet1!A1'):
+ // with a fragment present, only the path is used to build the part name
+ PackagePartName relName;
+ if (uri.getRawFragment() != null) {
+ relName = PackagingURIHelper.createPartName(uri.getPath());
+ } else {
+ relName = PackagingURIHelper.createPartName(uri);
+ }
+
+ final PackagePart p = packagePart.getPackage().getPart(relName);
+ if (p == null) {
+ // the relationship points to a part that is not in the package - log and skip it
+ logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI());
+ continue;
+ }
+
+ POIXMLDocumentPart childPart = context.get(p);
+ if (childPart == null) {
+ // first time this package part is seen - create its document part
+ childPart = factory.createDocumentPart(this, p);
+ // if this part is a chart and the child is an embedded workbook, hand the workbook
+ // to the chart, so that the updated embedded part can also be written on save
+ if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) {
+ ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart);
+ }
+ childPart.parent = this;
+ // already add child to context, so other children can reference it
+ context.put(p, childPart);
+ readLater.add(childPart);
+ }
+
+ // the relation is registered for new and for already known children alike
+ addRelation(rel, childPart);
+ }
+ }
+
+ // now descend into the children that were created above
+ for (POIXMLDocumentPart childPart : readLater) {
+ childPart.read(factory, context);
+ }
+ }
+
+ /**
+ * Get the PackagePart that is the target of a relationship from this Part.
+ *
+ * @param rel The relationship
+ * @return The target part
+ * @throws InvalidFormatException thrown if the related part has is erroneous
+ */
+ protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException {
+ return getPackagePart().getRelatedPart(rel);
+ }
+
+
+ /**
+ * Fired when a new package part is created.
+ * The default implementation does nothing.
+ *
+ * @throws IOException a subclass may throw an IOException on document creation
+ */
+ protected void onDocumentCreate() throws IOException {
+ // intentionally empty - hook for sub-classes
+ }
+
+ /**
+ * Fired when a package part is read.
+ * The default implementation does nothing.
+ *
+ * @throws IOException a subclass may throw an IOException when a document is read
+ */
+ protected void onDocumentRead() throws IOException {
+ // intentionally empty - hook for sub-classes
+ }
+
+ /**
+ * Fired when a package part is about to be removed from the package.
+ * The default implementation does nothing.
+ *
+ * @throws IOException a subclass may throw an IOException when a document is removed
+ */
+ protected void onDocumentRemove() throws IOException {
+ // intentionally empty - hook for sub-classes
+ }
+
+ /**
+ * Internal method, do not use!
+ * <p>
+ * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()}
+ * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed.
+ *
+ * @param part the part which is to be read
+ * @throws IOException if the part can't be read
+ * @deprecated internal bridge only, it is meant to be removed
+ */
+ @Internal
+ @Deprecated
+ public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException {
+ // simply forwards to the protected hook of the given part
+ part.onDocumentRead();
+ }
+
+ /**
+ * Retrieves the core document part
+ *
+ * @since POI 3.14-Beta1
+ */
+ private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) {
+ PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0);
+
+ if (coreRel != null) {
+ PackagePart pp = pkg.getPart(coreRel);
+ if (pp == null) {
+ throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found.");
+ }
+ return pp;
+ }
+
+ coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0);
+ if (coreRel != null) {
+ throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
+ }
+
+ throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+/**
+ * Indicates a generic OOXML error.
+ * <p>
+ * This is an unchecked exception; all constructors simply delegate to
+ * {@link RuntimeException}.
+ *
+ * @author Yegor Kozlov
+ */
+@SuppressWarnings("serial")
+public final class POIXMLException extends RuntimeException{
+ /**
+ * Create a new {@code POIXMLException} with no
+ * detail message.
+ */
+ public POIXMLException() {
+ super();
+ }
+
+ /**
+ * Create a new {@code POIXMLException} with
+ * the {@code String} specified as an error message.
+ *
+ * @param msg The error message for the exception.
+ */
+ public POIXMLException(String msg) {
+ super(msg);
+ }
+
+ /**
+ * Create a new {@code POIXMLException} with
+ * the {@code String} specified as an error message and the cause.
+ *
+ * @param msg The error message for the exception.
+ * @param cause the cause (which is saved for later retrieval by the
+ * {@link #getCause()} method). (A {@code null} value is
+ * permitted, and indicates that the cause is nonexistent or
+ * unknown.)
+ */
+ public POIXMLException(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+
+ /**
+ * Create a new {@code POIXMLException} with
+ * the specified cause.
+ *
+ * @param cause the cause (which is saved for later retrieval by the
+ * {@link #getCause()} method). (A {@code null} value is
+ * permitted, and indicates that the cause is nonexistent or
+ * unknown.)
+ */
+ public POIXMLException(Throwable cause) {
+ super(cause);
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.lang.reflect.InvocationTargetException;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Base class of the factories that turn package parts into <code>POIXMLDocumentPart</code> instances.
+ * Sub-classes contribute the descriptor lookup and the actual constructor invocation.
+ */
+public abstract class POIXMLFactory {
+ private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class);
+
+ /** constructor signature of parts which know their parent */
+ private static final Class<?>[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class};
+ /** constructor signature of parts which are built from the package part alone */
+ private static final Class<?>[] ORPHAN_PART = {PackagePart.class};
+
+ /**
+ * Create a POIXMLDocumentPart from existing package part and relation. This method is called
+ * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document.
+ * <p>
+ * If the relationship type has no descriptor, or the descriptor names no class, a plain
+ * POIXMLDocumentPart is returned. Otherwise the (parent, part) constructor of the described
+ * class is tried first and the (part) constructor is the fallback.
+ *
+ * @param parent parent part
+ * @param part the PackagePart representing the created instance
+ * @return A new instance of a POIXMLDocumentPart.
+ * @throws POIXMLException if the part is not related to the parent or can't be instantiated
+ *
+ * @since POI 3.14-Beta1
+ */
+ public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
+ final PackageRelationship relationship = getPackageRelationship(parent, part);
+ final POIXMLRelation descriptor = getDescriptor(relationship.getRelationshipType());
+
+ if (descriptor == null || descriptor.getRelationClass() == null) {
+ LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + relationship.getRelationshipType());
+ return new POIXMLDocumentPart(parent, part);
+ }
+
+ final Class<? extends POIXMLDocumentPart> partClass = descriptor.getRelationClass();
+ try {
+ return instantiate(partClass, parent, part);
+ } catch (Exception e) {
+ // report the message of the underlying problem, if there is one
+ final Throwable root = (e.getCause() == null) ? e : e.getCause();
+ throw new POIXMLException(root.getMessage(), e);
+ }
+ }
+
+ /**
+ * Instantiates the part class, preferring the (parent, part) constructor over the (part) one.
+ */
+ private POIXMLDocumentPart instantiate(Class<? extends POIXMLDocumentPart> partClass, POIXMLDocumentPart parent, PackagePart part)
+ throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
+ try {
+ return createDocumentPart(partClass, PARENT_PART, new Object[]{parent, part});
+ } catch (NoSuchMethodException e) {
+ return createDocumentPart(partClass, ORPHAN_PART, new Object[]{part});
+ }
+ }
+
+ /**
+ * Need to delegate instantiation to sub class because of constructor visibility
+ *
+ * @param cls the document class to be instantiated
+ * @param classes the classes of the constructor arguments
+ * @param values the values of the constructor arguments
+ * @return the new document / part
+ * @throws SecurityException thrown if the object can't be instantiated
+ * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments
+ * @throws InstantiationException thrown if the object can't be instantiated
+ * @throws IllegalAccessException thrown if the object can't be instantiated
+ * @throws InvocationTargetException thrown if the object can't be instantiated
+ *
+ * @since POI 3.14-Beta1
+ */
+ protected abstract POIXMLDocumentPart createDocumentPart
+ (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
+ throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException;
+
+ /**
+ * returns the descriptor for the given relationship type
+ *
+ * @param relationshipType the relationship type of the descriptor
+ * @return the descriptor or null if type is unknown
+ *
+ * @since POI 3.14-Beta1
+ */
+ protected abstract POIXMLRelation getDescriptor(String relationshipType);
+
+ /**
+ * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts
+ * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc.
+ * The class named by the descriptor is instantiated without constructor arguments.
+ *
+ * @param descriptor describes the object to create
+ * @return A new instance of a POIXMLDocumentPart.
+ * @throws POIXMLException if the part can't be instantiated
+ */
+ public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) {
+ final Class<? extends POIXMLDocumentPart> partClass = descriptor.getRelationClass();
+ try {
+ return createDocumentPart(partClass, null, null);
+ } catch (Exception e) {
+ throw new POIXMLException(e);
+ }
+ }
+
+ /**
+ * Retrieves the package relationship of the child part within the parent.
+ * The relationships of the parent are scanned for a target URI that equals
+ * the part name, ignoring case.
+ *
+ * @param parent the parent to search for the part
+ * @param part the part to look for
+ *
+ * @return the relationship
+ *
+ * @throws POIXMLException if the relations are erroneous or the part is not related
+ *
+ * @since POI 3.14-Beta1
+ */
+ protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) {
+ PackageRelationship found = null;
+ try {
+ final String wantedName = part.getPartName().getName();
+ for (PackageRelationship candidate : parent.getPackagePart().getRelationships()) {
+ final String targetName = candidate.getTargetURI().toASCIIString();
+ if (targetName.equalsIgnoreCase(wantedName)) {
+ found = candidate;
+ break;
+ }
+ }
+ } catch (InvalidFormatException e) {
+ throw new POIXMLException("error while determining package relations", e);
+ }
+
+ if (found == null) {
+ throw new POIXMLException("package part isn't a child of the parent document.");
+ }
+ return found;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Date;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.ContentTypes;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.openxml4j.opc.StreamHelper;
+import org.apache.poi.openxml4j.opc.TargetMode;
+import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.openxml4j.util.Nullable;
+import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
+
+/**
+ * Wrapper around the three different kinds of OOXML properties
+ * and metadata a document can have (Core, Extended and Custom),
+ * as well as Thumbnails.
+ */
+public class POIXMLProperties {
+ // the package whose properties are wrapped by this object
+ private OPCPackage pkg;
+ private CoreProperties core;
+ private ExtendedProperties ext;
+ private CustomProperties cust;
+
+ // package parts backing the extended / custom properties;
+ // null while no such part is known, commit() creates one when needed
+ private PackagePart extPart;
+ private PackagePart custPart;
+
+
+ // empty template documents: copied when the package has no according part
+ // and compared against in commit() to detect whether anything was set
+ private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE;
+ private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE;
+ static {
+ NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance();
+ NEW_EXT_INSTANCE.addNewProperties();
+
+ NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance();
+ NEW_CUST_INSTANCE.addNewProperties();
+ }
+
+ /**
+ * Reads the core, extended and custom properties of the given package.
+ * <p>
+ * Extended and custom properties are parsed from their package part when the
+ * package has exactly one relationship of the according type and that part
+ * can be resolved; otherwise a copy of an empty properties document is used.
+ *
+ * @param docPackage the package to read the properties from
+ * @throws IOException if a properties part can't be read
+ * @throws OpenXML4JException if the package properties or relationships can't be accessed
+ * @throws XmlException if a properties part can't be parsed
+ */
+ public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException {
+ this.pkg = docPackage;
+
+ // Core properties
+ core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() );
+
+ // Extended properties
+ PackageRelationshipCollection extRel =
+ pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES);
+ extPart = (extRel.size() == 1) ? pkg.getPart( extRel.getRelationship(0)) : null;
+ if (extPart == null) {
+ // no (resolvable) part - start from an empty document instead of failing on null
+ ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy());
+ } else {
+ // close the part stream once the document has been parsed
+ try (InputStream stream = extPart.getInputStream()) {
+ org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse(
+ stream, DEFAULT_XML_OPTIONS
+ );
+ ext = new ExtendedProperties(props);
+ }
+ }
+
+ // Custom properties
+ PackageRelationshipCollection custRel =
+ pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES);
+ custPart = (custRel.size() == 1) ? pkg.getPart( custRel.getRelationship(0)) : null;
+ if (custPart == null) {
+ // no (resolvable) part - start from an empty document instead of failing on null
+ cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy());
+ } else {
+ // close the part stream once the document has been parsed
+ try (InputStream stream = custPart.getInputStream()) {
+ org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse(
+ stream, DEFAULT_XML_OPTIONS
+ );
+ cust = new CustomProperties(props);
+ }
+ }
+ }
+
+ /**
+ * Gives access to the wrapper around the core document properties.
+ *
+ * @return the core document properties
+ */
+ public CoreProperties getCoreProperties() {
+ return this.core;
+ }
+
+ /**
+ * Gives access to the wrapper around the extended document properties.
+ *
+ * @return the extended document properties
+ */
+ public ExtendedProperties getExtendedProperties() {
+ return this.ext;
+ }
+
+ /**
+ * Gives access to the wrapper around the custom document properties.
+ *
+ * @return the custom document properties
+ */
+ public CustomProperties getCustomProperties() {
+ return this.cust;
+ }
+
+ /**
+ * Returns the {@link PackagePart} for the Document
+ * Thumbnail, or <code>null</code> if there isn't one
+ *
+ * @return The Document Thumbnail part or null
+ */
+ protected PackagePart getThumbnailPart() {
+ PackageRelationshipCollection rels =
+ pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL);
+ if(rels.size() == 1) {
+ return pkg.getPart(rels.getRelationship(0));
+ }
+ return null;
+ }
+ /**
+ * Returns the name of the Document thumbnail, or {@code null} if there
+ * isn't one.
+ * <p>
+ * The name is the tail of the thumbnail's part name starting at its last
+ * '/', so the returned value includes that leading slash, eg
+ * {@code /thumbnail.jpeg}.
+ *
+ * @return The thumbnail filename, or null
+ */
+ public String getThumbnailFilename() {
+ final PackagePart thumbnail = getThumbnailPart();
+ if (thumbnail == null) {
+ return null;
+ }
+ final String partName = thumbnail.getPartName().getName();
+ final int lastSlash = partName.lastIndexOf('/');
+ return partName.substring(lastSlash);
+ }
+ /**
+ * Opens the Document thumbnail image data for reading.
+ *
+ * @return a stream on the thumbnail part, or {@code null} if there isn't a thumbnail
+ *
+ * @throws IOException if the thumbnail can't be read
+ */
+ public InputStream getThumbnailImage() throws IOException {
+ final PackagePart thumbnail = getThumbnailPart();
+ return (thumbnail == null) ? null : thumbnail.getInputStream();
+ }
+
+ /**
+ * Sets the Thumbnail for the document, replacing any existing one.
+ * <p>
+ * Without an existing thumbnail part, a new thumbnail is added to the
+ * package. Otherwise the image data is written into the existing part,
+ * which is only permitted if the content type derived from the filename
+ * matches the content type of that part.
+ *
+ * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg}
+ * @param imageData The inputstream to read the thumbnail image from
+ *
+ * @throws IOException if the thumbnail can't be written
+ * @throws IllegalArgumentException if an existing thumbnail has a different content type
+ */
+ public void setThumbnail(String filename, InputStream imageData) throws IOException {
+ PackagePart tPart = getThumbnailPart();
+ if (tPart == null) {
+ // New thumbnail
+ pkg.addThumbnail(filename, imageData);
+ } else {
+ // Change existing
+ String newType = ContentTypes.getContentTypeFromFileExtension(filename);
+ // NOTE(review): the null guard assumes the lookup may yield null for an
+ // unrecognised extension - report that as a type mismatch, not as a NullPointerException
+ if (newType == null || ! newType.equals(tPart.getContentType())) {
+ throw new IllegalArgumentException("Can't set a Thumbnail of type " +
+ newType + " when existing one is of a different type " +
+ tPart.getContentType());
+ }
+ // make sure the part's output stream is closed, even if copying fails
+ try (OutputStream out = tPart.getOutputStream()) {
+ StreamHelper.copyStream(imageData, out);
+ }
+ }
+ }
+
+ /**
+ * Commit changes to the underlying OPC package.
+ * <p>
+ * A part (and its package relationship) for the extended respectively custom
+ * properties is created first, if none exists yet and the properties differ
+ * from an empty properties document. Afterwards the properties are saved
+ * into every part that exists.
+ *
+ * @throws IOException if the properties can't be saved
+ * @throws POIXMLException if the properties are erroneous
+ */
+ public void commit() throws IOException{
+
+ if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){
+ try {
+ PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml");
+ pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
+ extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml");
+ } catch (InvalidFormatException e){
+ throw new POIXMLException(e);
+ }
+ }
+ if(custPart == null && !NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){
+ try {
+ PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml");
+ pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties");
+ custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml");
+ } catch (InvalidFormatException e){
+ throw new POIXMLException(e);
+ }
+ }
+ if(extPart != null){
+ // try-with-resources: the stream is closed even when saving fails
+ try (OutputStream out = extPart.getOutputStream()) {
+ // drop content of a previous commit before writing again
+ if (extPart.getSize() > 0) {
+ extPart.clear();
+ }
+ ext.props.save(out, DEFAULT_XML_OPTIONS);
+ }
+ }
+ if(custPart != null){
+ // try-with-resources: the stream is closed even when saving fails
+ try (OutputStream out = custPart.getOutputStream()) {
+ cust.props.save(out, DEFAULT_XML_OPTIONS);
+ }
+ }
+ }
+
+ /**
+ * The core document properties
+ */
+ public static class CoreProperties {
+     /** The package part holding the core properties; every accessor below delegates to it. */
+     private PackagePropertiesPart part;
+
+     private CoreProperties(PackagePropertiesPart part) {
+         this.part = part;
+     }
+
+     /** @return the value of the category property of the underlying part */
+     public String getCategory() {
+         return part.getCategoryProperty().getValue();
+     }
+     /** @param category the new category, passed on to the underlying part */
+     public void setCategory(String category) {
+         part.setCategoryProperty(category);
+     }
+     /** @return the value of the content status property of the underlying part */
+     public String getContentStatus() {
+         return part.getContentStatusProperty().getValue();
+     }
+     /** @param contentStatus the new content status, passed on to the underlying part */
+     public void setContentStatus(String contentStatus) {
+         part.setContentStatusProperty(contentStatus);
+     }
+     /** @return the value of the content type property of the underlying part */
+     public String getContentType() {
+         return part.getContentTypeProperty().getValue();
+     }
+     /** @param contentType the new content type, passed on to the underlying part */
+     public void setContentType(String contentType) {
+         part.setContentTypeProperty(contentType);
+     }
+     /** @return the value of the created property of the underlying part */
+     public Date getCreated() {
+         return part.getCreatedProperty().getValue();
+     }
+     /** @param date the new creation date, passed on to the underlying part */
+     public void setCreated(Nullable<Date> date) {
+         part.setCreatedProperty(date);
+     }
+     /** @param date the new creation date as text; interpreting it is left to the underlying part */
+     public void setCreated(String date) {
+         part.setCreatedProperty(date);
+     }
+     /** @return the value of the creator property of the underlying part */
+     public String getCreator() {
+         return part.getCreatorProperty().getValue();
+     }
+     /** @param creator the new creator, passed on to the underlying part */
+     public void setCreator(String creator) {
+         part.setCreatorProperty(creator);
+     }
+     /** @return the value of the description property of the underlying part */
+     public String getDescription() {
+         return part.getDescriptionProperty().getValue();
+     }
+     /** @param description the new description, passed on to the underlying part */
+     public void setDescription(String description) {
+         part.setDescriptionProperty(description);
+     }
+     /** @return the value of the identifier property of the underlying part */
+     public String getIdentifier() {
+         return part.getIdentifierProperty().getValue();
+     }
+     /** @param identifier the new identifier, passed on to the underlying part */
+     public void setIdentifier(String identifier) {
+         part.setIdentifierProperty(identifier);
+     }
+     /** @return the value of the keywords property of the underlying part */
+     public String getKeywords() {
+         return part.getKeywordsProperty().getValue();
+     }
+     /** @param keywords the new keywords, passed on to the underlying part */
+     public void setKeywords(String keywords) {
+         part.setKeywordsProperty(keywords);
+     }
+     /** @return the value of the last printed property of the underlying part */
+     public Date getLastPrinted() {
+         return part.getLastPrintedProperty().getValue();
+     }
+     /** @param date the new last printed date, passed on to the underlying part */
+     public void setLastPrinted(Nullable<Date> date) {
+         part.setLastPrintedProperty(date);
+     }
+     /** @param date the new last printed date as text; interpreting it is left to the underlying part */
+     public void setLastPrinted(String date) {
+         part.setLastPrintedProperty(date);
+     }
+     /**
+      * @return the value of the last modified by property of the underlying part
+      * @since POI 3.15 beta 3
+      */
+     public String getLastModifiedByUser() {
+         return part.getLastModifiedByProperty().getValue();
+     }
+     /**
+      * @param user the user who last modified the document, passed on to the underlying part
+      * @since POI 3.15 beta 3
+      */
+     public void setLastModifiedByUser(String user) {
+         part.setLastModifiedByProperty(user);
+     }
+     /** @return the value of the modified property of the underlying part */
+     public Date getModified() {
+         return part.getModifiedProperty().getValue();
+     }
+     /** @param date the new modification date, passed on to the underlying part */
+     public void setModified(Nullable<Date> date) {
+         part.setModifiedProperty(date);
+     }
+     /** @param date the new modification date as text; interpreting it is left to the underlying part */
+     public void setModified(String date) {
+         part.setModifiedProperty(date);
+     }
+     /** @return the value of the subject property of the underlying part */
+     public String getSubject() {
+         return part.getSubjectProperty().getValue();
+     }
+     /** @param subject the new subject, passed on to the underlying part */
+     public void setSubjectProperty(String subject) {
+         part.setSubjectProperty(subject);
+     }
+     /** @param title the new title, passed on to the underlying part */
+     public void setTitle(String title) {
+         part.setTitleProperty(title);
+     }
+     /** @return the value of the title property of the underlying part */
+     public String getTitle() {
+         return part.getTitleProperty().getValue();
+     }
+     /** @return the value of the revision property of the underlying part */
+     public String getRevision() {
+         return part.getRevisionProperty().getValue();
+     }
+     /**
+      * Sets the revision, but only if the given text parses as a long.
+      * Any other value (including {@code null}) is silently ignored and the
+      * current revision is left untouched.
+      *
+      * @param revision the new revision; expected to be an integral number
+      */
+     public void setRevision(String revision) {
+         try {
+             // Used purely as a validity check; the parsed number itself is not needed
+             Long.parseLong(revision);
+             part.setRevisionProperty(revision);
+         } catch (NumberFormatException ignored) {
+             // Deliberate best effort: a non-numeric revision is dropped rather than reported
+         }
+     }
+
+     /** @return the package part that backs these properties */
+     public PackagePropertiesPart getUnderlyingProperties() {
+         return part;
+     }
+ }
+
+ /**
+ * Extended document properties
+ */
+ public static class ExtendedProperties {
+     /** The XML document the extended properties are read from. */
+     private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props;
+
+     private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) {
+         this.props = props;
+     }
+
+     /** @return the properties element of the wrapped document */
+     public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() {
+         return props.getProperties();
+     }
+
+     // Text-valued properties: each getter yields null when the element is not set.
+
+     /** @return the template, or {@code null} if it is not set */
+     public String getTemplate() {
+         return props.getProperties().isSetTemplate() ? props.getProperties().getTemplate() : null;
+     }
+     /** @return the manager, or {@code null} if it is not set */
+     public String getManager() {
+         return props.getProperties().isSetManager() ? props.getProperties().getManager() : null;
+     }
+     /** @return the company, or {@code null} if it is not set */
+     public String getCompany() {
+         return props.getProperties().isSetCompany() ? props.getProperties().getCompany() : null;
+     }
+     /** @return the presentation format, or {@code null} if it is not set */
+     public String getPresentationFormat() {
+         return props.getProperties().isSetPresentationFormat() ? props.getProperties().getPresentationFormat() : null;
+     }
+     /** @return the application, or {@code null} if it is not set */
+     public String getApplication() {
+         return props.getProperties().isSetApplication() ? props.getProperties().getApplication() : null;
+     }
+     /** @return the application version, or {@code null} if it is not set */
+     public String getAppVersion() {
+         return props.getProperties().isSetAppVersion() ? props.getProperties().getAppVersion() : null;
+     }
+
+     // Numeric properties: each getter yields -1 when the element is not set.
+
+     /** @return the pages value, or -1 if it is not set */
+     public int getPages() {
+         return props.getProperties().isSetPages() ? props.getProperties().getPages() : -1;
+     }
+     /** @return the words value, or -1 if it is not set */
+     public int getWords() {
+         return props.getProperties().isSetWords() ? props.getProperties().getWords() : -1;
+     }
+     /** @return the characters value, or -1 if it is not set */
+     public int getCharacters() {
+         return props.getProperties().isSetCharacters() ? props.getProperties().getCharacters() : -1;
+     }
+     /** @return the characters-with-spaces value, or -1 if it is not set */
+     public int getCharactersWithSpaces() {
+         return props.getProperties().isSetCharactersWithSpaces() ? props.getProperties().getCharactersWithSpaces() : -1;
+     }
+     /** @return the lines value, or -1 if it is not set */
+     public int getLines() {
+         return props.getProperties().isSetLines() ? props.getProperties().getLines() : -1;
+     }
+     /** @return the paragraphs value, or -1 if it is not set */
+     public int getParagraphs() {
+         return props.getProperties().isSetParagraphs() ? props.getProperties().getParagraphs() : -1;
+     }
+     /** @return the slides value, or -1 if it is not set */
+     public int getSlides() {
+         return props.getProperties().isSetSlides() ? props.getProperties().getSlides() : -1;
+     }
+     /** @return the notes value, or -1 if it is not set */
+     public int getNotes() {
+         return props.getProperties().isSetNotes() ? props.getProperties().getNotes() : -1;
+     }
+     /** @return the total time value, or -1 if it is not set */
+     public int getTotalTime() {
+         return props.getProperties().isSetTotalTime() ? props.getProperties().getTotalTime() : -1;
+     }
+     /** @return the hidden slides value, or -1 if it is not set */
+     public int getHiddenSlides() {
+         return props.getProperties().isSetHiddenSlides() ? props.getProperties().getHiddenSlides() : -1;
+     }
+     /** @return the MMClips value, or -1 if it is not set */
+     public int getMMClips() {
+         return props.getProperties().isSetMMClips() ? props.getProperties().getMMClips() : -1;
+     }
+
+     /** @return the hyperlink base, or {@code null} if it is not set */
+     public String getHyperlinkBase() {
+         return props.getProperties().isSetHyperlinkBase() ? props.getProperties().getHyperlinkBase() : null;
+     }
+ }
+
+ /**
+ * Custom document properties
+ */
+ public static class CustomProperties {
+     /**
+      * Each custom property element contains an fmtid attribute
+      * with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}).
+      */
+     public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}";
+
+     /** The XML document holding the custom properties. */
+     private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props;
+
+     private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) {
+         this.props = props;
+     }
+
+     /** @return the properties element of the wrapped document */
+     public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() {
+         return props.getProperties();
+     }
+
+     /**
+      * Add a new, still value-less property with the next free pid and the
+      * common {@link #FORMAT_ID}.
+      *
+      * @param name the property name
+      * @return the newly created property, ready to receive its typed value
+      * @throws IllegalArgumentException if a property with this name already exists
+      */
+     private CTProperty add(String name) {
+         if (contains(name)) {
+             throw new IllegalArgumentException("A property with this name " +
+                     "already exists in the custom properties");
+         }
+
+         CTProperty p = props.getProperties().addNewProperty();
+         int pid = nextPid();
+         p.setPid(pid);
+         p.setFmtid(FORMAT_ID);
+         p.setName(name);
+         return p;
+     }
+
+     /**
+      * Add a new string property
+      *
+      * @param name the property name
+      * @param value the property value
+      *
+      * @throws IllegalArgumentException if a property with this name already exists
+      */
+     public void addProperty(String name, String value) {
+         CTProperty p = add(name);
+         p.setLpwstr(value);
+     }
+
+     /**
+      * Add a new double property
+      *
+      * @param name the property name
+      * @param value the property value
+      *
+      * @throws IllegalArgumentException if a property with this name already exists
+      */
+     public void addProperty(String name, double value) {
+         CTProperty p = add(name);
+         p.setR8(value);
+     }
+
+     /**
+      * Add a new integer property
+      *
+      * @param name the property name
+      * @param value the property value
+      *
+      * @throws IllegalArgumentException if a property with this name already exists
+      */
+     public void addProperty(String name, int value) {
+         CTProperty p = add(name);
+         p.setI4(value);
+     }
+
+     /**
+      * Add a new boolean property
+      *
+      * @param name the property name
+      * @param value the property value
+      *
+      * @throws IllegalArgumentException if a property with this name already exists
+      */
+     public void addProperty(String name, boolean value) {
+         CTProperty p = add(name);
+         p.setBool(value);
+     }
+
+     /**
+      * Generate next id that uniquely relates a custom property: one more than
+      * the highest pid currently in use, and never less than 2.
+      *
+      * @return next property id starting with 2
+      */
+     protected int nextPid() {
+         int propid = 1;
+         for (CTProperty p : props.getProperties().getPropertyArray()) {
+             if (p.getPid() > propid) {
+                 propid = p.getPid();
+             }
+         }
+         return propid + 1;
+     }
+
+     /**
+      * Check if a property with this name already exists in the collection of custom properties
+      *
+      * @param name the name to check
+      * @return whether a property with the given name exists in the custom properties
+      */
+     public boolean contains(String name) {
+         // Single lookup implementation shared with getProperty keeps both in agreement
+         return getProperty(name) != null;
+     }
+
+     /**
+      * Retrieve the custom property with this name, or null if none exists.
+      *
+      * You will need to test the various isSetX methods to work out
+      * what the type of the property is, before fetching the
+      * appropriate value for it.
+      *
+      * @param name the name of the property to fetch
+      *
+      * @return the custom property with this name, or null if none exists
+      */
+     public CTProperty getProperty(String name) {
+         if (name == null) {
+             return null;
+         }
+         for (CTProperty p : props.getProperties().getPropertyArray()) {
+             // Compare from the known non-null side so a property without a name
+             // is skipped instead of causing a NullPointerException
+             if (name.equals(p.getName())) {
+                 return p;
+             }
+         }
+         return null;
+     }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Represents a descriptor of an OOXML relation.
+ */
+public abstract class POIXMLRelation {
+
+    private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class);
+
+    /** Content type of the data stored in a part of this kind. */
+    private String _type;
+
+    /** Relationship type linking a source part to a target part of this kind. */
+    private String _relation;
+
+    /** Default part name; a '#' in it is the placeholder for the part index. */
+    private String _defaultName;
+
+    /** Class used to construct instances for this relationship, may be null. */
+    private Class<? extends POIXMLDocumentPart> _cls;
+
+    /**
+     * Creates a relation descriptor together with the class representing its parts.
+     *
+     * @param type content type
+     * @param rel relationship
+     * @param defaultName default item name
+     * @param cls defines what object is used to construct instances of this relationship
+     */
+    public POIXMLRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
+        this._type = type;
+        this._relation = rel;
+        this._defaultName = defaultName;
+        this._cls = cls;
+    }
+
+    /**
+     * Creates a relation descriptor without an associated part class.
+     *
+     * @param type content type
+     * @param rel relationship
+     * @param defaultName default item name
+     */
+    public POIXMLRelation(String type, String rel, String defaultName) {
+        this(type, rel, defaultName, null);
+    }
+
+    /**
+     * Return the content type. Content types define a media type, a subtype, and an
+     * optional set of parameters, as defined in RFC 2616.
+     *
+     * @return the content type
+     */
+    public String getContentType() {
+        return _type;
+    }
+
+    /**
+     * Return the relationship, the kind of connection between a source part and a target part in a package.
+     * Relationships make the connections between parts directly discoverable without looking at the content
+     * in the parts, and without altering the parts themselves.
+     *
+     * @return the relationship
+     */
+    public String getRelation() {
+        return _relation;
+    }
+
+    /**
+     * Return the default part name. Part names are used to refer to a part in the context of a
+     * package, typically as part of a URI.
+     *
+     * @return the default part name
+     */
+    public String getDefaultFileName() {
+        return _defaultName;
+    }
+
+    /**
+     * Builds the filename for the nth part of this kind, e.g. /xl/comments4.xml.
+     * Names without a '#' placeholder are the same for every index.
+     *
+     * @param index the suffix for the document type
+     * @return the filename including the suffix
+     */
+    public String getFileName(int index) {
+        return _defaultName.contains("#")
+                ? _defaultName.replace("#", Integer.toString(index))
+                : getDefaultFileName();
+    }
+
+    /**
+     * Extracts the index from the name of the given part,
+     * e.g. 4 for /xl/comments4.xml
+     *
+     * @param part the part to read the suffix from
+     * @return the suffix
+     */
+    public Integer getFileNameIndex(POIXMLDocumentPart part) {
+        // Turn the default name into a pattern whose only group captures the digits
+        final String indexPattern = _defaultName.replace("#", "(\\d+)");
+        final String partName = part.getPackagePart().getPartName().getName();
+        final String digits = partName.replaceAll(indexPattern, "$1");
+        return Integer.valueOf(digits);
+    }
+
+    /**
+     * Return type of the object used to construct instances of this relationship
+     *
+     * @return the class of the object used to construct instances of this relation
+     */
+    public Class<? extends POIXMLDocumentPart> getRelationClass() {
+        return _cls;
+    }
+
+    /**
+     * Opens the content of the first part that the given core part
+     * references through a relationship of this relation's type.
+     * When there is no such relationship a warning is logged and
+     * {@code null} is returned.
+     *
+     * @since 3.16-beta3
+     */
+    public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
+        PackageRelationshipCollection matching = corePart.getRelationshipsByType(getRelation());
+        Iterator<PackageRelationship> relIter = matching.iterator();
+        if (!relIter.hasNext()) {
+            log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
+            return null;
+        }
+
+        // Only the first matching relationship is followed
+        PackageRelationship first = relIter.next();
+        PackagePartName targetName = PackagingURIHelper.createPartName(first.getTargetURI());
+        PackagePart target = corePart.getPackage().getPart(targetName);
+        return target.getInputStream();
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.poi.openxml4j.opc.PackageNamespaces;
+import org.apache.poi.ooxml.util.DocumentHelper;
+import org.apache.xmlbeans.SchemaType;
+import org.apache.xmlbeans.SchemaTypeLoader;
+import org.apache.xmlbeans.XmlBeans;
+import org.apache.xmlbeans.XmlException;
+import org.apache.xmlbeans.XmlObject;
+import org.apache.xmlbeans.XmlOptions;
+import org.apache.xmlbeans.xml.stream.XMLInputStream;
+import org.apache.xmlbeans.xml.stream.XMLStreamException;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+@SuppressWarnings("deprecation")
+public class POIXMLTypeLoader {
+
+ private static ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<>();
+
+ // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
+ // These constants should be common to all of POI and easy to use by other applications such as Tika
+ private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office";
+ private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel";
+ private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word";
+ private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
+
+ public static final XmlOptions DEFAULT_XML_OPTIONS;
+ static {
+ DEFAULT_XML_OPTIONS = new XmlOptions();
+ DEFAULT_XML_OPTIONS.setSaveOuter();
+ DEFAULT_XML_OPTIONS.setUseDefaultNamespace();
+ DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces();
+ DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8");
+ // Piccolo is disabled for POI builts, i.e. JAXP is used for parsing
+ // so only user code using XmlObject/XmlToken.Factory.parse
+ // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :))
+ // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096);
+
+ // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350
+ // Update: disabled again for now as it caused strange NPEs and other problems
+ // when reading properties in separate workbooks in multiple threads
+ // DEFAULT_XML_OPTIONS.setUnsynchronized();
+
+ Map<String, String> map = new HashMap<>();
+ map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a");
+ map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c");
+ map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp");
+ map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve");
+ map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m");
+ map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r");
+ map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt");
+ map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p");
+ map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w");
+ map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne");
+ map.put(MS_OFFICE_URN, "o");
+ map.put(MS_EXCEL_URN, "x");
+ map.put(MS_WORD_URN, "w10");
+ map.put(MS_VML_URN, "v");
+ DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map));
+ }
+
+ private static XmlOptions getXmlOptions(XmlOptions options) {
+ return options == null ? DEFAULT_XML_OPTIONS : options;
+ }
+
+ private static SchemaTypeLoader getTypeLoader(SchemaType type) {
+ SchemaTypeLoader tl = typeLoader.get();
+ if (tl == null) {
+ ClassLoader cl = type.getClass().getClassLoader();
+ tl = XmlBeans.typeLoaderForClassLoader(cl);
+ typeLoader.set(tl);
+ }
+ return tl;
+ }
+
+ public static XmlObject newInstance(SchemaType type, XmlOptions options) {
+ return getTypeLoader(type).newInstance(type, getXmlOptions(options));
+ }
+
+ public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
+ try {
+ return parse(new StringReader(xmlText), type, options);
+ } catch (IOException e) {
+ throw new XmlException("Unable to parse xml bean", e);
+ }
+ }
+
+ public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException {
+ try (InputStream is = new FileInputStream(file)) {
+ return parse(is, type, options);
+ }
+ }
+
+ public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException {
+ try (InputStream is = file.openStream()) {
+ return parse(is, type, options);
+ }
+ }
+
+ public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
+ try {
+ Document doc = DocumentHelper.readDocument(jiois);
+ return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
+ } catch (SAXException e) {
+ throw new IOException("Unable to parse xml bean", e);
+ }
+ }
+
+ public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
+ return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
+ }
+
+ public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
+ try {
+ Document doc = DocumentHelper.readDocument(new InputSource(jior));
+ return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
+ } catch (SAXException e) {
+ throw new XmlException("Unable to parse xml bean", e);
+ }
+ }
+
+ public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
+ return getTypeLoader(type).parse(node, type, getXmlOptions(options));
+ }
+
+ public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
+ return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
+ }
+
+ public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
+ return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.*;
+import java.util.ArrayList;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+
+/**
+ * Prints out the contents of an OOXML container.
+ * Useful for seeing what parts are defined, and how
+ * they're all related to each other.
+ */
+public class OOXMLLister implements Closeable {
+ private final OPCPackage container;
+ private final PrintStream disp;
+
+ public OOXMLLister(OPCPackage container) {
+ this(container, System.out);
+ }
+ public OOXMLLister(OPCPackage container, PrintStream disp) {
+ this.container = container;
+ this.disp = disp;
+ }
+
+ /**
+ * Figures out how big a given PackagePart is.
+ *
+ * @param part the PackagePart
+ * @return the size of the PackagePart
+ *
+ * @throws IOException if the part can't be read
+ */
+ public static long getSize(PackagePart part) throws IOException {
+ InputStream in = part.getInputStream();
+ try {
+ byte[] b = new byte[8192];
+ long size = 0;
+ int read = 0;
+
+ while(read > -1) {
+ read = in.read(b);
+ if(read > 0) {
+ size += read;
+ }
+ }
+
+ return size;
+ } finally {
+ in.close();
+ }
+ }
+
+ /**
+ * Displays information on all the different
+ * parts of the OOXML file container.
+ * @throws InvalidFormatException if the package relations are invalid
+ * @throws IOException if the package can't be read
+ */
+ public void displayParts() throws InvalidFormatException, IOException {
+ ArrayList<PackagePart> parts = container.getParts();
+ for (PackagePart part : parts) {
+ disp.println(part.getPartName());
+ disp.println("\t" + part.getContentType());
+
+ if(! part.getPartName().toString().equals("/docProps/core.xml")) {
+ disp.println("\t" + getSize(part) + " bytes");
+ }
+
+ if(! part.isRelationshipPart()) {
+ disp.println("\t" + part.getRelationships().size() + " relations");
+ for(PackageRelationship rel : part.getRelationships()) {
+ displayRelation(rel, "\t ");
+ }
+ }
+ }
+ }
+ /**
+ * Displays information on all the different
+ * relationships between different parts
+ * of the OOXML file container.
+ */
+ public void displayRelations() {
+ PackageRelationshipCollection rels =
+ container.getRelationships();
+ for (PackageRelationship rel : rels) {
+ displayRelation(rel, "");
+ }
+ }
+
+ private void displayRelation(PackageRelationship rel, String indent) {
+ disp.println(indent+"Relationship:");
+ disp.println(indent+"\tFrom: "+ rel.getSourceURI());
+ disp.println(indent+"\tTo: " + rel.getTargetURI());
+ disp.println(indent+"\tID: " + rel.getId());
+ disp.println(indent+"\tMode: " + rel.getTargetMode());
+ disp.println(indent+"\tType: " + rel.getRelationshipType());
+ }
+
+ @Override
+ public void close() throws IOException {
+ container.close();
+ }
+
+ public static void main(String[] args) throws IOException, InvalidFormatException {
+ if(args.length == 0) {
+ System.err.println("Use:");
+ System.err.println("\tjava OOXMLLister <filename>");
+ System.exit(1);
+ }
+
+ File f = new File(args[0]);
+ if(! f.exists()) {
+ System.err.println("Error, file not found!");
+ System.err.println("\t" + f);
+ System.exit(2);
+ }
+
+ OOXMLLister lister = new OOXMLLister(
+ OPCPackage.open(f.toString(), PackageAccess.READ)
+ );
+
+ try {
+ lister.disp.println(f + "\n");
+ lister.displayParts();
+ lister.disp.println();
+ lister.displayRelations();
+ } finally {
+ lister.close();
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.openxml4j.opc.internal.ZipHelper;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+import org.apache.poi.util.IOUtils;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+
+/**
+ * Reads a zipped OOXML file and produces a copy with the included
+ * pretty-printed XML files.
+ *
+ * This is useful for comparing OOXML files produced by different tools as they often
+ * use different formatting of the XML.
+ */
+public class OOXMLPrettyPrint {
+    private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+    private final DocumentBuilder documentBuilder;
+
+    /**
+     * Creates a pretty-printer with its own XML {@link DocumentBuilder}.
+     * <p>
+     * As a side effect this lowers the static minimum inflate ratio of {@link ZipSecureFile}.
+     *
+     * @throws ParserConfigurationException if the XML document builder cannot be created
+     */
+    public OOXMLPrettyPrint() throws ParserConfigurationException {
+        // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool
+        ZipSecureFile.setMinInflateRatio(0.00001);
+
+        documentBuilder = documentBuilderFactory.newDocumentBuilder();
+    }
+
+    /**
+     * Command line entry point. Expects an even, non-zero number of arguments that are
+     * interpreted as pairs of input file and output file.
+     * Exits with status 1 on wrong usage and with status 2 if an input file does not exist.
+     *
+     * @param args pairs of {@code <filename> <outfilename>}
+     * @throws Exception if reading, parsing or writing one of the files fails
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length <= 1 || args.length % 2 != 0) {
+            System.err.println("Use:");
+            System.err.println("\tjava OOXMLPrettyPrint [<filename> <outfilename>] ...");
+            System.exit(1);
+        }
+
+        for(int i = 0;i < args.length;i+=2) {
+            File f = new File(args[i]);
+            if(! f.exists()) {
+                System.err.println("Error, file not found!");
+                System.err.println("\t" + f);
+                System.exit(2);
+            }
+
+            handleFile(f, new File(args[i+1]));
+        }
+        System.out.println("Done.");
+    }
+
+    /**
+     * Opens the input zip file and the output zip stream and pretty-prints one into the other.
+     * Both are closed again when this method returns.
+     *
+     * @param file the OOXML (zip) file to read
+     * @param outFile the zip file to write
+     */
+    private static void handleFile(File file, File outFile) throws ZipException,
+            IOException, ParserConfigurationException {
+        System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);
+
+        try (ZipFile zipFile = ZipHelper.openZipFile(file)) {
+            try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) {
+                new OOXMLPrettyPrint().handle(zipFile, out);
+            }
+        } finally {
+            // terminate the line of progress dots printed by handle()
+            System.out.println();
+        }
+    }
+
+    /**
+     * Copies every entry of the zip file to the output. Entries whose name ends in
+     * {@code .xml} or {@code .rels} are parsed and re-serialized with indentation,
+     * all other entries are copied unchanged.
+     *
+     * @param file the zip file to read entries from
+     * @param out the zip stream that receives one entry per input entry
+     * @throws IOException if an entry cannot be read, parsed or written; the entry name is part of the message
+     */
+    private void handle(ZipFile file, ZipOutputStream out) throws IOException {
+        Enumeration<? extends ZipEntry> entries = file.entries();
+        while(entries.hasMoreElements()) {
+            ZipEntry entry = entries.nextElement();
+
+            String name = entry.getName();
+            out.putNextEntry(new ZipEntry(name));
+            // close the per-entry input stream as soon as the entry is processed,
+            // otherwise one stream per entry stays open until the zip file itself is closed
+            try (java.io.InputStream in = file.getInputStream(entry)) {
+                if(name.endsWith(".xml") || name.endsWith(".rels")) {
+                    Document document = documentBuilder.parse(new InputSource(in));
+                    document.setXmlStandalone(true);
+                    pretty(document, out, 2);
+                } else {
+                    System.out.println("Not pretty-printing non-XML file " + name);
+                    IOUtils.copy(in, out);
+                }
+            } catch (Exception e) {
+                throw new IOException("While handling entry " + name, e);
+            } finally {
+                out.closeEntry();
+            }
+            System.out.print(".");
+        }
+    }
+
+    /**
+     * Serializes the document as UTF-8 XML to the given stream.
+     *
+     * @param document the DOM document to write
+     * @param outputStream the stream to write to; it is not closed here
+     * @param indent number of spaces per indentation level, values of 0 or less disable indenting
+     * @throws TransformerException if the document cannot be serialized
+     */
+    private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException {
+        TransformerFactory transformerFactory = TransformerFactory.newInstance();
+        Transformer transformer = transformerFactory.newTransformer();
+        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+        if (indent > 0) {
+            // set properties to indent the resulting XML nicely
+            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
+        }
+        Result result = new StreamResult(outputStream);
+        Source source = new DOMSource(document);
+        transformer.transform(source, result);
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.extractor;
+
+import java.io.File;
+
+import org.apache.poi.extractor.POITextExtractor;
+
+/**
+ * A command line wrapper around {@link ExtractorFactory}, useful
+ * when debugging.
+ */
+public class CommandLineTextExtractor {
+    public static final String DIVIDER = "=======================";
+
+    /**
+     * Prints the metadata text and the body text of every file given on the command line,
+     * followed by the character counts of both. Exits with status 1 if no file is given.
+     *
+     * @param args one or more file names
+     * @throws Exception if an extractor cannot be created or the text cannot be extracted
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println(" CommandLineTextExtractor <filename> [filename] [filename]");
+            System.exit(1);
+        }
+
+        for (String arg : args) {
+            System.out.println(DIVIDER);
+
+            File f = new File(arg);
+            System.out.println(f);
+
+            // try-with-resources closes the extractor and keeps the primary exception
+            // if both the extraction and the close fail
+            try (POITextExtractor extractor = ExtractorFactory.createExtractor(f)) {
+                POITextExtractor metadataExtractor =
+                        extractor.getMetadataTextExtractor();
+
+                System.out.println(" " + DIVIDER);
+                String metaData = metadataExtractor.getText();
+                System.out.println(metaData);
+                System.out.println(" " + DIVIDER);
+                String text = extractor.getText();
+                System.out.println(text);
+                System.out.println(DIVIDER);
+                System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
+            }
+        }
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.extractor;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.extractor.OLE2ExtractorFactory;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.NotOLE2FileException;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.NotImplemented;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.Removal;
+import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.xmlbeans.XmlException;
+
+/**
+ * Figures out the correct POITextExtractor for your supplied
+ * document, and returns it.
+ *
+ * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
+ * not present on the runtime classpath</p>
+ * <p>Note 2 - rather than using this, for most cases you would be better
+ * off switching to <a href="http://tika.apache.org">Apache Tika</a> instead!</p>
+ */
+@SuppressWarnings("WeakerAccess")
+public class ExtractorFactory {
+ private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
+
+ public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
+ protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
+ protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
+
+ /**
+ * Should this thread prefer event based over usermodel based extractors?
+ * (usermodel extractors tend to be more accurate, but use more memory)
+ * Default is false.
+ * <p>
+ * Delegates to {@link OLE2ExtractorFactory#getThreadPrefersEventExtractors()}.
+ *
+ * @return the thread-level preference as reported by {@link OLE2ExtractorFactory}
+ */
+ public static boolean getThreadPrefersEventExtractors() {
+ return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
+ }
+
+ /**
+ * Should all threads prefer event based over usermodel based extractors?
+ * (usermodel extractors tend to be more accurate, but use more memory)
+ * Default is to use the thread level setting, which defaults to false.
+ * <p>
+ * Delegates to {@link OLE2ExtractorFactory#getAllThreadsPreferEventExtractors()}.
+ *
+ * @return the all-threads preference as reported by {@link OLE2ExtractorFactory};
+ * the type is {@link Boolean}, so the value may be {@code null} (presumably meaning "not set" - verify)
+ */
+ public static Boolean getAllThreadsPreferEventExtractors() {
+ return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
+ }
+
+ /**
+ * Should this thread prefer event based over usermodel based extractors?
+ * Will only be used if the All Threads setting is null.
+ * <p>
+ * Delegates to {@link OLE2ExtractorFactory#setThreadPrefersEventExtractors(boolean)}.
+ *
+ * @param preferEventExtractors the new thread-level preference
+ */
+ public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
+ OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
+ }
+
+ /**
+ * Should all threads prefer event based over usermodel based extractors?
+ * If set, will take preference over the Thread level setting.
+ * <p>
+ * Delegates to {@link OLE2ExtractorFactory#setAllThreadsPreferEventExtractors(Boolean)}.
+ *
+ * @param preferEventExtractors the new all-threads preference
+ */
+ public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
+ OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
+ }
+
+ /**
+ * Should this thread use event based extractors if available?
+ * Checks the all-threads one first, then thread specific.
+ * <p>
+ * Delegates to {@link OLE2ExtractorFactory#getPreferEventExtractor()}.
+ *
+ * @return the effective preference as reported by {@link OLE2ExtractorFactory}
+ */
+ public static boolean getPreferEventExtractor() {
+ return OLE2ExtractorFactory.getPreferEventExtractor();
+ }
+
+ /**
+  * Opens the given file and creates a matching text extractor for it.
+  * <p>
+  * The file is first opened as an OLE2 container. An encrypted OOXML package inside it is
+  * handed to the decrypting code path, otherwise the extractor is created from the OLE2
+  * filesystem, which is then attached to the extractor. If opening reports an OOXML file
+  * instead, the file is re-opened read-only as an {@link OPCPackage}.
+  * On every failure path the OLE2 filesystem is closed quietly to release the file handle.
+  *
+  * @param f the file to create the extractor for
+  * @param <T> the extractor type expected by the caller; the result is cast unchecked to it
+  * @return the extractor for the file
+  * @throws IOException if reading the file fails
+  * @throws OpenXML4JException if the OOXML package cannot be processed
+  * @throws XmlException if an XML parsing error occurs
+  * @throws IllegalArgumentException if the file is neither an OLE2 nor an OOXML file
+  */
+ @SuppressWarnings("unchecked")
+ public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
+     NPOIFSFileSystem fs = null;
+     try {
+         fs = new NPOIFSFileSystem(f);
+         if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
+             return (T)createEncryptedOOXMLExtractor(fs);
+         }
+         POITextExtractor extractor = createExtractor(fs);
+         extractor.setFilesystem(fs);
+         return (T)extractor;
+     } catch (OfficeXmlFileException e) {
+         // ensure file-handle release
+         IOUtils.closeQuietly(fs);
+         return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+     } catch (NotOLE2FileException ne) {
+         // ensure file-handle release
+         IOUtils.closeQuietly(fs);
+         // keep the original exception as cause so the reason for the rejection is not lost
+         throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
+     } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) {
+         // ensure file-handle release
+         IOUtils.closeQuietly(fs);
+         throw e;
+     }
+ }
+
+ /**
+  * Creates a text extractor for a document supplied as a stream.
+  * <p>
+  * The stream is passed through {@link FileMagic#prepareToCheckMagic(InputStream)} and the
+  * format is then decided by {@link FileMagic#valueOf(InputStream)}: OLE2 content is opened
+  * as a POIFS filesystem (taking the decrypting path if it holds an encrypted package),
+  * OOXML content is opened as an {@link OPCPackage}.
+  *
+  * @param inp the stream holding the document
+  * @return the extractor for the document
+  * @throws IOException if reading the stream fails
+  * @throws OpenXML4JException if the OOXML package cannot be processed
+  * @throws XmlException if an XML parsing error occurs
+  * @throws IllegalArgumentException if the stream is neither OLE2 nor OOXML
+  */
+ public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
+     final InputStream prepared = FileMagic.prepareToCheckMagic(inp);
+     final FileMagic magic = FileMagic.valueOf(prepared);
+
+     switch (magic) {
+         case OLE2: {
+             final NPOIFSFileSystem poifs = new NPOIFSFileSystem(prepared);
+             if (poifs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
+                 return createEncryptedOOXMLExtractor(poifs);
+             }
+             return createExtractor(poifs);
+         }
+         case OOXML:
+             return createExtractor(OPCPackage.open(prepared));
+         default:
+             throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
+     }
+ }
+
+ /**
+ * Tries to determine the actual type of file and produces a matching text-extractor for it.
+ *
+ * @param pkg An {@link OPCPackage}.
+ * @return A {@link POIXMLTextExtractor} for the given file.
+ * @throws IOException If an error occurs while reading the file
+ * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
+ * @throws XmlException If an XML parsing error occurs.
+ * @throws IllegalArgumentException If no matching file type could be found.
+ */
+ public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+ try {
+ // Check for the normal Office core document
+ PackageRelationshipCollection core;
+ core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+
+ // If nothing was found, try some of the other OOXML-based core types
+ if (core.size() == 0) {
+ // Could it be an OOXML-Strict one?
+ core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
+ }
+ if (core.size() == 0) {
+ // Could it be a visio one?
+ core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
+ if (core.size() == 1)
+ return new XDGFVisioExtractor(pkg);
+ }
+
+ // Should just be a single core document, complain if not
+ if (core.size() != 1) {
+ throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+ }
+
+ // Grab the core document part, and try to identify from that
+ final PackagePart corePart = pkg.getPart(core.getRelationship(0));
+ final String contentType = corePart.getContentType();
+
+ // Is it XSSF?
+ for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
+ if ( rel.getContentType().equals( contentType ) ) {
+ if (getPreferEventExtractor()) {
+ return new XSSFEventBasedExcelExtractor(pkg);
+ }
+ return new XSSFExcelExtractor(pkg);
+ }
+ }
+
+ // Is it XWPF?
+ for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
+ if ( rel.getContentType().equals( contentType ) ) {
+ return new XWPFWordExtractor(pkg);
+ }
+ }
+
+ // Is it XSLF?
+ for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
+ if ( rel.getContentType().equals( contentType ) ) {
+ return new SlideShowExtractor(new XMLSlideShow(pkg));
+ }
+ }
+
+ // special handling for SlideShow-Theme-files,
+ if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
+ return new SlideShowExtractor(new XMLSlideShow(pkg));
+ }
+
+ // How about xlsb?
+ for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
+ if (rel.getContentType().equals(contentType)) {
+ return new XSSFBEventBasedExcelExtractor(pkg);
+ }
+ }
+
+ throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
+
+ } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) {
+ // ensure that we close the package again if there is an error opening it, however
+ // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+ pkg.revert();
+ throw e;
+ }
+ }
+
+ /**
+ * Creates a text extractor for the document stored in the given filesystem,
+ * by delegating to {@link #createExtractor(DirectoryNode)} with the root of the filesystem.
+ *
+ * @param fs the filesystem holding the document
+ * @return the extractor created for the root directory
+ */
+ public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+ return createExtractor(fs.getRoot());
+ }
+ /**
+ * Creates a text extractor for the document stored in the given filesystem,
+ * by delegating to {@link #createExtractor(DirectoryNode)} with the root of the filesystem.
+ *
+ * @param fs the filesystem holding the document
+ * @return the extractor created for the root directory
+ */
+ public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+ return createExtractor(fs.getRoot());
+ }
+ /**
+ * Creates a text extractor for the document stored in the given filesystem,
+ * by delegating to {@link #createExtractor(DirectoryNode)} with the root of the filesystem.
+ *
+ * @param fs the filesystem holding the document
+ * @return the extractor created for the root directory
+ */
+ public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+ return createExtractor(fs.getRoot());
+ }
+
+ /**
+  * Creates a text extractor for the document stored in the given POIFS directory.
+  * <p>
+  * A directory containing an entry named "Package" is treated as a wrapped OOXML package,
+  * everything else is passed on to {@link OLE2ExtractorFactory}.
+  *
+  * @param poifsDir the directory holding the document
+  * @param <T> the extractor type expected by the caller; the result is cast unchecked to it
+  * @return the extractor for the document
+  */
+ public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
+ {
+     // An entry called "Package" marks an embedded OOXML document
+     final boolean wrapsOoxml = poifsDir.getEntryNames().contains("Package");
+     if (wrapsOoxml) {
+         OPCPackage ooxml = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
+         return (T)createExtractor(ooxml);
+     }
+
+     // Otherwise leave it to the OLE2 code, which uses Scratchpad if possible
+     return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
+ }
+
+ /**
+ * Returns an array of text extractors, one for each of
+ * the embedded documents in the file (if there are any).
+ * If there are no embedded documents, you'll get back an
+ * empty array. Otherwise, you'll get one open
+ * {@link POITextExtractor} for each embedded file.
+ * <p>
+ * This misspelled variant only forwards to
+ * {@link #getEmbeddedDocsTextExtractors(POIOLE2TextExtractor)}.
+ *
+ * @deprecated Use the correctly spelled {@link #getEmbeddedDocsTextExtractors(POIOLE2TextExtractor)} instead
+ */
+ @Deprecated
+ @Removal(version="4.2")
+ public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+ return getEmbeddedDocsTextExtractors(ext);
+ }
+
+ /**
+  * Creates one text extractor for every embedded document of the file the
+  * given extractor works on.
+  * <p>
+  * Embedded documents are looked up for Excel, Word and Outlook extractors only;
+  * for any other extractor the result is an empty array. Outlook attachments in
+  * a format that is not supported are logged and left out.
+  *
+  * @param ext the extractor of the containing document
+  * @return one open {@link POITextExtractor} per embedded document, never {@code null}
+  * @throws IllegalStateException if the extractor has no POIFS root
+  */
+ public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+     // Embedded documents that live in their own POIFS directory
+     ArrayList<Entry> embeddedDirs = new ArrayList<>();
+     // Embedded documents that are only available as raw bytes
+     ArrayList<InputStream> rawDocuments = new ArrayList<>();
+
+     DirectoryEntry root = ext.getRoot();
+     if (root == null) {
+         throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+     }
+
+     if (ext instanceof ExcelExtractor) {
+         // Excel: entries named MBD... directly under the root
+         for (Iterator<Entry> it = root.getEntries(); it.hasNext(); ) {
+             Entry candidate = it.next();
+             if (candidate.getName().startsWith("MBD")) {
+                 embeddedDirs.add(candidate);
+             }
+         }
+     } else if (ext instanceof WordExtractor) {
+         // Word: entries named _... below the ObjectPool directory
+         try {
+             DirectoryEntry objectPool = (DirectoryEntry) root.getEntry("ObjectPool");
+             for (Iterator<Entry> it = objectPool.getEntries(); it.hasNext(); ) {
+                 Entry candidate = it.next();
+                 if (candidate.getName().startsWith("_")) {
+                     embeddedDirs.add(candidate);
+                 }
+             }
+         } catch (FileNotFoundException e) {
+             // a missing ObjectPool is only logged, there is nothing to extract in that case
+             logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
+         }
+     //} else if(ext instanceof PowerPointExtractor) {
+         // Tricky, not stored directly in poifs
+         // TODO
+     } else if (ext instanceof OutlookTextExtactor) {
+         // Outlook: attachments are kept in the attachment chunks of the message
+         MAPIMessage message = ((OutlookTextExtactor)ext).getMAPIMessage();
+         for (AttachmentChunks chunks : message.getAttachmentFiles()) {
+             if (chunks.getAttachData() != null) {
+                 byte[] bytes = chunks.getAttachData().getValue();
+                 rawDocuments.add(new ByteArrayInputStream(bytes));
+             } else if (chunks.getAttachmentDirectory() != null) {
+                 embeddedDirs.add(chunks.getAttachmentDirectory().getDirectory());
+             }
+         }
+     }
+
+     // Nothing embedded at all
+     if (embeddedDirs.isEmpty() && rawDocuments.isEmpty()) {
+         return new POITextExtractor[0];
+     }
+
+     ArrayList<POITextExtractor> extractors = new ArrayList<>();
+     for (Entry embeddedDir : embeddedDirs) {
+         extractors.add(createExtractor((DirectoryNode) embeddedDir));
+     }
+     for (InputStream rawDocument : rawDocuments) {
+         try {
+             extractors.add(createExtractor(rawDocument));
+         } catch (IllegalArgumentException e) {
+             // Ignore, just means it didn't contain
+             // a format we support as yet
+             logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
+         } catch (XmlException | OpenXML4JException e) {
+             throw new IOException(e.getMessage(), e);
+         }
+     }
+     return extractors.toArray(new POITextExtractor[0]);
+ }
+
+ /**
+ * Misspelled variant that only forwards to
+ * {@link #getEmbeddedDocsTextExtractors(POIXMLTextExtractor)}.
+ * <p>
+ * Intended to return an array of text extractors, one for each of
+ * the embedded documents in the file (if there are any); the method
+ * is marked as not implemented.
+ *
+ * @deprecated Use the correctly spelled {@link #getEmbeddedDocsTextExtractors(POIXMLTextExtractor)} instead
+ */
+ @Deprecated
+ @Removal(version="4.2")
+ @NotImplemented
+ @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+ public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
+ return getEmbeddedDocsTextExtractors(ext);
+ }
+
+ /**
+ * Intended to return an array of text extractors, one for each of
+ * the embedded documents in the file (if there are any), with one open
+ * {@link POITextExtractor} for each embedded file.
+ * <p>
+ * This is not implemented for OOXML extractors yet: the method currently
+ * always throws.
+ *
+ * @param ext the extractor of the containing document (currently unused)
+ * @return never returns normally at the moment
+ * @throws IllegalStateException always, as this is not yet supported
+ */
+ @NotImplemented
+ @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+ public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
+ throw new IllegalStateException("Not yet supported");
+ }
+
+ /**
+  * Decrypts the OOXML package stored in the given filesystem and creates a text extractor for it.
+  * <p>
+  * The password is taken from {@link Biff8EncryptionKey#getCurrentUserPassword()}, falling back
+  * to {@link Decryptor#DEFAULT_PASSWORD} if none is set. The filesystem is always closed
+  * before this method returns, no matter whether it succeeds or fails.
+  *
+  * @param fs the filesystem holding the encrypted package; closed by this method
+  * @return the extractor for the decrypted package
+  * @throws IOException if reading or closing the filesystem fails
+  * @throws EncryptedDocumentException if the password is wrong or decrypting fails for another reason
+  */
+ private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
+         throws IOException {
+     String pass = Biff8EncryptionKey.getCurrentUserPassword();
+     if (pass == null) {
+         pass = Decryptor.DEFAULT_PASSWORD;
+     }
+
+     try {
+         // reading the encryption info is covered by the outer try as well,
+         // so that the filesystem is closed even if this already fails
+         EncryptionInfo ei = new EncryptionInfo(fs);
+         Decryptor dec = ei.getDecryptor();
+         InputStream is = null;
+         try {
+             if (!dec.verifyPassword(pass)) {
+                 throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor");
+             }
+             is = dec.getDataStream(fs);
+             return createExtractor(OPCPackage.open(is));
+         } catch (IOException | EncryptedDocumentException e) {
+             // pass these on unchanged, in particular do not wrap an
+             // EncryptedDocumentException into another one
+             throw e;
+         } catch (Exception e) {
+             throw new EncryptedDocumentException(e);
+         } finally {
+             IOUtils.closeQuietly(is);
+         }
+     } finally {
+         // also close the NPOIFSFileSystem here as we read all the data
+         // while decrypting
+         fs.close();
+     }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.extractor;
+
+import java.math.BigDecimal;
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.util.LocaleUtil;
+import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
+
+/**
+ * A {@link POITextExtractor} for returning the textual
+ * content of the OOXML file properties, eg author
+ * and title.
+ */
+public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
+
+ private final DateFormat dateFormat;
+
+ /**
+ * Creates a new POIXMLPropertiesTextExtractor for the given open document.
+ *
+ * @param doc the given open document
+ */
+ public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
+ super(doc);
+ DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
+ dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
+ dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
+ }
+
+ /**
+ * Creates a new POIXMLPropertiesTextExtractor, for the
+ * same file that another TextExtractor is already
+ * working on.
+ * <p>
+ * The document is obtained via {@code otherExtractor.getDocument()}.
+ *
+ * @param otherExtractor the extractor referencing the given file
+ */
+ public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
+ this(otherExtractor.getDocument());
+ }
+
+ /**
+ * Appends a boolean property as {@code thing = value} line.
+ * A primitive is never {@code null}, so the line is always appended.
+ *
+ * @param text the buffer to append to
+ * @param thing the name of the property
+ * @param value the value of the property
+ */
+ private void appendIfPresent(StringBuilder text, String thing, boolean value) {
+ appendIfPresent(text, thing, Boolean.toString(value));
+ }
+
+ /**
+ * Appends an int property as {@code thing = value} line.
+ * A primitive is never {@code null}, so the line is always appended.
+ *
+ * @param text the buffer to append to
+ * @param thing the name of the property
+ * @param value the value of the property
+ */
+ private void appendIfPresent(StringBuilder text, String thing, int value) {
+ appendIfPresent(text, thing, Integer.toString(value));
+ }
+
+ /**
+  * Appends a date property as {@code thing = value} line, formatted with the
+  * date format of this extractor. Nothing is appended for a {@code null} date.
+  *
+  * @param text the buffer to append to
+  * @param thing the name of the property
+  * @param value the value of the property, may be {@code null}
+  */
+ private void appendIfPresent(StringBuilder text, String thing, Date value) {
+     if (value != null) {
+         appendIfPresent(text, thing, dateFormat.format(value));
+     }
+ }
+
+ /**
+  * Appends a property as {@code thing = value} line, terminated by a newline.
+  * Nothing is appended for a {@code null} value.
+  *
+  * @param text the buffer to append to
+  * @param thing the name of the property
+  * @param value the value of the property, may be {@code null}
+  */
+ private void appendIfPresent(StringBuilder text, String thing, String value) {
+     if (value != null) {
+         text.append(thing).append(" = ").append(value).append("\n");
+     }
+ }
+
+ /**
+  * Returns the core document properties, eg author, as text with one
+  * {@code name = value} line per property that has a value.
+  *
+  * @return the core document properties, or an empty string if this extractor has no document
+  */
+ @SuppressWarnings("resource")
+ public String getCorePropertiesText() {
+     POIXMLDocument document = getDocument();
+     if (document == null) { // event based extractor does not have a document
+         return "";
+     }
+
+     StringBuilder text = new StringBuilder(64);
+     PackagePropertiesPart props =
+             document.getProperties().getCoreProperties().getUnderlyingProperties();
+
+     // every property is reported exactly once ("Category" used to be appended twice)
+     appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
+     appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
+     appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
+     appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
+     appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
+     appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
+     appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
+     appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
+     appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
+     appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
+     appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
+     appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
+     appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
+     appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
+     appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
+     appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
+     appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
+     appendIfPresent(text, "Title", props.getTitleProperty().getValue());
+     appendIfPresent(text, "Version", props.getVersionProperty().getValue());
+
+     return text.toString();
+ }
+
+ /**
+  * Returns the extended document properties, eg application, as text with
+  * one {@code name = value} line per property.
+  *
+  * @return the extended document properties, or an empty string if this extractor has no document
+  */
+ @SuppressWarnings("resource")
+ public String getExtendedPropertiesText() {
+     final POIXMLDocument doc = getDocument();
+     if (doc == null) {
+         // event based extractors work without a document
+         return "";
+     }
+
+     final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties extended =
+             doc.getProperties().getExtendedProperties().getUnderlyingProperties();
+     final StringBuilder out = new StringBuilder(64);
+
+     appendIfPresent(out, "Application", extended.getApplication());
+     appendIfPresent(out, "AppVersion", extended.getAppVersion());
+     appendIfPresent(out, "Characters", extended.getCharacters());
+     appendIfPresent(out, "CharactersWithSpaces", extended.getCharactersWithSpaces());
+     appendIfPresent(out, "Company", extended.getCompany());
+     appendIfPresent(out, "HyperlinkBase", extended.getHyperlinkBase());
+     appendIfPresent(out, "HyperlinksChanged", extended.getHyperlinksChanged());
+     appendIfPresent(out, "Lines", extended.getLines());
+     appendIfPresent(out, "LinksUpToDate", extended.getLinksUpToDate());
+     appendIfPresent(out, "Manager", extended.getManager());
+     appendIfPresent(out, "Pages", extended.getPages());
+     appendIfPresent(out, "Paragraphs", extended.getParagraphs());
+     appendIfPresent(out, "PresentationFormat", extended.getPresentationFormat());
+     appendIfPresent(out, "Template", extended.getTemplate());
+     appendIfPresent(out, "TotalTime", extended.getTotalTime());
+
+     return out.toString();
+ }
+
+ /**
+ * Returns the custom document properties, if there are any, as text
+ * with one {@code name = value} line per property.
+ * <p>
+ * Strings, dates, booleans, integers and reals are rendered as text.
+ * For every other kind of value (eg arrays, vectors, blobs, streams)
+ * the placeholder {@code (not implemented!)} is written instead.
+ *
+ * @return the custom document properties, or an empty string if this extractor has no document
+ */
+ @SuppressWarnings({"resource"})
+ public String getCustomPropertiesText() {
+ POIXMLDocument document = getDocument();
+ if (document == null) { // event based extractor does not have a document
+ return "";
+ }
+
+ StringBuilder text = new StringBuilder();
+ org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
+ props = document.getProperties().getCustomProperties().getUnderlyingProperties();
+
+ for (CTProperty property : props.getPropertyArray()) {
+ // placeholder, kept if none of the supported value kinds below is set
+ String val = "(not implemented!)";
+
+ // Strings, dates and booleans; the first value kind that is set wins
+ if (property.isSetLpwstr()) {
+ val = property.getLpwstr();
+ } else if (property.isSetLpstr()) {
+ val = property.getLpstr();
+ } else if (property.isSetDate()) {
+ val = property.getDate().toString();
+ } else if (property.isSetFiletime()) {
+ val = property.getFiletime().toString();
+ } else if (property.isSetBool()) {
+ val = Boolean.toString(property.getBool());
+ }
+
+ // Integers
+ else if (property.isSetI1()) {
+ val = Integer.toString(property.getI1());
+ } else if (property.isSetI2()) {
+ val = Integer.toString(property.getI2());
+ } else if (property.isSetI4()) {
+ val = Integer.toString(property.getI4());
+ } else if (property.isSetI8()) {
+ val = Long.toString(property.getI8());
+ } else if (property.isSetInt()) {
+ val = Integer.toString(property.getInt());
+ }
+
+ // Unsigned Integers
+ else if (property.isSetUi1()) {
+ val = Integer.toString(property.getUi1());
+ } else if (property.isSetUi2()) {
+ val = Integer.toString(property.getUi2());
+ } else if (property.isSetUi4()) {
+ val = Long.toString(property.getUi4());
+ } else if (property.isSetUi8()) {
+ val = property.getUi8().toString();
+ } else if (property.isSetUint()) {
+ val = Long.toString(property.getUint());
+ }
+
+ // Reals
+ else if (property.isSetR4()) {
+ val = Float.toString(property.getR4());
+ } else if (property.isSetR8()) {
+ val = Double.toString(property.getR8());
+ } else if (property.isSetDecimal()) {
+ BigDecimal d = property.getDecimal();
+ if (d == null) {
+ // a null value is appended below as the text "null"
+ val = null;
+ } else {
+ val = d.toPlainString();
+ }
+ }
+
+ /*else if (property.isSetArray()) {
+ // TODO Fetch the array values and output
+ }
+ else if (property.isSetVector()) {
+ // TODO Fetch the vector values and output
+ }
+
+ else if (property.isSetBlob() || property.isSetOblob()) {
+ // TODO Decode, if possible
+ }
+ else if (property.isSetStream() || property.isSetOstream() ||
+ property.isSetVstream()) {
+ // TODO Decode, if possible
+ }
+ else if (property.isSetStorage() || property.isSetOstorage()) {
+ // TODO Decode, if possible
+ }*/
+
+ // unlike the core and extended properties, a line is written for every custom property
+ text.append(property.getName()).append(" = ").append(val).append("\n");
+ }
+
+ return text.toString();
+ }
+
+ /**
+  * Returns the text of the core properties, followed by the extended
+  * properties and the custom properties.
+  *
+  * @return the combined properties text
+  * @throws RuntimeException wrapping any exception raised while collecting the properties
+  */
+ @Override
+ public String getText() {
+     try {
+         final String core = getCorePropertiesText();
+         final String extended = getExtendedPropertiesText();
+         final String custom = getCustomPropertiesText();
+         return core + extended + custom;
+     } catch (Exception e) {
+         throw new RuntimeException(e);
+     }
+ }
+
+ /**
+ * Not supported, as this already is the metadata text extractor.
+ *
+ * @return never returns normally
+ * @throws IllegalStateException always
+ */
+ @Override
+ public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+ throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.extractor;
+
+import java.io.IOException;
+
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.POIXMLProperties.CoreProperties;
+import org.apache.poi.ooxml.POIXMLProperties.CustomProperties;
+import org.apache.poi.ooxml.POIXMLProperties.ExtendedProperties;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+
+public abstract class POIXMLTextExtractor extends POITextExtractor {
+ /** The POIXMLDocument that's open */
+ private final POIXMLDocument _document;
+
+ /**
+ * Creates a new text extractor for the given document
+ *
+ * @param document the document to extract from; it is stored as-is, without a null check
+ */
+ public POIXMLTextExtractor(POIXMLDocument document) {
+ _document = document;
+ }
+
+ /**
+ * Returns the core document properties
+ *
+ * @return the core document properties
+ */
+ public CoreProperties getCoreProperties() {
+ return _document.getProperties().getCoreProperties();
+ }
+ /**
+ * Returns the extended document properties
+ *
+ * @return the extended document properties
+ */
+ public ExtendedProperties getExtendedProperties() {
+ return _document.getProperties().getExtendedProperties();
+ }
+ /**
+ * Returns the custom document properties
+ *
+ * @return the custom document properties
+ */
+ public CustomProperties getCustomProperties() {
+ return _document.getProperties().getCustomProperties();
+ }
+
+ /**
+ * Returns opened document
+ *
+ * @return the opened document
+ */
+ @Override
+ public final POIXMLDocument getDocument() {
+ return _document;
+ }
+
+ /**
+ * Returns the opened OPCPackage that contains the document
+ *
+ * @return the opened OPCPackage
+ */
+ public OPCPackage getPackage() {
+ return _document.getPackage();
+ }
+
+ /**
+ * Returns an OOXML properties text extractor for the
+ * document properties metadata, such as title and author.
+ */
+ @Override
+ public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+ return new POIXMLPropertiesTextExtractor(_document);
+ }
+
+ @Override
+ public void close() throws IOException {
+ // e.g. XSSFEventBaseExcelExtractor passes a null-document
+ if(_document != null) {
+ @SuppressWarnings("resource")
+ OPCPackage pkg = _document.getPackage();
+ if(pkg != null) {
+ // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor!
+ pkg.revert();
+ }
+ }
+ super.close();
+ }
+
+ protected void checkMaxTextSize(CharSequence text, String string) {
+ if(string == null) {
+ return;
+ }
+
+ int size = text.length() + string.length();
+ if(size > ZipSecureFile.getMaxTextSize()) {
+ throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. "
+ + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
+ + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. "
+ + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize());
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Method;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.stream.events.Namespace;
+
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+ /**
+  * Static helpers for creating DOM {@link DocumentBuilder}s and documents.
+  * The shared factory is namespace aware, non-validating, and is configured
+  * on a best-effort basis with secure processing and an entity expansion limit.
+  */
+ public final class DocumentHelper {
+     private static final POILogger logger = POILogFactory.getLogger(DocumentHelper.class);
+
+     private DocumentHelper() {}
+
+     /** Logs every parser problem and re-throws only fatal errors. */
+     private static class DocHelperErrorHandler implements ErrorHandler {
+
+         @Override
+         public void warning(SAXParseException exception) throws SAXException {
+             printError(POILogger.WARN, exception);
+         }
+
+         @Override
+         public void error(SAXParseException exception) throws SAXException {
+             printError(POILogger.ERROR, exception);
+         }
+
+         @Override
+         public void fatalError(SAXParseException exception) throws SAXException {
+             printError(POILogger.FATAL, exception);
+             throw exception;
+         }
+
+         /** Prints the error message. */
+         private void printError(int type, SAXParseException ex) {
+             StringBuilder sb = new StringBuilder();
+
+             String systemId = ex.getSystemId();
+             if (systemId != null) {
+                 // keep only the last path element of the system id
+                 int index = systemId.lastIndexOf('/');
+                 if (index != -1) {
+                     systemId = systemId.substring(index + 1);
+                 }
+                 sb.append(systemId);
+             }
+             sb.append(':');
+             sb.append(ex.getLineNumber());
+             sb.append(':');
+             sb.append(ex.getColumnNumber());
+             sb.append(": ");
+             sb.append(ex.getMessage());
+
+             logger.log(type, sb.toString(), ex);
+         }
+     }
+
+     /**
+      * Creates a new document builder, with sensible defaults
+      *
+      * @return a builder with the ignoring entity resolver and the logging error handler installed
+      * @throws IllegalStateException If creating the DocumentBuilder fails, e.g.
+      *  due to {@link ParserConfigurationException}.
+      */
+     public static synchronized DocumentBuilder newDocumentBuilder() {
+         try {
+             DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
+             documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER);
+             documentBuilder.setErrorHandler(new DocHelperErrorHandler());
+             return documentBuilder;
+         } catch (ParserConfigurationException e) {
+             throw new IllegalStateException("cannot create a DocumentBuilder", e);
+         }
+     }
+
+     // Declared after the logger and before documentBuilderSingleton on purpose:
+     // static initialisers run in textual order.
+     private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+     static {
+         documentBuilderFactory.setNamespaceAware(true);
+         documentBuilderFactory.setValidating(false);
+         trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+         trySetXercesSecurityManager(documentBuilderFactory);
+     }
+
+     /** Sets a factory feature, logging (instead of failing) when it is not supported. */
+     private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) {
+         try {
+             dbf.setFeature(feature, enabled);
+         } catch (Exception e) {
+             logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
+         } catch (AbstractMethodError ame) {
+             logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
+         }
+     }
+
+     /**
+      * Limits entity expansion to 4096, first via a reflectively loaded Xerces
+      * security manager and, if that is not possible, via the JAXP property.
+      * Failures are logged and never propagated.
+      */
+     private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) {
+         // Try built-in JVM one first, standalone if not
+         for (String securityManagerClassName : new String[]{
+                 //"com.sun.org.apache.xerces.internal.util.SecurityManager",
+                 "org.apache.xerces.util.SecurityManager"
+         }) {
+             try {
+                 Object mgr = Class.forName(securityManagerClassName).newInstance();
+                 Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+                 setLimit.invoke(mgr, 4096);
+                 dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
+                 // Stop once one can be setup without error
+                 return;
+             } catch (ClassNotFoundException e) {
+                 // continue without log, this is expected in some setups
+             } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
+                 logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
+             }
+         }
+
+         // separate old version of Xerces not found => use the builtin way of setting the property
+         try {
+             dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
+         } catch (IllegalArgumentException e) {
+             // The underlying implementation does not recognise the attribute. This method runs
+             // from the static initialiser, so an escaping exception would make the class unusable.
+             logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
+         }
+     }
+
+     /**
+      * Parses the given stream via the default (sensible)
+      * DocumentBuilder
+      * @param inp Stream to read the XML data from
+      * @return the parsed Document
+      * @throws IOException if reading the stream fails
+      * @throws SAXException if the content cannot be parsed
+      */
+     public static Document readDocument(InputStream inp) throws IOException, SAXException {
+         return newDocumentBuilder().parse(inp);
+     }
+
+     /**
+      * Parses the given stream via the default (sensible)
+      * DocumentBuilder
+      * @param inp sax source to read the XML data from
+      * @return the parsed Document
+      * @throws IOException if reading the source fails
+      * @throws SAXException if the content cannot be parsed
+      */
+     public static Document readDocument(InputSource inp) throws IOException, SAXException {
+         return newDocumentBuilder().parse(inp);
+     }
+
+     // must only be used to create empty documents, do not use it for parsing!
+     private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder();
+
+     /**
+      * Creates a new DOM Document
+      *
+      * @return a new, empty document created by the shared builder
+      */
+     public static synchronized Document createDocument() {
+         return documentBuilderSingleton.newDocument();
+     }
+
+     /**
+      * Adds a namespace declaration attribute to the given element.
+      *
+      * @param element the element that receives the {@code xmlns:prefix} attribute
+      * @param namespacePrefix the prefix to declare
+      * @param namespaceURI the namespace URI bound to the prefix
+      */
+     public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) {
+         element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI,
+                 XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix,
+                 namespaceURI);
+     }
+
+     /**
+      * Adds a namespace declaration attribute to the given element.
+      *
+      * @param element the element that receives the declaration
+      * @param namespace supplies the prefix and namespace URI to declare
+      */
+     public static void addNamespaceDeclaration(Element element, Namespace namespace) {
+         addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI());
+     }
+ }
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.util;
+
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+/**
+ * <p>
+ * 24.08.2009<br>
+ * </p>
+ *
+ * @author Stefan Stern<br>
+ */
+
+public class IdentifierManager {
+
+ public static final long MAX_ID = Long.MAX_VALUE - 1;
+
+ public static final long MIN_ID = 0L;
+
+ /**
+ *
+ */
+ private final long upperbound;
+
+ /**
+ *
+ */
+ private final long lowerbound;
+
+ /**
+ * List of segments of available identifiers
+ */
+ private LinkedList<Segment> segments;
+
+ /**
+ * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
+ * @param upperbound the upper limit of the id-range to manage. Must be less then or equal {@link #MAX_ID}.
+ */
+ public IdentifierManager(long lowerbound, long upperbound) {
+ if (lowerbound > upperbound) {
+ throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
+ }
+ else if (lowerbound < MIN_ID) {
+ String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID);
+ throw new IllegalArgumentException(message);
+ }
+ else if (upperbound > MAX_ID) {
+ /*
+ * while MAX_ID is Long.MAX_VALUE, this check is pointless. But if
+ * someone subclasses / tweaks the limits, this check is fine.
+ */
+ throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound);
+ }
+ this.lowerbound = lowerbound;
+ this.upperbound = upperbound;
+ this.segments = new LinkedList<>();
+ segments.add(new Segment(lowerbound, upperbound));
+ }
+
+ public long reserve(long id) {
+ if (id < lowerbound || id > upperbound) {
+ throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
+ }
+ verifyIdentifiersLeft();
+
+ if (id == upperbound) {
+ Segment lastSegment = segments.getLast();
+ if (lastSegment.end == upperbound) {
+ lastSegment.end = upperbound - 1;
+ if (lastSegment.start > lastSegment.end) {
+ segments.removeLast();
+ }
+ return id;
+ }
+ return reserveNew();
+ }
+
+ if (id == lowerbound) {
+ Segment firstSegment = segments.getFirst();
+ if (firstSegment.start == lowerbound) {
+ firstSegment.start = lowerbound + 1;
+ if (firstSegment.end < firstSegment.start) {
+ segments.removeFirst();
+ }
+ return id;
+ }
+ return reserveNew();
+ }
+
+ ListIterator<Segment> iter = segments.listIterator();
+ while (iter.hasNext()) {
+ Segment segment = iter.next();
+ if (segment.end < id) {
+ continue;
+ }
+ else if (segment.start > id) {
+ break;
+ }
+ else if (segment.start == id) {
+ segment.start = id + 1;
+ if (segment.end < segment.start) {
+ iter.remove();
+ }
+ return id;
+ }
+ else if (segment.end == id) {
+ segment.end = id - 1;
+ if (segment.start > segment.end) {
+ iter.remove();
+ }
+ return id;
+ }
+ else {
+ iter.add(new Segment(id + 1, segment.end));
+ segment.end = id - 1;
+ return id;
+ }
+ }
+ return reserveNew();
+ }
+
+ /**
+ * @return a new identifier.
+ * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
+ */
+ public long reserveNew() {
+ verifyIdentifiersLeft();
+ Segment segment = segments.getFirst();
+ long result = segment.start;
+ segment.start += 1;
+ if (segment.start > segment.end) {
+ segments.removeFirst();
+ }
+ return result;
+ }
+
+ /**
+ * @param id
+ * the identifier to release. Must be greater than or equal to
+ * {@link #lowerbound} and must be less than or equal to {@link #upperbound}
+ * @return true, if the identifier was reserved and has been successfully
+ * released, false, if the identifier was not reserved.
+ */
+ public boolean release(long id) {
+ if (id < lowerbound || id > upperbound) {
+ throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
+ }
+
+ if (id == upperbound) {
+ Segment lastSegment = segments.getLast();
+ if (lastSegment.end == upperbound - 1) {
+ lastSegment.end = upperbound;
+ return true;
+ } else if (lastSegment.end == upperbound) {
+ return false;
+ } else {
+ segments.add(new Segment(upperbound, upperbound));
+ return true;
+ }
+ }
+
+ if (id == lowerbound) {
+ Segment firstSegment = segments.getFirst();
+ if (firstSegment.start == lowerbound + 1) {
+ firstSegment.start = lowerbound;
+ return true;
+ } else if (firstSegment.start == lowerbound) {
+ return false;
+ } else {
+ segments.addFirst(new Segment(lowerbound, lowerbound));
+ return true;
+ }
+ }
+
+ long higher = id + 1;
+ long lower = id - 1;
+ ListIterator<Segment> iter = segments.listIterator();
+
+ while (iter.hasNext()) {
+ Segment segment = iter.next();
+ if (segment.end < lower) {
+ continue;
+ }
+ if (segment.start > higher) {
+ iter.previous();
+ iter.add(new Segment(id, id));
+ return true;
+ }
+ if (segment.start == higher) {
+ segment.start = id;
+ return true;
+ }
+ else if (segment.end == lower) {
+ segment.end = id;
+ /* check if releasing this elements glues two segments into one */
+ if (iter.hasNext()) {
+ Segment next = iter.next();
+ if (next.start == segment.end + 1) {
+ segment.end = next.end;
+ iter.remove();
+ }
+ }
+ return true;
+ }
+ else {
+ /* id was not reserved, return false */
+ break;
+ }
+ }
+ return false;
+ }
+
+ public long getRemainingIdentifiers() {
+ long result = 0;
+ for (Segment segment : segments) {
+ result = result - segment.start;
+ result = result + segment.end + 1;
+ }
+ return result;
+ }
+
+ /**
+ *
+ */
+ private void verifyIdentifiersLeft() {
+ if (segments.isEmpty()) {
+ throw new IllegalStateException("No identifiers left");
+ }
+ }
+
+ private static class Segment {
+
+ public Segment(long start, long end) {
+ this.start = start;
+ this.end = end;
+ }
+
+ public long start;
+ public long end;
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see java.lang.Object#toString()
+ */
+ public String toString() {
+ return "[" + start + "; " + end + "]";
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import org.apache.poi.openxml4j.opc.*;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.ooxml.POIXMLException;
+import org.apache.poi.util.IOUtils;
+
+import java.io.*;
+import java.net.URI;
+
+/**
+ * Provides handy methods to work with OOXML packages
+ */
+public final class PackageHelper {
+
+ public static OPCPackage open(InputStream is) throws IOException {
+ try {
+ return OPCPackage.open(is);
+ } catch (InvalidFormatException e){
+ throw new POIXMLException(e);
+ }
+ }
+
+ /**
+ * Clone the specified package.
+ *
+ * @param pkg the package to clone
+ * @param file the destination file
+ * @return the cloned package
+ */
+ public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException {
+
+ String path = file.getAbsolutePath();
+
+ OPCPackage dest = OPCPackage.create(path);
+ PackageRelationshipCollection rels = pkg.getRelationships();
+ for (PackageRelationship rel : rels) {
+ PackagePart part = pkg.getPart(rel);
+ PackagePart part_tgt;
+ if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) {
+ copyProperties(pkg.getPackageProperties(), dest.getPackageProperties());
+ continue;
+ }
+ dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType());
+ part_tgt = dest.createPart(part.getPartName(), part.getContentType());
+
+ OutputStream out = part_tgt.getOutputStream();
+ IOUtils.copy(part.getInputStream(), out);
+ out.close();
+
+ if(part.hasRelationships()) {
+ copy(pkg, part, dest, part_tgt);
+ }
+ }
+ dest.close();
+
+ //the temp file will be deleted when JVM terminates
+ new File(path).deleteOnExit();
+ return OPCPackage.open(path);
+ }
+
+ /**
+ * Recursively copy package parts to the destination package
+ */
+ private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException {
+ PackageRelationshipCollection rels = part.getRelationships();
+ if(rels != null) for (PackageRelationship rel : rels) {
+ PackagePart p;
+ if(rel.getTargetMode() == TargetMode.EXTERNAL){
+ part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId());
+ //external relations don't have associated package parts
+ continue;
+ }
+ URI uri = rel.getTargetURI();
+
+ if(uri.getRawFragment() != null) {
+ part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
+ continue;
+ }
+ PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+ p = pkg.getPart(relName);
+ part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
+
+
+
+
+ PackagePart dest;
+ if(!tgt.containPart(p.getPartName())){
+ dest = tgt.createPart(p.getPartName(), p.getContentType());
+ OutputStream out = dest.getOutputStream();
+ IOUtils.copy(p.getInputStream(), out);
+ out.close();
+ copy(pkg, p, tgt, dest);
+ }
+ }
+ }
+
+ /**
+ * Copy core package properties
+ *
+ * @param src source properties
+ * @param tgt target properties
+ */
+ private static void copyProperties(PackageProperties src, PackageProperties tgt){
+ tgt.setCategoryProperty(src.getCategoryProperty().getValue());
+ tgt.setContentStatusProperty(src.getContentStatusProperty().getValue());
+ tgt.setContentTypeProperty(src.getContentTypeProperty().getValue());
+ tgt.setCreatorProperty(src.getCreatorProperty().getValue());
+ tgt.setDescriptionProperty(src.getDescriptionProperty().getValue());
+ tgt.setIdentifierProperty(src.getIdentifierProperty().getValue());
+ tgt.setKeywordsProperty(src.getKeywordsProperty().getValue());
+ tgt.setLanguageProperty(src.getLanguageProperty().getValue());
+ tgt.setRevisionProperty(src.getRevisionProperty().getValue());
+ tgt.setSubjectProperty(src.getSubjectProperty().getValue());
+ tgt.setTitleProperty(src.getTitleProperty().getValue());
+ tgt.setVersionProperty(src.getVersionProperty().getValue());
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.lang.reflect.Method;
+import java.util.concurrent.TimeUnit;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+
+/**
+ * Provides handy methods for working with SAX parsers and readers
+ */
+public final class SAXHelper {
+ private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class);
+ private static long lastLog;
+
+ private SAXHelper() {}
+
+ /**
+ * Creates a new SAX XMLReader, with sensible defaults
+ */
+ public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException {
+ XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
+ xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER);
+ trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING);
+ trySetXercesSecurityManager(xmlReader);
+ return xmlReader;
+ }
+
+ static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() {
+ @Override
+ public InputSource resolveEntity(String publicId, String systemId)
+ throws SAXException, IOException {
+ return new InputSource(new StringReader(""));
+ }
+ };
+
+ private static final SAXParserFactory saxFactory;
+ static {
+ try {
+ saxFactory = SAXParserFactory.newInstance();
+ saxFactory.setValidating(false);
+ saxFactory.setNamespaceAware(true);
+ } catch (RuntimeException | Error re) {
+ // this also catches NoClassDefFoundError, which may be due to a local class path issue
+ // This may occur if the code is run inside a web container
+ // or a restricted JVM
+ // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170
+ logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re);
+ throw re;
+ } catch (Exception e) {
+ logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e);
+ throw new RuntimeException("Failed to create SAXParserFactory", e);
+ }
+ }
+
+ private static void trySetSAXFeature(XMLReader xmlReader, String feature) {
+ try {
+ xmlReader.setFeature(feature, true);
+ } catch (Exception e) {
+ logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
+ } catch (AbstractMethodError ame) {
+ logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
+ }
+ }
+
+ private static void trySetXercesSecurityManager(XMLReader xmlReader) {
+ // Try built-in JVM one first, standalone if not
+ for (String securityManagerClassName : new String[] {
+ //"com.sun.org.apache.xerces.internal.util.SecurityManager",
+ "org.apache.xerces.util.SecurityManager"
+ }) {
+ try {
+ Object mgr = Class.forName(securityManagerClassName).newInstance();
+ Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+ setLimit.invoke(mgr, 4096);
+ xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+ // Stop once one can be setup without error
+ return;
+ } catch (ClassNotFoundException e) {
+ // continue without log, this is expected in some setups
+ } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here
+ // throttle the log somewhat as it can spam the log otherwise
+ if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
+ logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+ lastLog = System.currentTimeMillis();
+ }
+ }
+ }
+
+ // separate old version of Xerces not found => use the builtin way of setting the property
+ try {
+ xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
+ } catch (SAXException e) { // NOSONAR - also catch things like NoClassDefError here
+ // throttle the log somewhat as it can spam the log otherwise
+ if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
+ logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+ lastLog = System.currentTimeMillis();
+ }
+ }
+ }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.ss.extractor;
-
-import org.apache.poi.ss.usermodel.Shape;
-
-/**
- * A collection of embedded object informations and content
- */
-public class EmbeddedData {
- private String filename;
- private byte[] embeddedData;
- private Shape shape;
- private String contentType = "binary/octet-stream";
-
- public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
- setFilename(filename);
- setEmbeddedData(embeddedData);
- setContentType(contentType);
- }
-
- /**
- * @return the filename
- */
- public String getFilename() {
- return filename;
- }
-
- /**
- * Sets the filename
- *
- * @param filename the filename
- */
- public void setFilename(String filename) {
- if (filename == null) {
- this.filename = "unknown.bin";
- } else {
- this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
- }
- }
-
- /**
- * @return the embedded object byte array
- */
- public byte[] getEmbeddedData() {
- return embeddedData;
- }
-
- /**
- * Sets the embedded object as byte array
- *
- * @param embeddedData the embedded object byte array
- */
- public void setEmbeddedData(byte[] embeddedData) {
- this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
- }
-
- /**
- * @return the shape which links to the embedded object
- */
- public Shape getShape() {
- return shape;
- }
-
- /**
- * Sets the shape which links to the embedded object
- *
- * @param shape the shape
- */
- public void setShape(Shape shape) {
- this.shape = shape;
- }
-
- /**
- * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream}
- */
- public String getContentType() {
- return contentType;
- }
-
- /**
- * Sets the content-/mime-type
- *
- * @param contentType the content-type
- */
- public void setContentType(String contentType) {
- this.contentType = contentType;
- }
-}
\ No newline at end of file
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.ss.extractor;
-
-import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.hpsf.ClassID;
-import org.apache.poi.hpsf.ClassIDPredefined;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.Ole10Native;
-import org.apache.poi.poifs.filesystem.Ole10NativeException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.ss.usermodel.Drawing;
-import org.apache.poi.ss.usermodel.ObjectData;
-import org.apache.poi.ss.usermodel.Picture;
-import org.apache.poi.ss.usermodel.PictureData;
-import org.apache.poi.ss.usermodel.Shape;
-import org.apache.poi.ss.usermodel.ShapeContainer;
-import org.apache.poi.ss.usermodel.Sheet;
-import org.apache.poi.ss.usermodel.Workbook;
-import org.apache.poi.util.Beta;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LocaleUtil;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.xssf.usermodel.XSSFObjectData;
-
-/**
- * This extractor class tries to identify various embedded documents within Excel files
- * and provide them via a common interface, i.e. the EmbeddedData instances
- */
-@Beta
-public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
- private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
- //upper bound handed to IOUtils.safelyAllocate when a PDF is copied out of an EMF picture; arbitrarily selected; may need to increase
- private static final int MAX_RECORD_LENGTH = 1_000_000;
-
- // content types assigned to the EmbeddedData instances created by the extractors in this class
- private static final String CONTENT_TYPE_BYTES = "binary/octet-stream";
- private static final String CONTENT_TYPE_PDF = "application/pdf";
- private static final String CONTENT_TYPE_DOC = "application/msword";
- private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel";
-
- /**
-  * Supplies the extractors which the {@code extractOne} methods try, in iteration order.
-  * Override this method if you want to provide custom extractors.
-  *
-  * @return an iterator over a newly created list of the known extractors
-  */
- @Override
- public Iterator<EmbeddedExtractor> iterator() {
-     List<EmbeddedExtractor> known = Arrays.asList(
-         new Ole10Extractor(), new PdfExtractor(), new BiffExtractor(), new OOXMLExtractor(), new FsExtractor()
-     );
-     return known.iterator();
- }
-
- /**
-  * Asks every extractor returned by {@link #iterator()} in turn and lets the
-  * first one that accepts the directory node extract it.
-  *
-  * @param src the directory node to extract
-  * @return the extracted data, or {@code null} if no extractor accepts the node
-  * @throws IOException if the accepting extractor fails to read the node
-  */
- public EmbeddedData extractOne(DirectoryNode src) throws IOException {
-     Iterator<EmbeddedExtractor> candidates = iterator();
-     while (candidates.hasNext()) {
-         EmbeddedExtractor candidate = candidates.next();
-         if (candidate.canExtract(src)) {
-             return candidate.extract(src);
-         }
-     }
-     return null;
- }
-
- /**
-  * Asks every extractor returned by {@link #iterator()} in turn and lets the
-  * first one that accepts the picture extract it.
-  *
-  * @param src the picture shape to extract from
-  * @return the extracted data, or {@code null} if no extractor accepts the picture
-  *         (the accepting extractor may itself return {@code null})
-  * @throws IOException if the accepting extractor fails to read the picture
-  */
- public EmbeddedData extractOne(Picture src) throws IOException {
-     Iterator<EmbeddedExtractor> candidates = iterator();
-     while (candidates.hasNext()) {
-         EmbeddedExtractor candidate = candidates.next();
-         if (candidate.canExtract(src)) {
-             return candidate.extract(src);
-         }
-     }
-     return null;
- }
-
- /**
-  * Collects the embedded data of all shapes reachable from the drawing patriarch of the sheet.
-  *
-  * @param sheet the sheet to scan
-  * @return the embeddings found; an empty list if the sheet has no drawing patriarch
-  * @throws IOException if reading an embedding fails
-  */
- public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
-     Drawing<?> patriarch = sheet.getDrawingPatriarch();
-     if (patriarch != null) {
-         List<EmbeddedData> found = new ArrayList<>();
-         extractAll(patriarch, found);
-         return found;
-     }
-     return Collections.emptyList();
- }
-
- /**
-  * Walks the shapes of the given container, descending into nested containers,
-  * and appends the data of every embedding found to {@code embeddings}.
-  * Each result is linked to its shape and receives a filename: the extractor's
-  * name, else the shape name plus extension, else a generated {@code picture_N} name.
-  *
-  * @param parent the container whose shapes are scanned
-  * @param embeddings the list the results are appended to
-  * @throws IOException if extracting from a picture or a nested container fails;
-  *         failures while reading an {@link ObjectData} are logged and skipped instead
-  */
- protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
-     for (Shape shape : parent) {
-         EmbeddedData data = null;
-         if (shape instanceof ObjectData) {
-             ObjectData objectData = (ObjectData)shape;
-             try {
-                 if (!objectData.hasDirectoryEntry()) {
-                     // no OLE directory: hand out the raw object bytes
-                     String contentType = (objectData instanceof XSSFObjectData)
-                         ? ((XSSFObjectData)objectData).getObjectPart().getContentType()
-                         : CONTENT_TYPE_BYTES;
-                     data = new EmbeddedData(objectData.getFileName(), objectData.getObjectData(), contentType);
-                 } else {
-                     data = extractOne((DirectoryNode)objectData.getDirectory());
-                 }
-             } catch (Exception e) {
-                 LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
-             }
-         } else if (shape instanceof Picture) {
-             data = extractOne((Picture)shape);
-         } else if (shape instanceof ShapeContainer) {
-             extractAll((ShapeContainer<?>)shape, embeddings);
-         }
-
-         if (data != null) {
-             data.setShape(shape);
-             String filename = data.getFilename();
-             int dot = (filename == null) ? -1 : filename.lastIndexOf('.');
-             String extension = (dot == -1) ? ".bin" : filename.substring(dot);
-
-             // generic storage names carry no information - try the shape name instead
-             boolean genericName = filename == null || filename.isEmpty()
-                 || filename.startsWith("MBD") || filename.startsWith("Root Entry");
-             if (genericName) {
-                 filename = shape.getShapeName();
-                 if (filename != null) {
-                     filename += extension;
-                 }
-             }
-             // still nothing usable: fall back to a dummy name
-             if (filename == null || filename.isEmpty()) {
-                 filename = "picture_" + embeddings.size() + extension;
-             }
-             data.setFilename(filename.trim());
-
-             embeddings.add(data);
-         }
-     }
- }
-
-
- /**
-  * Tells whether this extractor is able to handle the given directory node.
-  * This base implementation accepts nothing; the concrete extractors override it.
-  *
-  * @param source the directory node to check
-  * @return always {@code false} in this base implementation
-  */
- public boolean canExtract(DirectoryNode source) {
-     return false;
- }
-
- /**
-  * Tells whether this extractor is able to handle the given picture.
-  * This base implementation accepts nothing; the concrete extractors override it.
-  *
-  * @param source the picture to check
-  * @return always {@code false} in this base implementation
-  */
- public boolean canExtract(Picture source) {
-     return false;
- }
-
- /**
-  * Copies the given directory node with all its children into a new
-  * {@link POIFSFileSystem} and returns the serialized file system.
-  *
-  * @param dn the directory node to copy
-  * @return the data, named after the node and typed as generic binary content
-  * @throws IOException if copying or writing the file system fails
-  */
- protected EmbeddedData extract(DirectoryNode dn) throws IOException {
-     assert(canExtract(dn));
-     // start with a reasonable big size
-     ByteArrayOutputStream buffer = new ByteArrayOutputStream(20000);
-     try (POIFSFileSystem copy = new POIFSFileSystem()) {
-         copyNodes(dn, copy.getRoot());
-         copy.writeFilesystem(buffer);
-     }
-
-     String name = dn.getName();
-     byte[] content = buffer.toByteArray();
-     return new EmbeddedData(name, content, CONTENT_TYPE_BYTES);
- }
-
- /**
-  * Extracts data embedded in a picture. This base implementation extracts nothing.
-  *
-  * @param source the picture to extract from
-  * @return always {@code null} in this base implementation
-  * @throws IOException never thrown here; declared for overriding extractors
-  */
- protected EmbeddedData extract(Picture source) throws IOException {
-     return null;
- }
-
- /**
-  * Extractor for storages whose class id resolves to {@link ClassIDPredefined#OLE_V1_PACKAGE}.
-  */
- public static class Ole10Extractor extends EmbeddedExtractor {
-     @Override
-     public boolean canExtract(DirectoryNode dn) {
-         return ClassIDPredefined.OLE_V1_PACKAGE == ClassIDPredefined.lookup(dn.getStorageClsid());
-     }
-
-     /**
-      * Hands out the file name and data buffer of the {@link Ole10Native} object read from the node.
-      *
-      * @throws IOException wrapping the {@link Ole10NativeException} if the object cannot be created
-      */
-     @Override
-     public EmbeddedData extract(DirectoryNode dn) throws IOException {
-         try {
-             // TODO: inspect the CompObj record for more details, i.e. the content type
-             Ole10Native payload = Ole10Native.createFromEmbeddedOleObject(dn);
-             String name = payload.getFileName();
-             return new EmbeddedData(name, payload.getDataBuffer(), CONTENT_TYPE_BYTES);
-         } catch (Ole10NativeException e) {
-             throw new IOException(e);
-         }
-     }
- }
-
- /**
-  * Extractor for embedded PDF documents, either stored in a "CONTENTS" stream of an
-  * OLE storage or hidden inside the bytes of an EMF picture.
-  */
- static class PdfExtractor extends EmbeddedExtractor {
-     static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
-
-     /**
-      * @return {@code true} if the storage carries {@link #PdfClassID} or has a "CONTENTS" entry
-      */
-     @Override
-     public boolean canExtract(DirectoryNode dn) {
-         ClassID clsId = dn.getStorageClsid();
-         return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS"));
-     }
-
-     /**
-      * Reads the complete "CONTENTS" stream of the node.
-      *
-      * @return the stream content, named after the node with a ".pdf" suffix
-      * @throws IOException if the stream cannot be opened or read
-      */
-     @Override
-     public EmbeddedData extract(DirectoryNode dn) throws IOException {
-         try(ByteArrayOutputStream bos = new ByteArrayOutputStream();
-             InputStream is = dn.createDocumentInputStream("CONTENTS")) {
-             IOUtils.copy(is, bos);
-             return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF);
-         }
-     }
-
-     /**
-      * @return {@code true} if the picture has picture data of type EMF
-      */
-     @Override
-     public boolean canExtract(Picture source) {
-         PictureData pd = source.getPictureData();
-         return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
-     }
-
-     /**
-      * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
-      * If an embedded stream is inside an EMF picture, this method extracts the payload,
-      * i.e. the bytes from the first "%PDF-" marker up to the first "%%EOF" marker after it.
-      *
-      * @return the embedded data in an EMF picture or null if none is found
-      */
-     @Override
-     protected EmbeddedData extract(Picture source) throws IOException {
-         // check for emf+ embedded pdf (poor mans style :( )
-         // Mac Excel 2011 embeds pdf files with this method.
-         PictureData pd = source.getPictureData();
-         if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
-             return null;
-         }
-
-         // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
-         byte[] pictureBytes = pd.getData();
-         int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
-         if (idxStart == -1) {
-             return null;
-         }
-
-         int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
-         if (idxEnd == -1) {
-             return null;
-         }
-
-         // Copy the 5 marker bytes plus the one byte following them (presumably the line
-         // terminator), but never read past the end of the picture: the marker may be the
-         // very last bytes, in which case there is no following byte to copy.
-         int pdfEnd = Math.min(idxEnd + 6, pictureBytes.length);
-         int pdfLen = pdfEnd - idxStart;
-         byte[] pdfBytes = IOUtils.safelyAllocate(pdfLen, MAX_RECORD_LENGTH);
-         System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pdfLen);
-
-         // a shape is not guaranteed to have a name; without one the name is just ".pdf"
-         String shapeName = source.getShapeName();
-         String filename = (shapeName == null) ? "" : shapeName.trim();
-         if (!endsWithIgnoreCase(filename, ".pdf")) {
-             filename += ".pdf";
-         }
-         return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF);
-     }
- }
-
- /**
-  * Extractor for OOXML documents which are stored in a "package" stream of an OLE storage.
-  */
- static class OOXMLExtractor extends EmbeddedExtractor {
-     /**
-      * @return {@code true} if the storage has a "package" entry
-      */
-     @Override
-     public boolean canExtract(DirectoryNode dn) {
-         return dn.hasEntry("package");
-     }
-
-     /**
-      * Reads the complete "package" stream. Content type and file extension are taken
-      * from the predefined class id of the storage; if either is unknown the data is
-      * handed out as a plain zip file.
-      *
-      * @throws IOException if the stream cannot be opened or read
-      */
-     @Override
-     public EmbeddedData extract(DirectoryNode dn) throws IOException {
-
-         ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
-
-         String contentType = null;
-         String ext = null;
-
-         if (clsId != null) {
-             contentType = clsId.getContentType();
-             ext = clsId.getFileExtension();
-         }
-
-         if (contentType == null || ext == null) {
-             contentType = "application/zip";
-             ext = ".zip";
-         }
-
-         // try-with-resources: the stream is released even if reading it fails
-         final byte[] data;
-         try (DocumentInputStream dis = dn.createDocumentInputStream("package")) {
-             data = IOUtils.toByteArray(dis);
-         }
-
-         return new EmbeddedData(dn.getName()+ext, data, contentType);
-     }
- }
-
- /**
-  * Extractor for binary Excel and Word documents, recognized by their
-  * predefined class id or by their characteristic stream entry.
-  */
- static class BiffExtractor extends EmbeddedExtractor {
-     @Override
-     public boolean canExtract(DirectoryNode dn) {
-         if (canExtractExcel(dn)) {
-             return true;
-         }
-         return canExtractWord(dn);
-     }
-
-     /** @return {@code true} for the Excel V7/V8 class ids or if a "Workbook" entry exists */
-     protected boolean canExtractExcel(DirectoryNode dn) {
-         ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
-         if (clsId == ClassIDPredefined.EXCEL_V7 || clsId == ClassIDPredefined.EXCEL_V8) {
-             return true;
-         }
-         return dn.hasEntry("Workbook") /*...*/;
-     }
-
-     /** @return {@code true} for the Word V7/V8 class ids or if a "WordDocument" entry exists */
-     protected boolean canExtractWord(DirectoryNode dn) {
-         ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
-         if (clsId == ClassIDPredefined.WORD_V7 || clsId == ClassIDPredefined.WORD_V8) {
-             return true;
-         }
-         return dn.hasEntry("WordDocument");
-     }
-
-     /**
-      * Copies the storage via the base implementation and then assigns the
-      * Excel or Word specific file name suffix and content type; Excel wins if both match.
-      */
-     @Override
-     public EmbeddedData extract(DirectoryNode dn) throws IOException {
-         EmbeddedData copy = super.extract(dn);
-         boolean excel = canExtractExcel(dn);
-         if (excel) {
-             copy.setFilename(dn.getName() + ".xls");
-             copy.setContentType(CONTENT_TYPE_XLS);
-         } else if (canExtractWord(dn)) {
-             copy.setFilename(dn.getName() + ".doc");
-             copy.setContentType(CONTENT_TYPE_DOC);
-         }
-
-         return copy;
-     }
- }
-
- /**
-  * Catch-all extractor: accepts every directory node and hands out a copy
-  * of the storage with an ".ole" file name suffix.
-  */
- static class FsExtractor extends EmbeddedExtractor {
-     @Override
-     public boolean canExtract(DirectoryNode dn) {
-         return true;
-     }
-
-     @Override
-     public EmbeddedData extract(DirectoryNode dn) throws IOException {
-         final EmbeddedData copy = super.extract(dn);
-         // TODO: read the content type from CompObj stream
-         copy.setFilename(dn.getName() + ".ole");
-         return copy;
-     }
- }
-
- /**
-  * Recursively copies all entries below {@code src} into {@code dest}.
-  * Sub directories are re-created with the storage class id of their source,
-  * documents are copied by streaming their content.
-  *
-  * @param src the directory to read the entries from
-  * @param dest the directory the copies are created in
-  * @throws IOException if an entry cannot be read or created
-  */
- protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
-     for (Entry entry : src) {
-         if (!(entry instanceof DirectoryNode)) {
-             try (InputStream content = src.createDocumentInputStream(entry)) {
-                 dest.createDocument(entry.getName(), content);
-             }
-             continue;
-         }
-
-         DirectoryNode subSrc = (DirectoryNode)entry;
-         DirectoryNode subDest = (DirectoryNode)dest.createDirectory(subSrc.getName());
-         subDest.setStorageClsid(subSrc.getStorageClsid());
-         copyNodes(subSrc, subDest);
-     }
- }
-
-
-
- /**
-  * Knuth-Morris-Pratt Algorithm for Pattern Matching
-  * Finds the first occurrence of the pattern in the text.
-  *
-  * @param data the bytes to search in
-  * @param offset the index in {@code data} at which the search starts
-  * @param pattern the bytes to search for
-  * @return the index of the first byte of the first match at or after {@code offset},
-  *         or -1 if there is none
-  */
- private static int indexOf(byte[] data, int offset, byte[] pattern) {
-     final int[] failure = computeFailure(pattern);
-     if (data.length == 0) {
-         return -1;
-     }
-
-     // number of pattern bytes matched so far
-     int matched = 0;
-     int pos = offset;
-     while (pos < data.length) {
-         final byte current = data[pos];
-         // on a mismatch fall back to the longest shorter prefix that still matches
-         while (matched > 0 && pattern[matched] != current) {
-             matched = failure[matched - 1];
-         }
-         if (pattern[matched] == current) {
-             matched++;
-         }
-         if (matched == pattern.length) {
-             return pos - pattern.length + 1;
-         }
-         pos++;
-     }
-     return -1;
- }
-
- /**
-  * Computes the failure function using a boot-strapping process,
-  * where the pattern is matched against itself.
-  *
-  * @param pattern the pattern to analyze
-  * @return an array of the pattern's length; entry {@code i} is the length of the
-  *         longest proper prefix of {@code pattern[0..i]} that is also a suffix of it
-  */
- private static int[] computeFailure(byte[] pattern) {
-     final int[] table = new int[pattern.length];
-
-     int prefixLen = 0;
-     int pos = 1;
-     while (pos < pattern.length) {
-         final byte current = pattern[pos];
-         while (prefixLen > 0 && pattern[prefixLen] != current) {
-             prefixLen = table[prefixLen - 1];
-         }
-         if (pattern[prefixLen] == current) {
-             prefixLen++;
-         }
-         table[pos] = prefixLen;
-         pos++;
-     }
-
-     return table;
- }
-
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.ss.usermodel;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.poi.EmptyFileException;
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.poifs.crypt.Decryptor;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
-import org.apache.poi.poifs.filesystem.FileMagic;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-
-/**
- * Factory for creating the appropriate kind of Workbook
- * (be it {@link HSSFWorkbook} or {@link XSSFWorkbook}),
- * by auto-detecting from the supplied input.
- */
-public class WorkbookFactory {
- /**
-  * Creates a HSSFWorkbook from the given POIFSFileSystem.
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use.
-  *
-  * @param fs the file system to read the workbook from
-  * @return the created workbook
-  * @throws IOException if an error occurs while reading the data
-  */
- public static Workbook create(POIFSFileSystem fs) throws IOException {
-     final Workbook workbook = new HSSFWorkbook(fs);
-     return workbook;
- }
-
- /**
-  * Creates a Workbook from the given NPOIFSFileSystem, without a password.
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use.
-  *
-  * @param fs the file system to read the workbook from
-  * @return the created workbook
-  * @throws IOException if an error occurs while reading the data; an
-  *         {@link InvalidFormatException} is reported wrapped in an IOException
-  */
- public static Workbook create(NPOIFSFileSystem fs) throws IOException {
-     final Workbook workbook;
-     try {
-         workbook = create(fs, null);
-     } catch (InvalidFormatException e) {
-         // Special case of OOXML-in-POIFS which is broken
-         throw new IOException(e);
-     }
-     return workbook;
- }
-
- /**
-  * Creates a Workbook from the given NPOIFSFileSystem, which may
-  * be password protected.
-  * <p>If the file system contains the encryption entry
-  * ({@link Decryptor#DEFAULT_POIFS_ENTRY}) it is treated as an encrypted OOXML
-  * package, otherwise as a regular HSSF workbook.
-  *
-  * @param fs The {@link NPOIFSFileSystem} to read the document from
-  * @param password The password that should be used or null if no password is necessary.
-  *
-  * @return The created Workbook
-  *
-  * @throws IOException if an error occurs while reading the data
-  * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-  */
- private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException, InvalidFormatException {
-     DirectoryNode root = fs.getRoot();
-
-     // Encrypted OOXML files go inside OLE2 containers, is this one?
-     if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
-         // Release the decrypted stream once the workbook is built (or building failed);
-         // previously it was never closed. This relies on OPCPackage.open(InputStream)
-         // buffering the whole package, so nothing reads from the stream afterwards.
-         try (InputStream stream = DocumentFactoryHelper.getDecryptedStream(fs, password)) {
-             OPCPackage pkg = OPCPackage.open(stream);
-             return create(pkg);
-         }
-     }
-
-     // If we get here, it isn't an encrypted XLSX file
-     // So, treat it as a regular HSSF XLS one
-     boolean passwordSet = false;
-     if (password != null) {
-         Biff8EncryptionKey.setCurrentUserPassword(password);
-         passwordSet = true;
-     }
-     try {
-         return new HSSFWorkbook(root, true);
-     } finally {
-         // do not leave the password behind for later, unrelated loads
-         if (passwordSet) {
-             Biff8EncryptionKey.setCurrentUserPassword(null);
-         }
-     }
- }
-
- /**
-  * Creates a XSSFWorkbook on top of the given OOXML Package.
-  *
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use.</p>
-  *
-  * @param pkg The {@link OPCPackage} opened for reading data.
-  *
-  * @return The created Workbook
-  *
-  * @throws IOException if an error occurs while reading the data
-  */
- public static Workbook create(OPCPackage pkg) throws IOException {
-     final Workbook workbook = new XSSFWorkbook(pkg);
-     return workbook;
- }
-
- /**
-  * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-  *  the given InputStream, using no password.
-  *
-  * <p>Your input stream MUST either support mark/reset, or
-  *  be wrapped as a {@link BufferedInputStream}!</p>
-  *
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use. Note also that loading
-  *  from an InputStream requires more memory than loading
-  *  from a File, so prefer {@link #create(File)} where possible.</p>
-  *
-  *  @param inp The {@link InputStream} to read data from.
-  *
-  *  @return The created Workbook
-  *
-  *  @throws IOException if an error occurs while reading the data
-  *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-  *  @throws EncryptedDocumentException If the workbook given is password protected
-  */
- public static Workbook create(InputStream inp) throws IOException, InvalidFormatException, EncryptedDocumentException {
-     final String noPassword = null;
-     return create(inp, noPassword);
- }
-
- /**
-  * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-  *  the given InputStream, which may be password protected.
-  *  The kind of workbook is chosen from the file magic found at the start of the stream.<p>
-  *
-  * Note that in order to properly release resources the
-  *  Workbook should be closed after use. Note also that loading
-  *  from an InputStream requires more memory than loading
-  *  from a File, so prefer {@link #create(File)} where possible.
-  *
-  *  @param inp The {@link InputStream} to read data from.
-  *  @param password The password that should be used or null if no password is necessary.
-  *
-  *  @return The created Workbook
-  *
-  *  @throws IOException if an error occurs while reading the data
-  *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-  *  @throws EncryptedDocumentException If the wrong password is given for a protected file
-  *  @throws EmptyFileException If an empty stream is given
-  */
- public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
-     final InputStream checked = FileMagic.prepareToCheckMagic(inp);
-
-     switch (FileMagic.valueOf(checked)) {
-         case OOXML:
-             return new XSSFWorkbook(OPCPackage.open(checked));
-         case OLE2:
-             return create(new NPOIFSFileSystem(checked), password);
-         default:
-             throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
-     }
- }
-
- /**
-  * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-  *  the given File, which must exist and be readable, using no password.
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use.
-  *
-  *  @param file The file to read data from.
-  *
-  *  @return The created Workbook
-  *
-  *  @throws IOException if an error occurs while reading the data
-  *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-  *  @throws EncryptedDocumentException If the workbook given is password protected
-  */
- public static Workbook create(File file) throws IOException, InvalidFormatException, EncryptedDocumentException {
-     final String noPassword = null;
-     return create(file, noPassword);
- }
-
- /**
-  * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-  *  the given File, which must exist and be readable, and
-  *  may be password protected. The file is not opened in read-only mode.
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use.
-  *
-  *  @param file The file to read data from.
-  *  @param password The password that should be used or null if no password is necessary.
-  *
-  *  @return The created Workbook
-  *
-  *  @throws IOException if an error occurs while reading the data
-  *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-  *  @throws EncryptedDocumentException If the wrong password is given for a protected file
-  *  @throws EmptyFileException If an empty stream is given
-  */
- public static Workbook create(File file, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
-     final boolean readOnly = false;
-     return create(file, password, readOnly);
- }
-
- /**
-  * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-  *  the given File, which must exist and be readable, and
-  *  may be password protected
-  * <p>Note that in order to properly release resources the
-  *  Workbook should be closed after use.
-  * <p>The file is first opened as an OLE2 file system; if that is rejected with an
-  *  {@link OfficeXmlFileException} it is opened as an OOXML package instead.
-  *
-  *  @param file The file to read data from.
-  *  @param password The password that should be used or null if no password is necessary.
-  *  @param readOnly If the Workbook should be opened in read-only mode to avoid writing back
-  *      changes when the document is closed.
-  *
-  *  @return The created Workbook
-  *
-  *  @throws IOException if an error occurs while reading the data
-  *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-  *  @throws EncryptedDocumentException If the wrong password is given for a protected file
-  *  @throws EmptyFileException If an empty stream is given
-  */
- public static Workbook create(File file, String password, boolean readOnly) throws IOException, InvalidFormatException, EncryptedDocumentException {
-     if (! file.exists()) {
-         throw new FileNotFoundException(file.toString());
-     }
-
-     NPOIFSFileSystem fs = null;
-     // An HSSFWorkbook is built on the root node of the file system, so the file system
-     // must stay open for as long as the returned workbook is in use. Closing it right
-     // after creation (as a try-with-resources would) pulls the file out from under it.
-     boolean fsStillNeeded = false;
-     try {
-         fs = new NPOIFSFileSystem(file, readOnly);
-         Workbook workbook = create(fs, password);
-         // any other result (the decrypted OOXML case) was not built on the file system itself
-         fsStillNeeded = workbook instanceof HSSFWorkbook;
-         return workbook;
-     } catch(OfficeXmlFileException e) {
-         // release the OLE2 handle (if any) before the same file is opened again
-         if (fs != null) {
-             IOUtils.closeQuietly(fs);
-             fs = null;
-         }
-
-         // opening as .xls failed => try opening as .xlsx
-         OPCPackage pkg = OPCPackage.open(file, readOnly ? PackageAccess.READ : PackageAccess.READ_WRITE); // NOSONAR
-         try {
-             return new XSSFWorkbook(pkg);
-         } catch (Exception ioe) {
-             // ensure that file handles are closed - use revert() to not re-write the file
-             pkg.revert();
-             // do not pkg.close();
-
-             if (ioe instanceof IOException) {
-                 throw (IOException)ioe;
-             } else if (ioe instanceof RuntimeException) {
-                 throw (RuntimeException)ioe;
-             } else {
-                 throw new IOException(ioe);
-             }
-         }
-     } finally {
-         // on any failure, and whenever the result does not depend on it, free the file handle
-         if (!fsStillNeeded && fs != null) {
-             IOUtils.closeQuietly(fs);
-         }
-     }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Method;
-
-import javax.xml.XMLConstants;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.stream.events.Namespace;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.xml.sax.ErrorHandler;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-
-public final class DocumentHelper {
- private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class);
-
- private DocumentHelper() {}
-
- /**
-  * Error handler which writes parser problems to the POI logger.
-  * Warnings and errors are only logged, fatal errors are logged and re-thrown.
-  */
- private static class DocHelperErrorHandler implements ErrorHandler {
-
-     @Override
-     public void warning(SAXParseException exception) throws SAXException {
-         printError(POILogger.WARN, exception);
-     }
-
-     @Override
-     public void error(SAXParseException exception) throws SAXException {
-         printError(POILogger.ERROR, exception);
-     }
-
-     @Override
-     public void fatalError(SAXParseException exception) throws SAXException {
-         printError(POILogger.FATAL, exception);
-         throw exception;
-     }
-
-     /**
-      * Logs the problem as {@code <file>:<line>:<column>: <message>}, where the file
-      * is the part of the system id after the last '/' (empty if there is no system id).
-      */
-     private void printError(int type, SAXParseException ex) {
-         String location = "";
-         String systemId = ex.getSystemId();
-         if (systemId != null) {
-             // lastIndexOf yields -1 if there is no '/', which keeps the whole id
-             location = systemId.substring(systemId.lastIndexOf('/') + 1);
-         }
-         String message = location + ':' + ex.getLineNumber() + ':' + ex.getColumnNumber() + ": " + ex.getMessage();
-
-         logger.log(type, message, ex);
-     }
- }
-
- /**
-  * Creates a new document builder from the shared factory and equips it with
-  * the entity resolver {@code SAXHelper.IGNORING_ENTITY_RESOLVER} and a logging error handler.
-  *
-  * @return the configured document builder
-  * @throws IllegalStateException If creating the DocumentBuilder fails, e.g.
-  *  due to {@link ParserConfigurationException}.
-  */
- public static synchronized DocumentBuilder newDocumentBuilder() {
-     final DocumentBuilder builder;
-     try {
-         builder = documentBuilderFactory.newDocumentBuilder();
-     } catch (ParserConfigurationException e) {
-         throw new IllegalStateException("cannot create a DocumentBuilder", e);
-     }
-     builder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER);
-     builder.setErrorHandler(new DocHelperErrorHandler());
-     return builder;
- }
-
- // shared factory used by newDocumentBuilder(); configured once in the static block below
- private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
- static {
- documentBuilderFactory.setNamespaceAware(true);
- documentBuilderFactory.setValidating(false);
- // both helpers only log a warning if the parser rejects the setting
- trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
- trySetXercesSecurityManager(documentBuilderFactory);
- }
-
- /**
-  * Switches a feature of the factory on or off. Failures are logged as
-  * warnings and otherwise ignored.
-  *
-  * @param dbf the factory to configure
-  * @param feature the name of the feature
-  * @param enabled the state to set the feature to
-  */
- private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) {
-     try {
-         dbf.setFeature(feature, enabled);
-     }
-     catch (Exception e) {
-         logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
-     }
-     catch (AbstractMethodError ame) {
-         // raised if the parser implementation does not provide setFeature at all
-         logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
-     }
- }
-
- /**
-  * Limits the entity expansion of the parsers created by the factory to 4096.
-  * A standalone Xerces security manager is tried first; if its class is not available
-  * (or cannot be set up) the limit is set via the JDK specific factory attribute.
-  * This method never throws: it runs in the static initializer of this class, where
-  * an escaping exception would make the whole class unusable.
-  *
-  * @param dbf the factory to configure
-  */
- private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) {
-     // Try built-in JVM one first, standalone if not
-     for (String securityManagerClassName : new String[]{
-             //"com.sun.org.apache.xerces.internal.util.SecurityManager",
-             "org.apache.xerces.util.SecurityManager"
-     }) {
-         try {
-             Object mgr = Class.forName(securityManagerClassName).newInstance();
-             Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
-             setLimit.invoke(mgr, 4096);
-             dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
-             // Stop once one can be setup without error
-             return;
-         } catch (ClassNotFoundException e) {
-             // continue without log, this is expected in some setups
-         } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
-             logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
-         }
-     }
-
-     // separate old version of Xerces not found => use the builtin way of setting the property
-     try {
-         dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
-     } catch (IllegalArgumentException e) {
-         // setAttribute rejects attributes the underlying parser implementation does not know
-         logger.log(POILogger.WARN, "Entity expansion limit could not be set, attribute unsupported by the XML parser", e);
-     }
- }
-
- /**
-  * Parses the given stream with a builder obtained from {@link #newDocumentBuilder()}.
-  *
-  * @param inp Stream to read the XML data from
-  * @return the parsed Document
-  * @throws IOException if reading the stream fails
-  * @throws SAXException if the data cannot be parsed
-  */
- public static Document readDocument(InputStream inp) throws IOException, SAXException {
-     final DocumentBuilder builder = newDocumentBuilder();
-     return builder.parse(inp);
- }
-
- /**
-  * Parses the given source with a builder obtained from {@link #newDocumentBuilder()}.
-  *
-  * @param inp sax source to read the XML data from
-  * @return the parsed Document
-  * @throws IOException if reading the source fails
-  * @throws SAXException if the data cannot be parsed
-  */
- public static Document readDocument(InputSource inp) throws IOException, SAXException {
-     final DocumentBuilder builder = newDocumentBuilder();
-     return builder.parse(inp);
- }
-
- // must only be used to create empty documents, do not use it for parsing!
- private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder();
-
- /**
-  * Creates a new, empty DOM Document using the shared builder instance.
-  *
-  * @return the new document
-  */
- public static synchronized Document createDocument() {
-     final Document empty = documentBuilderSingleton.newDocument();
-     return empty;
- }
-
- /**
-  * Adds a namespace declaration attribute ({@code xmlns:<prefix>="<uri>"}) to the given element.
-  *
-  * @param element the element receiving the declaration
-  * @param namespacePrefix the prefix to declare
-  * @param namespaceURI the namespace URI bound to the prefix
-  */
- public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) {
-     final String qualifiedName = XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix;
-     element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, qualifiedName, namespaceURI);
- }
-
- /**
-  * Adds a namespace declaration attribute to the given element, taking
-  * prefix and URI from the given namespace.
-  *
-  * @param element the element receiving the declaration
-  * @param namespace the namespace to declare
-  */
- public static void addNamespaceDeclaration(Element element, Namespace namespace) {
-     final String prefix = namespace.getPrefix();
-     final String uri = namespace.getNamespaceURI();
-     addNamespaceDeclaration(element, prefix, uri);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.util;
-
-import java.util.LinkedList;
-import java.util.ListIterator;
-
-/**
- * <p>
- * 24.08.2009<br>
- * </p>
- *
- * @author Stefan Stern<br>
- */
-
-public class IdentifierManager {
-
- public static final long MAX_ID = Long.MAX_VALUE - 1;
-
- public static final long MIN_ID = 0L;
-
- /**
- * Upper limit (inclusive) of the managed id-range, as passed to the constructor
- */
- private final long upperbound;
-
- /**
- * Lower limit (inclusive) of the managed id-range, as passed to the constructor
- */
- private final long lowerbound;
-
- /**
- * List of segments of available identifiers
- */
- private LinkedList<Segment> segments;
-
- /**
-  * Creates a manager in which the complete range from {@code lowerbound} to
-  * {@code upperbound} is available.
-  *
-  * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
-  * @param upperbound the upper limit of the id-range to manage. Must be less than or equal to {@link #MAX_ID}.
-  * @throws IllegalArgumentException if {@code lowerbound} is greater than {@code upperbound}
-  *         or one of the bounds is outside of [{@link #MIN_ID}, {@link #MAX_ID}]
-  */
- public IdentifierManager(long lowerbound, long upperbound) {
-     if (lowerbound > upperbound) {
-         throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
-     }
-     if (lowerbound < MIN_ID) {
-         throw new IllegalArgumentException("lowerbound must be greater than or equal to " + MIN_ID);
-     }
-     if (upperbound > MAX_ID) {
-         /*
-          * MAX_ID is Long.MAX_VALUE - 1, so only Long.MAX_VALUE itself is rejected here.
-          * If someone tweaks the limits, this check still holds.
-          */
-         throw new IllegalArgumentException("upperbound must be less than or equal to " + MAX_ID + " but had " + upperbound);
-     }
-     this.lowerbound = lowerbound;
-     this.upperbound = upperbound;
-
-     // initially there is one segment spanning the whole range
-     LinkedList<Segment> initial = new LinkedList<>();
-     initial.add(new Segment(lowerbound, upperbound));
-     this.segments = initial;
- }
-
- /**
-  * Reserves the given identifier if it is still available. If it is not
-  * available, a different identifier is reserved via {@link #reserveNew()} instead.
-  *
-  * @param id the identifier to reserve, must be within the managed range
-  * @return {@code id} if it was available, otherwise the identifier handed out by {@link #reserveNew()}
-  * @throws IllegalArgumentException if {@code id} is outside of the managed range
-  */
- public long reserve(long id) {
- if (id < lowerbound || id > upperbound) {
- throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
- }
- // NOTE(review): presumably throws if no identifier is left (defined outside this chunk), which would make getLast()/getFirst() below safe - confirm
- verifyIdentifiersLeft();
-
- // the upper bound can only be part of the last segment
- if (id == upperbound) {
- Segment lastSegment = segments.getLast();
- if (lastSegment.end == upperbound) {
- lastSegment.end = upperbound - 1;
- if (lastSegment.start > lastSegment.end) {
- segments.removeLast();
- }
- return id;
- }
- return reserveNew();
- }
-
- // the lower bound can only be part of the first segment
- if (id == lowerbound) {
- Segment firstSegment = segments.getFirst();
- if (firstSegment.start == lowerbound) {
- firstSegment.start = lowerbound + 1;
- if (firstSegment.end < firstSegment.start) {
- segments.removeFirst();
- }
- return id;
- }
- return reserveNew();
- }
-
- // look for the segment containing the id
- ListIterator<Segment> iter = segments.listIterator();
- while (iter.hasNext()) {
- Segment segment = iter.next();
- if (segment.end < id) {
- continue;
- }
- else if (segment.start > id) {
- // this segment starts above the id, so the id is in none of the segments
- break;
- }
- else if (segment.start == id) {
- segment.start = id + 1;
- if (segment.end < segment.start) {
- iter.remove();
- }
- return id;
- }
- else if (segment.end == id) {
- segment.end = id - 1;
- if (segment.start > segment.end) {
- iter.remove();
- }
- return id;
- }
- else {
- // id lies strictly inside the segment: split it into the parts below and above the id
- iter.add(new Segment(id + 1, segment.end));
- segment.end = id - 1;
- return id;
- }
- }
- return reserveNew();
- }
-
- /**
-  * Reserves the first identifier of the first segment of available identifiers.
-  *
-  * @return a new identifier.
-  * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
-  */
- public long reserveNew() {
-     verifyIdentifiersLeft();
-     final Segment head = segments.getFirst();
-     final long id = head.start++;
-     // drop the segment once its last identifier has been handed out
-     if (head.start > head.end) {
-         segments.removeFirst();
-     }
-     return id;
- }
-
- /**
-  * Makes a reserved identifier available again.
-  * <p>This also works if every identifier is reserved (no free segment left) and
-  * if the identifier lies above all free segments.
-  *
-  * @param id
-  *            the identifier to release. Must be greater than or equal to
-  *            {@link #lowerbound} and must be less than or equal to {@link #upperbound}
-  * @return true, if the identifier was reserved and has been successfully
-  *         released, false, if the identifier was not reserved.
-  * @throws IllegalArgumentException if {@code id} is outside of the managed range
-  */
- public boolean release(long id) {
-     if (id < lowerbound || id > upperbound) {
-         throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
-     }
-
-     // the bounds are within [MIN_ID, MAX_ID], so neither neighbour computation overflows
-     final long higher = id + 1;
-     final long lower = id - 1;
-
-     // the free segments are kept sorted: find the first one touching or following the id
-     ListIterator<Segment> iter = segments.listIterator();
-     while (iter.hasNext()) {
-         Segment segment = iter.next();
-         if (segment.end < lower) {
-             // entirely below the id and not adjacent to it
-             continue;
-         }
-         if (segment.start > higher) {
-             // entirely above the id and not adjacent: the id becomes a segment of its own
-             iter.previous();
-             iter.add(new Segment(id, id));
-             return true;
-         }
-         if (segment.start == higher) {
-             segment.start = id;
-             return true;
-         }
-         if (segment.end == lower) {
-             segment.end = id;
-             /* check if releasing this elements glues two segments into one */
-             if (iter.hasNext()) {
-                 Segment next = iter.next();
-                 if (next.start == segment.end + 1) {
-                     segment.end = next.end;
-                     iter.remove();
-                 }
-             }
-             return true;
-         }
-         /* the id lies inside of a free segment, i.e. it was not reserved */
-         return false;
-     }
-
-     // no free segment reaches up to the id (or none is left at all):
-     // the id was reserved and becomes the new last segment
-     segments.add(new Segment(id, id));
-     return true;
- }
-
- public long getRemainingIdentifiers() {
- long result = 0;
- for (Segment segment : segments) {
- result = result - segment.start;
- result = result + segment.end + 1;
- }
- return result;
- }
-
- /**
- *
- */
- private void verifyIdentifiersLeft() {
- if (segments.isEmpty()) {
- throw new IllegalStateException("No identifiers left");
- }
- }
-
- private static class Segment {
-
- public Segment(long start, long end) {
- this.start = start;
- this.end = end;
- }
-
- public long start;
- public long end;
-
- /*
- * (non-Javadoc)
- *
- * @see java.lang.Object#toString()
- */
- public String toString() {
- return "[" + start + "; " + end + "]";
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.File;
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.net.URL;
-import java.security.AccessController;
-import java.security.CodeSource;
-import java.security.PrivilegedAction;
-import java.security.ProtectionDomain;
-import java.util.ArrayList;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.Vector;
-import java.util.jar.JarEntry;
-import java.util.jar.JarFile;
-import java.util.regex.Pattern;
-
-import junit.framework.TestCase;
-
-import org.junit.Test;
-import org.junit.internal.TextListener;
-import org.junit.runner.Description;
-import org.junit.runner.JUnitCore;
-import org.junit.runner.Result;
-
-/**
- * Build a 'lite' version of the ooxml-schemas.jar
- *
- * @author Yegor Kozlov
- */
-public final class OOXMLLite {
- private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb");
-
- /**
- * Destination directory to copy filtered classes
- */
- private File _destDest;
-
- /**
- * Directory with the compiled ooxml tests
- */
- private File _testDir;
-
- /**
- * Reference to the ooxml-schemas.jar
- */
- private File _ooxmlJar;
-
-
- OOXMLLite(String dest, String test, String ooxmlJar) {
- _destDest = new File(dest);
- _testDir = new File(test);
- _ooxmlJar = new File(ooxmlJar);
- }
-
- public static void main(String[] args) throws IOException {
- System.out.println("Free memory (bytes): " +
- Runtime.getRuntime().freeMemory());
- long maxMemory = Runtime.getRuntime().maxMemory();
- System.out.println("Maximum memory (bytes): " +
- (maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory));
- System.out.println("Total memory (bytes): " +
- Runtime.getRuntime().totalMemory());
-
- String dest = null, test = null, ooxml = null;
-
- for (int i = 0; i < args.length; i++) {
- switch (args[i]) {
- case "-dest":
- dest = args[++i];
- break;
- case "-test":
- test = args[++i];
- break;
- case "-ooxml":
- ooxml = args[++i];
- break;
- }
- }
- OOXMLLite builder = new OOXMLLite(dest, test, ooxml);
- builder.build();
- }
-
- void build() throws IOException {
- List<Class<?>> lst = new ArrayList<>();
- //collect unit tests
- String exclude = StringUtil.join("|",
- "BaseTestXWorkbook",
- "BaseTestXSheet",
- "BaseTestXRow",
- "BaseTestXCell",
- "BaseTestXSSFPivotTable",
- "TestSXSSFWorkbook\\$\\d",
- "TestUnfixedBugs",
- "MemoryUsage",
- "TestDataProvider",
- "TestDataSamples",
- "All.+Tests",
- "ZipFileAssert",
- "AesZipFileZipEntrySource",
- "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource",
- "PkiTestUtils",
- "TestCellFormatPart\\$\\d",
- "TestSignatureInfo\\$\\d",
- "TestCertificateEncryption\\$CertData",
- "TestPOIXMLDocument\\$OPCParser",
- "TestPOIXMLDocument\\$TestFactory",
- "TestXSLFTextParagraph\\$DrawTextParagraphProxy",
- "TestXSSFExportToXML\\$\\d",
- "TestXSSFExportToXML\\$DummyEntityResolver",
- "TestFormulaEvaluatorOnXSSF\\$Result",
- "TestFormulaEvaluatorOnXSSF\\$SS",
- "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result",
- "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS",
- "TestXSSFBugs\\$\\d",
- "AddImageBench",
- "AddImageBench_jmhType_B\\d",
- "AddImageBench_benchCreatePicture_jmhTest",
- "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream",
- "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator",
- "TestXSSFBReader\\$1",
- "TestXSSFBReader\\$TestSheetHandler",
- "TestFormulaEvaluatorOnXSSF\\$1",
- "TestMultiSheetFormulaEvaluatorOnXSSF\\$1",
- "TestZipPackagePropertiesMarshaller\\$1",
- "SLCommonUtils",
- "TestPPTX2PNG\\$1",
- "TestMatrixFormulasFromXMLSpreadsheet\\$1",
- "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator",
- "TestPOIXMLDocument\\$UncaughtHandler",
- "TestOleShape\\$Api",
- "TestOleShape\\$1",
- "TestPOIXMLDocument\\$1",
- "TestXMLSlideShow\\$1",
- "TestXMLSlideShow\\$BufAccessBAOS",
- "TestXDDFChart\\$1",
- "TestOOXMLLister\\$1",
- "TestOOXMLPrettyPrint\\$1"
- );
- System.out.println("Collecting unit tests from " + _testDir);
- collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class");
- System.out.println("Found " + lst.size() + " classes");
-
- //run tests
- JUnitCore jUnitCore = new JUnitCore();
- jUnitCore.addListener(new TextListener(System.out) {
- private final Set<String> classes = new HashSet<>();
- private int count;
-
- @Override
- public void testStarted(Description description) {
- // count how many test-classes we already saw
- classes.add(description.getClassName());
- count++;
- if(count % 100 == 0) {
- System.out.println();
- System.out.println(classes.size() + "/" + lst.size() + ": " + description.getDisplayName());
- }
-
- super.testStarted(description);
- }
- });
- Result result = jUnitCore.run(lst.toArray(new Class<?>[0]));
- if (!result.wasSuccessful()) {
- throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar");
- }
-
- //see what classes from the ooxml-schemas.jar are loaded
- System.out.println("Copying classes to " + _destDest);
- Map<String, Class<?>> classes = getLoadedClasses(_ooxmlJar.getName());
- for (Class<?> cls : classes.values()) {
- String className = cls.getName();
- String classRef = className.replace('.', '/') + ".class";
- File destFile = new File(_destDest, classRef);
- IOUtils.copy(cls.getResourceAsStream('/' + classRef), destFile);
-
- if(cls.isInterface()){
- /// Copy classes and interfaces declared as members of this class
- for(Class<?> fc : cls.getDeclaredClasses()){
- className = fc.getName();
- classRef = className.replace('.', '/') + ".class";
- destFile = new File(_destDest, classRef);
- IOUtils.copy(fc.getResourceAsStream('/' + classRef), destFile);
- }
- }
- }
-
- //finally copy the compiled .xsb files
- System.out.println("Copying .xsb resources");
- try (JarFile jar = new JarFile(_ooxmlJar)) {
- for (Enumeration<JarEntry> e = jar.entries(); e.hasMoreElements(); ) {
- JarEntry je = e.nextElement();
- if (SCHEMA_PATTERN.matcher(je.getName()).matches()) {
- File destFile = new File(_destDest, je.getName());
- IOUtils.copy(jar.getInputStream(je), destFile);
- }
- }
- }
- }
-
- private static boolean checkForTestAnnotation(Class<?> testclass) {
- for (Method m : testclass.getDeclaredMethods()) {
- if(m.isAnnotationPresent(Test.class)) {
- return true;
- }
- }
-
- // also check super classes
- if(testclass.getSuperclass() != null) {
- for (Method m : testclass.getSuperclass().getDeclaredMethods()) {
- if(m.isAnnotationPresent(Test.class)) {
- return true;
- }
- }
- }
-
- System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation");
-
- // Should we also look at superclasses to find cases
- // where we have abstract base classes with derived tests?
- // if(checkForTestAnnotation(testclass.getSuperclass())) return true;
-
- return false;
- }
-
- /**
- * Recursively collect classes from the supplied directory
- *
- * @param arg the directory to search in
- * @param out output
- * @param ptrn the pattern (regexp) to filter found files
- */
- private static void collectTests(File root, File arg, List<Class<?>> out, String ptrn, String exclude) {
- if (arg.isDirectory()) {
- File files[] = arg.listFiles();
- if (files != null) {
- for (File f : files) {
- collectTests(root, f, out, ptrn, exclude);
- }
- }
- } else {
- String path = arg.getAbsolutePath();
- String prefix = root.getAbsolutePath();
- String cls = path.substring(prefix.length() + 1).replace(File.separator, ".");
- if(!cls.matches(ptrn)) return;
- if (cls.matches(exclude)) return;
- //ignore inner classes defined in tests
- if (cls.indexOf('$') != -1) {
- System.out.println("Inner class " + cls + " not included");
- return;
- }
-
- cls = cls.replace(".class", "");
-
- try {
- Class<?> testclass = Class.forName(cls);
- if (TestCase.class.isAssignableFrom(testclass)
- || checkForTestAnnotation(testclass)) {
- out.add(testclass);
- }
- } catch (Throwable e) { // NOSONAR
- System.out.println("Class " + cls + " is not in classpath");
- }
- }
- }
-
- /**
- *
- * @param ptrn the pattern to filter output
- * @return the classes loaded by the system class loader keyed by class name
- */
- @SuppressWarnings("unchecked")
- private static Map<String, Class<?>> getLoadedClasses(String ptrn) {
- // make the field accessible, we defer this from static initialization to here to
- // allow JDKs which do not have this field (e.g. IBM JDK) to at least load the class
- // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550
- final Field _classes = AccessController.doPrivileged(new PrivilegedAction<Field>() {
- @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)")
- public Field run() {
- try {
- Field fld = ClassLoader.class.getDeclaredField("classes");
- fld.setAccessible(true);
- return fld;
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
-
- }
- });
-
- ClassLoader appLoader = ClassLoader.getSystemClassLoader();
- try {
- Vector<Class<?>> classes = (Vector<Class<?>>) _classes.get(appLoader);
- Map<String, Class<?>> map = new HashMap<>();
- for (Class<?> cls : classes) {
- // e.g. proxy-classes, ...
- ProtectionDomain pd = cls.getProtectionDomain();
- if (pd == null) continue;
- CodeSource cs = pd.getCodeSource();
- if (cs == null) continue;
- URL loc = cs.getLocation();
- if (loc == null) continue;
-
- String jar = loc.toString();
- if (jar.contains(ptrn)) {
- map.put(cls.getName(), cls);
- }
- }
- return map;
- } catch (IllegalAccessException e) {
- throw new RuntimeException(e);
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import org.apache.poi.openxml4j.opc.*;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.POIXMLException;
-
-import java.io.*;
-import java.net.URI;
-
-/**
- * Provides handy methods to work with OOXML packages
- */
-public final class PackageHelper {
-
- public static OPCPackage open(InputStream is) throws IOException {
- try {
- return OPCPackage.open(is);
- } catch (InvalidFormatException e){
- throw new POIXMLException(e);
- }
- }
-
- /**
- * Clone the specified package.
- *
- * @param pkg the package to clone
- * @param file the destination file
- * @return the cloned package
- */
- public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException {
-
- String path = file.getAbsolutePath();
-
- OPCPackage dest = OPCPackage.create(path);
- PackageRelationshipCollection rels = pkg.getRelationships();
- for (PackageRelationship rel : rels) {
- PackagePart part = pkg.getPart(rel);
- PackagePart part_tgt;
- if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) {
- copyProperties(pkg.getPackageProperties(), dest.getPackageProperties());
- continue;
- }
- dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType());
- part_tgt = dest.createPart(part.getPartName(), part.getContentType());
-
- OutputStream out = part_tgt.getOutputStream();
- IOUtils.copy(part.getInputStream(), out);
- out.close();
-
- if(part.hasRelationships()) {
- copy(pkg, part, dest, part_tgt);
- }
- }
- dest.close();
-
- //the temp file will be deleted when JVM terminates
- new File(path).deleteOnExit();
- return OPCPackage.open(path);
- }
-
- /**
- * Recursively copy package parts to the destination package
- */
- private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException {
- PackageRelationshipCollection rels = part.getRelationships();
- if(rels != null) for (PackageRelationship rel : rels) {
- PackagePart p;
- if(rel.getTargetMode() == TargetMode.EXTERNAL){
- part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId());
- //external relations don't have associated package parts
- continue;
- }
- URI uri = rel.getTargetURI();
-
- if(uri.getRawFragment() != null) {
- part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
- continue;
- }
- PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- p = pkg.getPart(relName);
- part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
-
-
-
-
- PackagePart dest;
- if(!tgt.containPart(p.getPartName())){
- dest = tgt.createPart(p.getPartName(), p.getContentType());
- OutputStream out = dest.getOutputStream();
- IOUtils.copy(p.getInputStream(), out);
- out.close();
- copy(pkg, p, tgt, dest);
- }
- }
- }
-
- /**
- * Copy core package properties
- *
- * @param src source properties
- * @param tgt target properties
- */
- private static void copyProperties(PackageProperties src, PackageProperties tgt){
- tgt.setCategoryProperty(src.getCategoryProperty().getValue());
- tgt.setContentStatusProperty(src.getContentStatusProperty().getValue());
- tgt.setContentTypeProperty(src.getContentTypeProperty().getValue());
- tgt.setCreatorProperty(src.getCreatorProperty().getValue());
- tgt.setDescriptionProperty(src.getDescriptionProperty().getValue());
- tgt.setIdentifierProperty(src.getIdentifierProperty().getValue());
- tgt.setKeywordsProperty(src.getKeywordsProperty().getValue());
- tgt.setLanguageProperty(src.getLanguageProperty().getValue());
- tgt.setRevisionProperty(src.getRevisionProperty().getValue());
- tgt.setSubjectProperty(src.getSubjectProperty().getValue());
- tgt.setTitleProperty(src.getTitleProperty().getValue());
- tgt.setVersionProperty(src.getVersionProperty().getValue());
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.lang.reflect.Method;
-import java.util.concurrent.TimeUnit;
-
-import javax.xml.XMLConstants;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParserFactory;
-
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.XMLReader;
-
-
-/**
- * Provides handy methods for working with SAX parsers and readers
- */
-public final class SAXHelper {
- private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class);
- private static long lastLog;
-
- private SAXHelper() {}
-
- /**
- * Creates a new SAX XMLReader, with sensible defaults
- */
- public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException {
- XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
- xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER);
- trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING);
- trySetXercesSecurityManager(xmlReader);
- return xmlReader;
- }
-
- static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() {
- @Override
- public InputSource resolveEntity(String publicId, String systemId)
- throws SAXException, IOException {
- return new InputSource(new StringReader(""));
- }
- };
-
- private static final SAXParserFactory saxFactory;
- static {
- try {
- saxFactory = SAXParserFactory.newInstance();
- saxFactory.setValidating(false);
- saxFactory.setNamespaceAware(true);
- } catch (RuntimeException | Error re) {
- // this also catches NoClassDefFoundError, which may be due to a local class path issue
- // This may occur if the code is run inside a web container
- // or a restricted JVM
- // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170
- logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re);
- throw re;
- } catch (Exception e) {
- logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e);
- throw new RuntimeException("Failed to create SAXParserFactory", e);
- }
- }
-
- private static void trySetSAXFeature(XMLReader xmlReader, String feature) {
- try {
- xmlReader.setFeature(feature, true);
- } catch (Exception e) {
- logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
- } catch (AbstractMethodError ame) {
- logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
- }
- }
-
- private static void trySetXercesSecurityManager(XMLReader xmlReader) {
- // Try built-in JVM one first, standalone if not
- for (String securityManagerClassName : new String[] {
- //"com.sun.org.apache.xerces.internal.util.SecurityManager",
- "org.apache.xerces.util.SecurityManager"
- }) {
- try {
- Object mgr = Class.forName(securityManagerClassName).newInstance();
- Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
- setLimit.invoke(mgr, 4096);
- xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr);
- // Stop once one can be setup without error
- return;
- } catch (ClassNotFoundException e) {
- // continue without log, this is expected in some setups
- } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here
- // throttle the log somewhat as it can spam the log otherwise
- if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
- logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
- lastLog = System.currentTimeMillis();
- }
- }
- }
-
- // separate old version of Xerces not found => use the builtin way of setting the property
- try {
- xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
- } catch (SAXException e) { // NOSONAR - also catch things like NoClassDefError here
- // throttle the log somewhat as it can spam the log otherwise
- if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
- logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
- lastLog = System.currentTimeMillis();
- }
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.Thread.UncaughtExceptionHandler;
-import java.lang.reflect.InvocationTargetException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-
-import org.apache.poi.POIXMLDocumentPart.RelationPart;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.NullOutputStream;
-import org.apache.poi.util.PackageHelper;
-import org.apache.poi.util.TempFile;
-import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
-import org.apache.poi.xwpf.usermodel.XWPFRelation;
-import org.junit.Test;
-
-/**
- * Test recursive read and write of OPC packages
- */
-public final class TestPOIXMLDocument {
-
- private static class OPCParser extends POIXMLDocument {
-
- public OPCParser(OPCPackage pkg) {
- super(pkg);
- }
-
- public OPCParser(OPCPackage pkg, String coreDocumentRel) {
- super(pkg, coreDocumentRel);
- }
-
- @Override
- public List<PackagePart> getAllEmbedds() {
- throw new RuntimeException("not supported");
- }
-
- public void parse(POIXMLFactory factory) throws IOException{
- load(factory);
- }
- }
-
- private static final class TestFactory extends POIXMLFactory {
-
- public TestFactory() {
- //
- }
-
- @Override
- protected POIXMLRelation getDescriptor(String relationshipType) {
- return null;
- }
-
- /**
- * @since POI 3.14-Beta1
- */
- @Override
- protected POIXMLDocumentPart createDocumentPart
- (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
- throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
- return null;
- }
- }
-
- private static void traverse(POIXMLDocument doc) throws IOException{
- HashMap<String,POIXMLDocumentPart> context = new HashMap<>();
- for (RelationPart p : doc.getRelationParts()){
- traverse(p, context);
- }
- }
-
- /**
- * Recursively traverse a OOXML document and assert that same logical parts have the same physical instances
- */
- private static void traverse(RelationPart rp, HashMap<String,POIXMLDocumentPart> context) throws IOException{
- POIXMLDocumentPart dp = rp.getDocumentPart();
- assertEquals(rp.getRelationship().getTargetURI().toString(), dp.getPackagePart().getPartName().getName());
-
- context.put(dp.getPackagePart().getPartName().getName(), dp);
- for(RelationPart p : dp.getRelationParts()){
- assertNotNull(p.getRelationship().toString());
-
- String uri = p.getDocumentPart().getPackagePart().getPartName().getURI().toString();
- assertEquals(uri, p.getRelationship().getTargetURI().toString());
- if (!context.containsKey(uri)) {
- traverse(p, context);
- } else {
- POIXMLDocumentPart prev = context.get(uri);
- assertSame("Duplicate POIXMLDocumentPart instance for targetURI=" + uri, prev, p.getDocumentPart());
- }
- }
- }
-
- public void assertReadWrite(OPCPackage pkg1) throws Exception {
-
- OPCParser doc = new OPCParser(pkg1);
- doc.parse(new TestFactory());
-
- traverse(doc);
-
- File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
- FileOutputStream out = new FileOutputStream(tmp);
- doc.write(out);
- out.close();
-
- // Should not be able to write to an output stream that has been closed
- try {
- doc.write(out);
- fail("Should not be able to write to an output stream that has been closed.");
- } catch (final OpenXML4JRuntimeException e) {
- // FIXME: A better exception class (IOException?) and message should be raised
- // indicating that the document could not be written because the output stream is closed.
- // see {@link org.apache.poi.openxml4j.opc.ZipPackage#saveImpl(java.io.OutputStream)}
- if (e.getMessage().matches("Fail to save: an error occurs while saving the package : The part .+ failed to be saved in the stream with marshaller .+")) {
- // expected
- } else {
- throw e;
- }
- }
-
- // Should not be able to write a document that has been closed
- doc.close();
- try {
- doc.write(new NullOutputStream());
- fail("Should not be able to write a document that has been closed.");
- } catch (final IOException e) {
- if (e.getMessage().equals("Cannot write data, document seems to have been closed already")) {
- // expected
- } else {
- throw e;
- }
- }
-
- // Should be able to close a document multiple times, though subsequent closes will have no effect.
- doc.close();
-
-
- @SuppressWarnings("resource")
- OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
- doc = new OPCParser(pkg1);
- try {
- doc.parse(new TestFactory());
- traverse(doc);
-
- assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
-
- ArrayList<PackagePart> l1 = pkg1.getParts();
- ArrayList<PackagePart> l2 = pkg2.getParts();
-
- assertEquals(l1.size(), l2.size());
- for (int i=0; i < l1.size(); i++){
- PackagePart p1 = l1.get(i);
- PackagePart p2 = l2.get(i);
-
- assertEquals(p1.getContentType(), p2.getContentType());
- assertEquals(p1.hasRelationships(), p2.hasRelationships());
- if(p1.hasRelationships()){
- assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
- }
- assertEquals(p1.getPartName(), p2.getPartName());
- }
- } finally {
- doc.close();
- pkg1.close();
- pkg2.close();
- }
- }
-
- @Test
- public void testPPTX() throws Exception {
- POIDataSamples pds = POIDataSamples.getSlideShowInstance();
- assertReadWrite(PackageHelper.open(pds.openResourceAsStream("PPTWithAttachments.pptm")));
- }
-
- @Test
- public void testXLSX() throws Exception {
- POIDataSamples pds = POIDataSamples.getSpreadSheetInstance();
- assertReadWrite(PackageHelper.open(pds.openResourceAsStream("ExcelWithAttachments.xlsm")));
- }
-
- @Test
- public void testDOCX() throws Exception {
- POIDataSamples pds = POIDataSamples.getDocumentInstance();
- assertReadWrite(PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")));
- }
-
- @Test
- public void testRelationOrder() throws Exception {
- POIDataSamples pds = POIDataSamples.getDocumentInstance();
- @SuppressWarnings("resource")
- OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"));
- OPCParser doc = new OPCParser(pkg);
- try {
- doc.parse(new TestFactory());
-
- for(POIXMLDocumentPart rel : doc.getRelations()){
- //TODO finish me
- assertNotNull(rel);
- }
- } finally {
- doc.close();
- }
- }
-
- @Test
- public void testGetNextPartNumber() throws Exception {
- POIDataSamples pds = POIDataSamples.getDocumentInstance();
- @SuppressWarnings("resource")
- OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"));
- OPCParser doc = new OPCParser(pkg);
- try {
- doc.parse(new TestFactory());
-
- // Non-indexed parts: Word is taken, Excel is not
- assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 0));
- assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, -1));
- assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 99));
- assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 0));
- assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, -1));
- assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 99));
-
- // Indexed parts:
- // Has 2 headers
- assertEquals(0, doc.getNextPartNumber(XWPFRelation.HEADER, 0));
- assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, -1));
- assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, 1));
- assertEquals(8, doc.getNextPartNumber(XWPFRelation.HEADER, 8));
-
- // Has no Excel Sheets
- assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 0));
- assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, -1));
- assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 1));
- } finally {
- doc.close();
- }
- }
-
- @Test
- public void testCommitNullPart() throws IOException, InvalidFormatException {
- POIXMLDocumentPart part = new POIXMLDocumentPart();
- part.prepareForCommit();
- part.commit();
- part.onSave(new HashSet<>());
-
- assertNull(part.getRelationById(null));
- assertNull(part.getRelationId(null));
- assertFalse(part.removeRelation(null, true));
- part.removeRelation((POIXMLDocumentPart)null);
- assertEquals("",part.toString());
- part.onDocumentCreate();
- //part.getTargetPart(null);
- }
-
- @Test
- public void testVSDX() throws Exception {
- POIDataSamples pds = POIDataSamples.getDiagramInstance();
- @SuppressWarnings("resource")
- OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
- POIXMLDocument part = new OPCParser(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
-
- assertNotNull(part);
- assertEquals(0, part.getRelationCounter());
- part.close();
- }
-
- @Test
- public void testVSDXPart() throws IOException {
- POIDataSamples pds = POIDataSamples.getDiagramInstance();
- OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
-
- POIXMLDocumentPart part = new POIXMLDocumentPart(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
-
- assertNotNull(part);
- assertEquals(0, part.getRelationCounter());
-
- open.close();
- }
-
- @Test(expected=POIXMLException.class)
- public void testInvalidCoreRel() throws IOException {
- POIDataSamples pds = POIDataSamples.getDiagramInstance();
- OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
-
- try {
- new POIXMLDocumentPart(open, "somethingillegal");
- } finally {
- open.close();
- }
- }
-
- @Test
- public void testOSGIClassLoading() {
- // the schema type loader is cached per thread in POIXMLTypeLoader.
- // So create a new Thread and change the context class loader (which would normally be used)
- // to not contain the OOXML classes
- Runnable run = new Runnable() {
- public void run() {
- InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
- XMLSlideShow ppt = null;
- try {
- ppt = new XMLSlideShow(is);
- ppt.getSlides().get(0).getShapes();
- } catch (IOException e) {
- fail("failed to load XMLSlideShow");
- } finally {
- IOUtils.closeQuietly(ppt);
- IOUtils.closeQuietly(is);
- }
- }
- };
-
- Thread thread = Thread.currentThread();
- ClassLoader cl = thread.getContextClassLoader();
- UncaughtHandler uh = new UncaughtHandler();
-
- // check schema type loading and check if we could run in an OOM
- Thread ta[] = new Thread[30];
- for (int j=0; j<10; j++) {
- for (int i=0; i<ta.length; i++) {
- ta[i] = new Thread(run);
- ta[i].setContextClassLoader(cl.getParent());
- ta[i].setUncaughtExceptionHandler(uh);
- ta[i].start();
- }
- for (int i=0; i<ta.length; i++) {
- try {
- ta[i].join();
- } catch (InterruptedException e) {
- fail("failed to join thread");
- }
- }
- }
- assertFalse(uh.hasException());
- }
-
- private static class UncaughtHandler implements UncaughtExceptionHandler {
- Throwable e;
-
- public synchronized void uncaughtException(Thread t, Throwable e) {
- this.e = e;
-
- }
-
- public synchronized boolean hasException() {
- return e != null;
- }
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.util.Calendar;
-import java.util.Date;
-
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.openxml4j.util.Nullable;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LocaleUtil;
-import org.apache.poi.xssf.XSSFTestDataSamples;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.apache.poi.xwpf.XWPFTestDataSamples;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-
-/**
- * Test setting extended and custom OOXML properties
- */
-public final class TestPOIXMLProperties {
- private XWPFDocument sampleDoc;
- private XWPFDocument sampleNoThumb;
- private POIXMLProperties _props;
- private CoreProperties _coreProperties;
-
- @Before
- public void setUp() throws IOException {
- sampleDoc = XWPFTestDataSamples.openSampleDocument("documentProperties.docx");
- sampleNoThumb = XWPFTestDataSamples.openSampleDocument("SampleDoc.docx");
- assertNotNull(sampleDoc);
- assertNotNull(sampleNoThumb);
- _props = sampleDoc.getProperties();
- _coreProperties = _props.getCoreProperties();
- assertNotNull(_props);
- }
-
- @After
- public void closeResources() throws Exception {
- sampleDoc.close();
- sampleNoThumb.close();
- }
-
- @Test
- public void testWorkbookExtendedProperties() throws Exception {
- XSSFWorkbook workbook = new XSSFWorkbook();
- POIXMLProperties props = workbook.getProperties();
- assertNotNull(props);
-
- org.apache.poi.POIXMLProperties.ExtendedProperties properties =
- props.getExtendedProperties();
-
- org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
- ctProps = properties.getUnderlyingProperties();
-
-
- String appVersion = "3.5 beta";
- String application = "POI";
-
- ctProps.setApplication(application);
- ctProps.setAppVersion(appVersion);
-
- XSSFWorkbook newWorkbook =
- XSSFTestDataSamples.writeOutAndReadBack(workbook);
- workbook.close();
- assertTrue(workbook != newWorkbook);
-
-
- POIXMLProperties newProps = newWorkbook.getProperties();
- assertNotNull(newProps);
- org.apache.poi.POIXMLProperties.ExtendedProperties newProperties =
- newProps.getExtendedProperties();
-
- assertEquals(application, newProperties.getApplication());
- assertEquals(appVersion, newProperties.getAppVersion());
-
- org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
- newCtProps = newProperties.getUnderlyingProperties();
-
- assertEquals(application, newCtProps.getApplication());
- assertEquals(appVersion, newCtProps.getAppVersion());
-
- newWorkbook.close();
- }
-
-
- /**
- * Test usermodel API for setting custom properties
- */
- @Test
- public void testCustomProperties() throws Exception {
- POIXMLDocument wb1 = new XSSFWorkbook();
-
- POIXMLProperties.CustomProperties customProps = wb1.getProperties().getCustomProperties();
- customProps.addProperty("test-1", "string val");
- customProps.addProperty("test-2", 1974);
- customProps.addProperty("test-3", 36.6);
- //adding a duplicate
- try {
- customProps.addProperty("test-3", 36.6);
- fail("expected exception");
- } catch(IllegalArgumentException e){
- assertEquals("A property with this name already exists in the custom properties", e.getMessage());
- }
- customProps.addProperty("test-4", true);
-
- POIXMLDocument wb2 = XSSFTestDataSamples.writeOutAndReadBack((XSSFWorkbook)wb1);
- wb1.close();
- org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties ctProps =
- wb2.getProperties().getCustomProperties().getUnderlyingProperties();
- assertEquals(4, ctProps.sizeOfPropertyArray());
- org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty p;
-
- p = ctProps.getPropertyArray(0);
- assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
- assertEquals("test-1", p.getName());
- assertEquals("string val", p.getLpwstr());
- assertEquals(2, p.getPid());
-
- p = ctProps.getPropertyArray(1);
- assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
- assertEquals("test-2", p.getName());
- assertEquals(1974, p.getI4());
- assertEquals(3, p.getPid());
-
- p = ctProps.getPropertyArray(2);
- assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
- assertEquals("test-3", p.getName());
- assertEquals(36.6, p.getR8(), 0);
- assertEquals(4, p.getPid());
-
- p = ctProps.getPropertyArray(3);
- assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
- assertEquals("test-4", p.getName());
- assertEquals(true, p.getBool());
- assertEquals(5, p.getPid());
-
- wb2.close();
- }
-
- @Test
- public void testDocumentProperties() {
- String category = _coreProperties.getCategory();
- assertEquals("test", category);
- String contentStatus = "Draft";
- _coreProperties.setContentStatus(contentStatus);
- assertEquals("Draft", contentStatus);
- Date created = _coreProperties.getCreated();
- // the original file contains a following value: 2009-07-20T13:12:00Z
- assertTrue(dateTimeEqualToUTCString(created, "2009-07-20T13:12:00Z"));
- String creator = _coreProperties.getCreator();
- assertEquals("Paolo Mottadelli", creator);
- String subject = _coreProperties.getSubject();
- assertEquals("Greetings", subject);
- String title = _coreProperties.getTitle();
- assertEquals("Hello World", title);
- }
-
- @Test
- public void testTransitiveSetters() throws IOException {
- XWPFDocument doc = new XWPFDocument();
- CoreProperties cp = doc.getProperties().getCoreProperties();
-
-
- Date dateCreated = LocaleUtil.getLocaleCalendar(2010, 6, 15, 10, 0, 0).getTime();
- cp.setCreated(new Nullable<>(dateCreated));
- assertEquals(dateCreated, cp.getCreated());
-
- XWPFDocument doc2 = XWPFTestDataSamples.writeOutAndReadBack(doc);
- doc.close();
- cp = doc2.getProperties().getCoreProperties();
- Date dt3 = cp.getCreated();
- assertEquals(dateCreated, dt3);
- doc2.close();
- }
-
- @Test
- public void testGetSetRevision() {
- String revision = _coreProperties.getRevision();
- assertTrue("Revision number is 1", Integer.parseInt(revision) > 1);
- _coreProperties.setRevision("20");
- assertEquals("20", _coreProperties.getRevision());
- _coreProperties.setRevision("20xx");
- assertEquals("20", _coreProperties.getRevision());
- }
-
- @Test
- public void testLastModifiedByUserProperty() {
- String lastModifiedByUser = _coreProperties.getLastModifiedByUser();
- assertEquals("Paolo Mottadelli", lastModifiedByUser);
- _coreProperties.setLastModifiedByUser("Test User");
- assertEquals("Test User", _coreProperties.getLastModifiedByUser());
- }
-
- public static boolean dateTimeEqualToUTCString(Date dateTime, String utcString) {
- Calendar utcCalendar = LocaleUtil.getLocaleCalendar(LocaleUtil.TIMEZONE_UTC);
- utcCalendar.setTimeInMillis(dateTime.getTime());
- String dateTimeUtcString = utcCalendar.get(Calendar.YEAR) + "-" +
- zeroPad((utcCalendar.get(Calendar.MONTH)+1)) + "-" +
- zeroPad(utcCalendar.get(Calendar.DAY_OF_MONTH)) + "T" +
- zeroPad(utcCalendar.get(Calendar.HOUR_OF_DAY)) + ":" +
- zeroPad(utcCalendar.get(Calendar.MINUTE)) + ":" +
- zeroPad(utcCalendar.get(Calendar.SECOND)) + "Z";
-
- return utcString.equals(dateTimeUtcString);
- }
-
- @Ignore("Fails to add some of the thumbnails, needs more investigation")
- @Test
- public void testThumbnails() throws Exception {
- POIXMLProperties noThumbProps = sampleNoThumb.getProperties();
-
- assertNotNull(_props.getThumbnailPart());
- assertNull(noThumbProps.getThumbnailPart());
-
- assertNotNull(_props.getThumbnailFilename());
- assertNull(noThumbProps.getThumbnailFilename());
-
- assertNotNull(_props.getThumbnailImage());
- assertNull(noThumbProps.getThumbnailImage());
-
- assertEquals("/thumbnail.jpeg", _props.getThumbnailFilename());
-
-
- // Adding / changing
- ByteArrayInputStream imageData = new ByteArrayInputStream(new byte[1]);
- noThumbProps.setThumbnail("Testing.png", imageData);
- assertNotNull(noThumbProps.getThumbnailPart());
- assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
- assertNotNull(noThumbProps.getThumbnailImage());
- assertEquals(1, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
-
- imageData = new ByteArrayInputStream(new byte[2]);
- noThumbProps.setThumbnail("Testing2.png", imageData);
- assertNotNull(noThumbProps.getThumbnailPart());
- assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
- assertNotNull(noThumbProps.getThumbnailImage());
- assertEquals(2, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
- }
-
- private static String zeroPad(long i) {
- if (i >= 0 && i <=9) {
- return "0" + i;
- } else {
- return String.valueOf(i);
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import static org.apache.poi.POITestCase.assertContains;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.Locale;
-
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.UnsupportedFileFormatException;
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hssf.HSSFTestDataSamples;
-import org.apache.poi.hssf.OldExcelFormatException;
-import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hwpf.extractor.Word6Extractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
-import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.xmlbeans.XmlException;
-import org.junit.Test;
-
-/**
- * Test that the extractor factory plays nicely
- */
-public class TestExtractorFactory {
-
- private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class);
-
- private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
- private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls");
- private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
- private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
- private static final File xltx = getFileAndCheck(ssTests, "test.xltx");
- private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
- private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
-
- private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
- private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc");
- private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc");
- private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc");
- private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx");
- private static final File dotx = getFileAndCheck(wpTests, "test.dotx");
- private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
- private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
-
- private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
- private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt");
- private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx");
- private static final File txt = getFileAndCheck(slTests, "SampleShow.txt");
-
- private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance();
- private static final File msg = getFileAndCheck(olTests, "quick.msg");
- private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
- private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
-
- private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
- private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
- private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx");
-
- private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
- private static File pub = getFileAndCheck(pubTests, "Simple.pub");
-
- private static File getFileAndCheck(POIDataSamples samples, String name) {
- File file = samples.getFile(name);
-
- assertNotNull("Did not get a file for " + name, file);
- assertTrue("Did not get a type file for " + name, file.isFile());
- assertTrue("File did not exist: " + name, file.exists());
-
- return file;
- }
-
- private static final Object[] TEST_SET = {
- "Excel", xls, ExcelExtractor.class, 200,
- "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200,
- "Excel - xltx", xltx, XSSFExcelExtractor.class, -1,
- "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1,
- "Word", doc, WordExtractor.class, 120,
- "Word - docx", docx, XWPFWordExtractor.class, 120,
- "Word - dotx", dotx, XWPFWordExtractor.class, -1,
- "Word 6", doc6, Word6Extractor.class, 20,
- "Word 95", doc95, Word6Extractor.class, 120,
- "PowerPoint", ppt, SlideShowExtractor.class, 120,
- "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120,
- "Visio", vsd, VisioTextExtractor.class, 50,
- "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20,
- "Publisher", pub, PublisherTextExtractor.class, 50,
- "Outlook msg", msg, OutlookTextExtactor.class, 50,
-
- // TODO Support OOXML-Strict, see bug #57699
- // xlsxStrict
- };
-
- @FunctionalInterface
- interface FunctionEx<T, R> {
- R apply(T t) throws IOException, OpenXML4JException, XmlException;
- }
-
-
- @Test
- public void testFile() throws Exception {
- for (int i = 0; i < TEST_SET.length; i += 4) {
- try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) {
- testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
- }
- }
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testFileInvalid() throws Exception {
- // Text
- try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {}
- }
-
- @Test
- public void testInputStream() throws Exception {
- testStream((f) -> ExtractorFactory.createExtractor(f), true);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testInputStreamInvalid() throws Exception {
- testInvalid((f) -> ExtractorFactory.createExtractor(f));
- }
-
- @Test
- public void testPOIFS() throws Exception {
- testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
- }
-
- @Test(expected = IOException.class)
- public void testPOIFSInvalid() throws Exception {
- testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
- }
-
- @Test
- public void testOPOIFS() throws Exception {
- testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false);
- }
-
- @Test(expected = IOException.class)
- public void testOPOIFSInvalid() throws Exception {
- testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)));
- }
-
-
- private void testStream(final FunctionEx<FileInputStream, POITextExtractor> poifsIS, final boolean loadOOXML)
- throws IOException, OpenXML4JException, XmlException {
- for (int i = 0; i < TEST_SET.length; i += 4) {
- File testFile = (File) TEST_SET[i + 1];
- if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) {
- continue;
- }
- try (FileInputStream fis = new FileInputStream(testFile);
- POITextExtractor ext = poifsIS.apply(fis)) {
- testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
- } catch (IllegalArgumentException e) {
- fail("failed to process "+testFile);
- }
- }
- }
-
- private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) {
- assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext));
- final String actual = ext.getText();
- if (minLength == -1) {
- assertContains(actual.toLowerCase(Locale.ROOT), "test");
- } else {
- assertTrue("extracted content too short for " + testcase, actual.length() > minLength);
- }
- }
-
- private void testInvalid(FunctionEx<FileInputStream, POITextExtractor> poifs) throws IOException, OpenXML4JException, XmlException {
- // Text
- try (FileInputStream fis = new FileInputStream(txt);
- POITextExtractor te = poifs.apply(fis)) {
- }
- }
-
- @Test
- public void testPackage() throws Exception {
- for (int i = 0; i < TEST_SET.length; i += 4) {
- final File testFile = (File) TEST_SET[i + 1];
- if (!testFile.getName().endsWith("x")) {
- continue;
- }
-
- try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
- final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
- testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
- pkg.revert();
- }
- }
- }
-
- @Test(expected = UnsupportedFileFormatException.class)
- public void testPackageInvalid() throws Exception {
- // Text
- try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
- final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {}
- }
-
- @Test
- public void testPreferEventBased() throws Exception {
- assertFalse(ExtractorFactory.getPreferEventExtractor());
- assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
- assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
- ExtractorFactory.setThreadPrefersEventExtractors(true);
-
- assertTrue(ExtractorFactory.getPreferEventExtractor());
- assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
- assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
- ExtractorFactory.setAllThreadsPreferEventExtractors(false);
-
- assertFalse(ExtractorFactory.getPreferEventExtractor());
- assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
- assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
-
- ExtractorFactory.setAllThreadsPreferEventExtractors(null);
-
- assertTrue(ExtractorFactory.getPreferEventExtractor());
- assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
- assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
-
- // Check we get the right extractors now
- POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor
- instanceof EventBasedExcelExtractor
- );
- extractor.close();
- extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
-
- // Put back to normal
- ExtractorFactory.setThreadPrefersEventExtractors(false);
- assertFalse(ExtractorFactory.getPreferEventExtractor());
- assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
- assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
- // And back
- extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor
- instanceof ExcelExtractor
- );
- extractor.close();
- extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(
- extractor
- instanceof XSSFExcelExtractor
- );
- extractor.close();
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
- }
-
- /**
- * Test embedded docs text extraction. For now, only
- * does poifs embedded, but will do ooxml ones
- * at some point.
- */
- @Test
- public void testEmbedded() throws Exception {
- final Object[] testObj = {
- "No embeddings", xls, "0-0-0-0-0-0",
- "Excel", xlsEmb, "6-2-2-2-0-0",
- "Word", docEmb, "4-1-2-1-0-0",
- "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1",
- "Outlook", msgEmb, "1-1-0-0-0-0",
- "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0",
- };
-
- for (int i=0; i<testObj.length; i+=3) {
- try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
- final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
-
- int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
- for (POITextExtractor embed : embeds) {
- assertTrue(embed.getText().length() > 20);
- if (embed instanceof SlideShowExtractor) {
- numPpt++;
- } else if (embed instanceof ExcelExtractor) {
- numXls++;
- } else if (embed instanceof WordExtractor) {
- numWord++;
- } else if (embed instanceof OutlookTextExtactor) {
- numMsg++;
- } else if (embed instanceof XWPFWordExtractor) {
- numWordX++;
- }
- }
-
- final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX;
- final String expected = (String)testObj[i+2];
- assertEquals("invalid number of embeddings - "+testObj[i], expected, actual);
- }
- }
-
- // TODO - PowerPoint
- // TODO - Publisher
- // TODO - Visio
- }
-
- private static final String[] EXPECTED_FAILURES = {
- // password protected files
- "spreadsheet/password.xls",
- "spreadsheet/protected_passtika.xlsx",
- "spreadsheet/51832.xls",
- "document/PasswordProtected.doc",
- "slideshow/Password_Protected-hello.ppt",
- "slideshow/Password_Protected-56-hello.ppt",
- "slideshow/Password_Protected-np-hello.ppt",
- "slideshow/cryptoapi-proc2356.ppt",
- //"document/bug53475-password-is-pass.docx",
- //"document/bug53475-password-is-solrcell.docx",
- "spreadsheet/xor-encryption-abc.xls",
- "spreadsheet/35897-type4.xls",
- //"poifs/protect.xlsx",
- //"poifs/protected_sha512.xlsx",
- //"poifs/extenxls_pwd123.xlsx",
- //"poifs/protected_agile.docx",
- "spreadsheet/58616.xlsx",
-
- // TODO: fails XMLExportTest, is this ok?
- "spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx",
- "spreadsheet/55864.xlsx",
- "spreadsheet/57890.xlsx",
-
- // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
- "spreadsheet/44958.xls",
- "spreadsheet/44958_1.xls",
- "spreadsheet/testArraysAndTables.xls",
-
- // TODO: good to ignore?
- "spreadsheet/sample-beta.xlsx",
-
- // This is actually a spreadsheet!
- "hpsf/TestRobert_Flaherty.doc",
-
- // some files that are broken, eg Word 95, ...
- "spreadsheet/43493.xls",
- "spreadsheet/46904.xls",
- "document/Bug50955.doc",
- "slideshow/PPT95.ppt",
- "openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx",
- "openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx",
- "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx",
- "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx",
- "openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx",
- "openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx",
- "openxml4j/OPCCompliance_DerivedPartNameFAIL.docx",
- "openxml4j/invalid.xlsx",
- "spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764()
- "spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764()
- "spreadsheet/Simple.xlsb",
- "poifs/unknown_properties.msg", // POIFS properties corrupted
- "poifs/only-zero-byte-streams.ole2", // No actual contents
- "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion
- "spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion
- "spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion
-
- // old Excel files, which we only support simple text extraction of
- "spreadsheet/testEXCEL_2.xls",
- "spreadsheet/testEXCEL_3.xls",
- "spreadsheet/testEXCEL_4.xls",
- "spreadsheet/testEXCEL_5.xls",
- "spreadsheet/testEXCEL_95.xls",
-
- // OOXML Strict is not yet supported, see bug #57699
- "spreadsheet/SampleSS.strict.xlsx",
- "spreadsheet/SimpleStrict.xlsx",
- "spreadsheet/sample.strict.xlsx",
-
- // non-TNEF files
- "ddf/Container.dat",
- "ddf/47143.dat",
-
- // sheet cloning errors
- "spreadsheet/47813.xlsx",
- "spreadsheet/56450.xls",
- "spreadsheet/57231_MixedGasReport.xls",
- "spreadsheet/OddStyleRecord.xls",
- "spreadsheet/WithChartSheet.xlsx",
- "spreadsheet/chart_sheet.xlsx",
- };
-
- @Test
- public void testFileLeak() throws Exception {
- // run a number of files that might fail in order to catch
- // leaked file resources when using file-leak-detector while
- // running the test
-
- for(String file : EXPECTED_FAILURES) {
- try {
- ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file));
- } catch (Exception e) {
- // catch all exceptions here as we are only interested in file-handle leaks
- }
- }
- }
-
- /**
- * #59074 - Excel 95 files should give a helpful message, not just
- * "No supported documents found in the OLE2 stream"
- */
- @Test(expected = OldExcelFormatException.class)
- public void bug59074() throws Exception {
- ExtractorFactory.createExtractor(
- POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
- }
-
- @SuppressWarnings("deprecation")
- @Test(expected = IllegalStateException.class)
- public void testGetEmbedFromXMLExtractor() {
- // currently not implemented
- ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null);
- }
-
- @SuppressWarnings("deprecation")
- @Test(expected = IllegalStateException.class)
- public void testGetEmbeddedFromXMLExtractor() {
- // currently not implemented
- ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
- }
-
- // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
- // When this happens, change this from @Test(expected=...) to @Test
- // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor
- @Test(expected=AssertionError.class)
- public void test45565() throws Exception {
- try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) {
- String text = extractor.getText();
- assertContains(text, "testdoc");
- assertContains(text, "test phrase");
- }
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor.ooxml;
+
+import static org.apache.poi.POITestCase.assertContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Locale;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.hssf.OldExcelFormatException;
+import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.xmlbeans.XmlException;
+import org.junit.Test;
+
+/**
+ * Test that the extractor factory plays nicely
+ */
+public class TestExtractorFactory {
+
+ // Logger for this test class.
+ private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class);
+
+ // Spreadsheet samples. Every File constant below is resolved through getFileAndCheck,
+ // so a missing sample fails while this class is being initialized.
+ private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
+ private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls");
+ private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
+ // only mentioned by the commented-out OOXML-Strict TODO entry of TEST_SET
+ private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
+ private static final File xltx = getFileAndCheck(ssTests, "test.xltx");
+ private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+ private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
+
+ // Word-processing samples.
+ private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
+ private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc");
+ private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc");
+ private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc");
+ private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx");
+ private static final File dotx = getFileAndCheck(wpTests, "test.dotx");
+ private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
+ private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
+
+ // Slide-show samples; txt is the plain-text file the *Invalid tests feed to the factory.
+ private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+ private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt");
+ private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx");
+ private static final File txt = getFileAndCheck(slTests, "SampleShow.txt");
+
+ // Outlook message samples, including messages with attachments.
+ private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance();
+ private static final File msg = getFileAndCheck(olTests, "quick.msg");
+ private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
+ private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+
+ // Diagram (Visio) samples.
+ private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
+ private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
+ private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx");
+
+ private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+ private static File pub = getFileAndCheck(pubTests, "Simple.pub");
+
+    /**
+     * Resolves a sample file and asserts that it is usable before any test runs.
+     *
+     * @param samples the sample collection the name is resolved against
+     * @param name the sample file name
+     * @return the resolved file, asserted to be non-null, existing and a regular file
+     */
+    private static File getFileAndCheck(POIDataSamples samples, String name) {
+        File file = samples.getFile(name);
+
+        assertNotNull("Did not get a file for " + name, file);
+        // Existence is checked first: isFile() is also false for a missing path, so
+        // checking it first would hide the more precise "did not exist" message.
+        assertTrue("File did not exist: " + name, file.exists());
+        assertTrue("Did not get a type file for " + name, file.isFile());
+
+        return file;
+    }
+
+ /**
+ * Flat table of test cases, four consecutive slots per case:
+ * description, sample file, expected extractor class and a length threshold.
+ * testExtractor handles a threshold of -1 by checking that the lower-cased text
+ * contains "test"; any other value requires the text to be longer than the threshold.
+ */
+ private static final Object[] TEST_SET = {
+ "Excel", xls, ExcelExtractor.class, 200,
+ "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200,
+ "Excel - xltx", xltx, XSSFExcelExtractor.class, -1,
+ "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1,
+ "Word", doc, WordExtractor.class, 120,
+ "Word - docx", docx, XWPFWordExtractor.class, 120,
+ "Word - dotx", dotx, XWPFWordExtractor.class, -1,
+ "Word 6", doc6, Word6Extractor.class, 20,
+ "Word 95", doc95, Word6Extractor.class, 120,
+ "PowerPoint", ppt, SlideShowExtractor.class, 120,
+ "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120,
+ "Visio", vsd, VisioTextExtractor.class, 50,
+ "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20,
+ "Publisher", pub, PublisherTextExtractor.class, 50,
+ "Outlook msg", msg, OutlookTextExtactor.class, 50,
+
+ // TODO Support OOXML-Strict, see bug #57699
+ // xlsxStrict
+ };
+
+ @FunctionalInterface
+ interface FunctionEx<T, R> {
+ R apply(T t) throws IOException, OpenXML4JException, XmlException;
+ }
+
+
+    /** Runs every TEST_SET case through the File-based factory method. */
+    @Test
+    public void testFile() throws Exception {
+        for (int base = 0; base < TEST_SET.length; base += 4) {
+            final String label = (String) TEST_SET[base];
+            final File sample = (File) TEST_SET[base + 1];
+            final Class<?> expectedType = (Class<?>) TEST_SET[base + 2];
+            final Integer threshold = (Integer) TEST_SET[base + 3];
+            try (POITextExtractor extractor = ExtractorFactory.createExtractor(sample)) {
+                testExtractor(extractor, label, expectedType, threshold);
+            }
+        }
+    }
+
+    /** A plain-text file handed to the File-based factory must be rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testFileInvalid() throws Exception {
+        try (POITextExtractor unexpected = ExtractorFactory.createExtractor(txt)) {
+            // nothing to do: creating the extractor is expected to throw
+        }
+    }
+
+    /** Runs the TEST_SET cases through the stream-based factory method, OOXML samples included. */
+    @Test
+    public void testInputStream() throws Exception {
+        final FunctionEx<FileInputStream, POITextExtractor> fromStream =
+                stream -> ExtractorFactory.createExtractor(stream);
+        testStream(fromStream, true);
+    }
+
+    /** A plain-text stream handed to the stream-based factory must be rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInputStreamInvalid() throws Exception {
+        final FunctionEx<FileInputStream, POITextExtractor> fromStream =
+                stream -> ExtractorFactory.createExtractor(stream);
+        testInvalid(fromStream);
+    }
+
+    /** Runs the non-OOXML TEST_SET cases through a POIFSFileSystem built from the stream. */
+    @Test
+    public void testPOIFS() throws Exception {
+        final FunctionEx<FileInputStream, POITextExtractor> viaPoifs =
+                stream -> ExtractorFactory.createExtractor(new POIFSFileSystem(stream));
+        testStream(viaPoifs, false);
+    }
+
+    /** Building a POIFSFileSystem from the plain-text sample is expected to fail with an IOException. */
+    @Test(expected = IOException.class)
+    public void testPOIFSInvalid() throws Exception {
+        final FunctionEx<FileInputStream, POITextExtractor> viaPoifs =
+                stream -> ExtractorFactory.createExtractor(new POIFSFileSystem(stream));
+        testInvalid(viaPoifs);
+    }
+
+    /** Runs the non-OOXML TEST_SET cases through an OPOIFSFileSystem built from the stream. */
+    @Test
+    public void testOPOIFS() throws Exception {
+        final FunctionEx<FileInputStream, POITextExtractor> viaOpoifs =
+                stream -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(stream));
+        testStream(viaOpoifs, false);
+    }
+
+    /** Building an OPOIFSFileSystem from the plain-text sample is expected to fail with an IOException. */
+    @Test(expected = IOException.class)
+    public void testOPOIFSInvalid() throws Exception {
+        final FunctionEx<FileInputStream, POITextExtractor> viaOpoifs =
+                stream -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(stream));
+        testInvalid(viaOpoifs);
+    }
+
+
+ private void testStream(final FunctionEx<FileInputStream, POITextExtractor> poifsIS, final boolean loadOOXML)
+ throws IOException, OpenXML4JException, XmlException {
+ for (int i = 0; i < TEST_SET.length; i += 4) {
+ File testFile = (File) TEST_SET[i + 1];
+ if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) {
+ continue;
+ }
+ try (FileInputStream fis = new FileInputStream(testFile);
+ POITextExtractor ext = poifsIS.apply(fis)) {
+ testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+ } catch (IllegalArgumentException e) {
+ fail("failed to process "+testFile);
+ }
+ }
+ }
+
+    /**
+     * Checks the type of an extractor and the text it produces.
+     *
+     * @param ext the extractor under test
+     * @param testcase description used in failure messages
+     * @param extrClass the class the extractor must be an instance of
+     * @param minLength -1 to require that the lower-cased text contains "test",
+     *        otherwise the text must be longer than this many characters
+     */
+    private void testExtractor(final POITextExtractor ext, final String testcase, final Class<?> extrClass, final Integer minLength) {
+        assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext));
+        final String actual = ext.getText();
+        // report a missing text with the test case name instead of a bare NullPointerException
+        assertNotNull("no text extracted for " + testcase, actual);
+        if (minLength == -1) {
+            assertContains(actual.toLowerCase(Locale.ROOT), "test");
+        } else {
+            assertTrue("extracted content too short for " + testcase, actual.length() > minLength);
+        }
+    }
+
+    /**
+     * Feeds the plain-text sample to the given extractor factory function; callers
+     * declare the exception they expect from it.
+     *
+     * @param poifs creates the extractor from the opened stream
+     */
+    private void testInvalid(FunctionEx<FileInputStream, POITextExtractor> poifs) throws IOException, OpenXML4JException, XmlException {
+        try (FileInputStream textStream = new FileInputStream(txt);
+             POITextExtractor unexpected = poifs.apply(textStream)) {
+            // nothing to do: the factory call is expected to throw
+        }
+    }
+
+    /** Runs the TEST_SET samples whose file name ends in "x" through the OPCPackage-based factory method. */
+    @Test
+    public void testPackage() throws Exception {
+        for (int base = 0; base < TEST_SET.length; base += 4) {
+            final File sample = (File) TEST_SET[base + 1];
+            final boolean nameEndsWithX = sample.getName().endsWith("x");
+            if (nameEndsWithX) {
+                try (final OPCPackage pkg = OPCPackage.open(sample, PackageAccess.READ);
+                     final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
+                    testExtractor(ext, (String) TEST_SET[base], (Class) TEST_SET[base + 2], (Integer) TEST_SET[base + 3]);
+                    pkg.revert();
+                }
+            }
+        }
+    }
+
+    /** Opening the plain-text sample as an OPC package must be rejected. */
+    @Test(expected = UnsupportedFileFormatException.class)
+    public void testPackageInvalid() throws Exception {
+        try (final OPCPackage textPackage = OPCPackage.open(txt, PackageAccess.READ);
+             final POITextExtractor unexpected = ExtractorFactory.createExtractor(textPackage)) {
+            // nothing to do: one of the two resource initializers is expected to throw
+        }
+    }
+
+    /**
+     * Checks how the per-thread and all-threads "prefer event extractor" settings
+     * combine, and that the factory returns event-based or regular Excel extractors
+     * accordingly.
+     *
+     * The settings are restored in a finally block and every extractor is closed via
+     * try-with-resources, so a failing assertion cannot leak state or file handles
+     * into other tests.
+     */
+    @Test
+    public void testPreferEventBased() throws Exception {
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        try {
+            ExtractorFactory.setThreadPrefersEventExtractors(true);
+
+            assertTrue(ExtractorFactory.getPreferEventExtractor());
+            assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+            assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+            ExtractorFactory.setAllThreadsPreferEventExtractors(false);
+
+            assertFalse(ExtractorFactory.getPreferEventExtractor());
+            assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+            assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+            ExtractorFactory.setAllThreadsPreferEventExtractors(null);
+
+            assertTrue(ExtractorFactory.getPreferEventExtractor());
+            assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+            assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+            // Check we get the right extractors now
+            try (POITextExtractor extractor =
+                         ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+                assertTrue(extractor instanceof EventBasedExcelExtractor);
+            }
+            try (POITextExtractor extractor =
+                         ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+                assertTrue(extractor.getText().length() > 200);
+            }
+            try (POITextExtractor extractor =
+                         ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+                assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
+            }
+            try (POITextExtractor extractor =
+                         ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+                assertTrue(extractor.getText().length() > 200);
+            }
+        } finally {
+            // Put back to normal, even when one of the checks above failed
+            ExtractorFactory.setAllThreadsPreferEventExtractors(null);
+            ExtractorFactory.setThreadPrefersEventExtractors(false);
+        }
+
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        // And back to the regular extractors
+        try (POITextExtractor extractor =
+                     ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+            assertTrue(extractor instanceof ExcelExtractor);
+        }
+        try (POITextExtractor extractor =
+                     ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+            assertTrue(extractor.getText().length() > 200);
+        }
+        try (POITextExtractor extractor =
+                     ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+            assertTrue(extractor instanceof XSSFExcelExtractor);
+        }
+        // opened read-only like the other package opens of this test
+        try (POITextExtractor extractor =
+                     ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+            assertTrue(extractor.getText().length() > 200);
+        }
+    }
+
+    /**
+     * Checks text extraction from embedded documents: for each container sample the
+     * number of embedded extractors and their breakdown by type must match the
+     * expected "total-word-excel-powerpoint-outlook-wordx" signature.
+     * For now this only covers containers opened as OLE2 extractors.
+     */
+    @Test
+    public void testEmbedded() throws Exception {
+        // three slots per case: description, container file, expected signature
+        final Object[] cases = {
+            "No embeddings", xls, "0-0-0-0-0-0",
+            "Excel", xlsEmb, "6-2-2-2-0-0",
+            "Word", docEmb, "4-1-2-1-0-0",
+            "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1",
+            "Outlook", msgEmb, "1-1-0-0-0-0",
+            "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0",
+        };
+
+        for (int base = 0; base < cases.length; base += 3) {
+            final File container = (File) cases[base + 1];
+            try (final POIOLE2TextExtractor parent = ExtractorFactory.createExtractor(container)) {
+                final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(parent);
+
+                int wordCount = 0;
+                int excelCount = 0;
+                int slideCount = 0;
+                int mailCount = 0;
+                int wordXCount = 0;
+                for (POITextExtractor embed : embeds) {
+                    assertTrue(embed.getText().length() > 20);
+                    if (embed instanceof SlideShowExtractor) {
+                        slideCount++;
+                    } else if (embed instanceof ExcelExtractor) {
+                        excelCount++;
+                    } else if (embed instanceof WordExtractor) {
+                        wordCount++;
+                    } else if (embed instanceof OutlookTextExtactor) {
+                        mailCount++;
+                    } else if (embed instanceof XWPFWordExtractor) {
+                        wordXCount++;
+                    }
+                }
+
+                final String expected = (String) cases[base + 2];
+                final String actual = embeds.length + "-" + wordCount + "-" + excelCount
+                        + "-" + slideCount + "-" + mailCount + "-" + wordXCount;
+                assertEquals("invalid number of embeddings - " + cases[base], expected, actual);
+            }
+        }
+
+        // TODO - PowerPoint
+        // TODO - Publisher
+        // TODO - Visio
+    }
+
+ /**
+ * Sample paths that testFileLeak passes to the extractor factory; any exception
+ * raised for them is ignored there. The inline comments group the entries by the
+ * reason they are listed.
+ */
+ private static final String[] EXPECTED_FAILURES = {
+ // password protected files
+ "spreadsheet/password.xls",
+ "spreadsheet/protected_passtika.xlsx",
+ "spreadsheet/51832.xls",
+ "document/PasswordProtected.doc",
+ "slideshow/Password_Protected-hello.ppt",
+ "slideshow/Password_Protected-56-hello.ppt",
+ "slideshow/Password_Protected-np-hello.ppt",
+ "slideshow/cryptoapi-proc2356.ppt",
+ //"document/bug53475-password-is-pass.docx",
+ //"document/bug53475-password-is-solrcell.docx",
+ "spreadsheet/xor-encryption-abc.xls",
+ "spreadsheet/35897-type4.xls",
+ //"poifs/protect.xlsx",
+ //"poifs/protected_sha512.xlsx",
+ //"poifs/extenxls_pwd123.xlsx",
+ //"poifs/protected_agile.docx",
+ "spreadsheet/58616.xlsx",
+
+ // TODO: fails XMLExportTest, is this ok?
+ "spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx",
+ "spreadsheet/55864.xlsx",
+ "spreadsheet/57890.xlsx",
+
+ // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
+ "spreadsheet/44958.xls",
+ "spreadsheet/44958_1.xls",
+ "spreadsheet/testArraysAndTables.xls",
+
+ // TODO: good to ignore?
+ "spreadsheet/sample-beta.xlsx",
+
+ // This is actually a spreadsheet!
+ "hpsf/TestRobert_Flaherty.doc",
+
+ // some files that are broken, eg Word 95, ...
+ "spreadsheet/43493.xls",
+ "spreadsheet/46904.xls",
+ "document/Bug50955.doc",
+ "slideshow/PPT95.ppt",
+ "openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx",
+ "openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx",
+ "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx",
+ "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx",
+ "openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx",
+ "openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx",
+ "openxml4j/OPCCompliance_DerivedPartNameFAIL.docx",
+ "openxml4j/invalid.xlsx",
+ "spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764()
+ "spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764()
+ "spreadsheet/Simple.xlsb",
+ "poifs/unknown_properties.msg", // POIFS properties corrupted
+ "poifs/only-zero-byte-streams.ole2", // No actual contents
+ "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion
+ "spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion
+ "spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion
+
+ // old Excel files, which we only support simple text extraction of
+ "spreadsheet/testEXCEL_2.xls",
+ "spreadsheet/testEXCEL_3.xls",
+ "spreadsheet/testEXCEL_4.xls",
+ "spreadsheet/testEXCEL_5.xls",
+ "spreadsheet/testEXCEL_95.xls",
+
+ // OOXML Strict is not yet supported, see bug #57699
+ "spreadsheet/SampleSS.strict.xlsx",
+ "spreadsheet/SimpleStrict.xlsx",
+ "spreadsheet/sample.strict.xlsx",
+
+ // non-TNEF files
+ "ddf/Container.dat",
+ "ddf/47143.dat",
+
+ // sheet cloning errors
+ "spreadsheet/47813.xlsx",
+ "spreadsheet/56450.xls",
+ "spreadsheet/57231_MixedGasReport.xls",
+ "spreadsheet/OddStyleRecord.xls",
+ "spreadsheet/WithChartSheet.xlsx",
+ "spreadsheet/chart_sheet.xlsx",
+ };
+
+    /**
+     * Runs a number of files that might fail through the factory, to catch leaked
+     * file resources when the test is run under file-leak-detector.
+     *
+     * An extractor that is created successfully is closed again, so this test does
+     * not itself leave handles open.
+     *
+     * NOTE(review): the paths carry a directory prefix ("spreadsheet/", "document/", ...)
+     * but are resolved through the spreadsheet sample instance - confirm that getFile()
+     * actually finds them, otherwise every iteration ends in the catch block.
+     */
+    @Test
+    public void testFileLeak() throws Exception {
+        for (String file : EXPECTED_FAILURES) {
+            try (POITextExtractor ignored =
+                         ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file))) {
+                // nothing to extract: only opening and closing matters here
+            } catch (Exception e) {
+                // catch all exceptions here as we are only interested in file-handle leaks
+            }
+        }
+    }
+
+    /**
+     * #59074 - Excel 95 files should give a helpful message, not just
+     * "No supported documents found in the OLE2 stream".
+     *
+     * The extractor is opened in try-with-resources so that it is closed again
+     * if the expected exception is, unexpectedly, not thrown.
+     */
+    @Test(expected = OldExcelFormatException.class)
+    public void bug59074() throws Exception {
+        try (POITextExtractor unexpected = ExtractorFactory.createExtractor(
+                POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"))) {
+            // nothing to do: creating the extractor is expected to throw
+        }
+    }
+
+    /** The deprecated, misspelled lookup of embedded extractors for OOXML extractors is not implemented yet. */
+    @SuppressWarnings("deprecation")
+    @Test(expected = IllegalStateException.class)
+    public void testGetEmbedFromXMLExtractor() {
+        final POIXMLTextExtractor noExtractor = null;
+        ExtractorFactory.getEmbededDocsTextExtractors(noExtractor);
+    }
+
+    /** Looking up embedded extractors for OOXML extractors is not implemented yet. */
+    @SuppressWarnings("deprecation")
+    @Test(expected = IllegalStateException.class)
+    public void testGetEmbeddedFromXMLExtractor() {
+        final POIXMLTextExtractor noExtractor = null;
+        ExtractorFactory.getEmbeddedDocsTextExtractors(noExtractor);
+    }
+
+    // Bug 45565 (text within TextBoxes is extracted by ExcelExtractor and WordExtractor)
+    // is still open, so this test expects the assertions below to fail.
+    // Once the bug is fixed the test will fail with "expected error not thrown";
+    // when that happens, change @Test(expected=...) to a plain @Test.
+    @Test(expected=AssertionError.class)
+    public void test45565() throws Exception {
+        try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) {
+            final String extracted = extractor.getText();
+            for (String phrase : new String[] { "testdoc", "test phrase" }) {
+                assertContains(extracted, phrase);
+            }
+        }
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.Thread.UncaughtExceptionHandler;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.ooxml.POIXMLDocumentPart.RelationPart;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.NullOutputStream;
+import org.apache.poi.ooxml.util.PackageHelper;
+import org.apache.poi.util.TempFile;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.junit.Test;
+
+/**
+ * Test recursive read and write of OPC packages
+ */
+public final class TestPOIXMLDocument {
+
+    /**
+     * Minimal POIXMLDocument used by these tests: it adds a public parse method that
+     * delegates to load(POIXMLFactory) and does not support embedded parts.
+     */
+    private static class OPCParser extends POIXMLDocument {
+
+        public OPCParser(OPCPackage pkg) {
+            super(pkg);
+        }
+
+        public OPCParser(OPCPackage pkg, String coreDocumentRel) {
+            super(pkg, coreDocumentRel);
+        }
+
+        /** Loads the document parts using the given factory. */
+        public void parse(POIXMLFactory factory) throws IOException {
+            load(factory);
+        }
+
+        /** Always throws: embedded parts are not needed by these tests. */
+        @Override
+        public List<PackagePart> getAllEmbedds() {
+            throw new RuntimeException("not supported");
+        }
+    }
+
+    /**
+     * Factory stub for these tests: it knows no relation descriptors and never
+     * instantiates a specialised document part, both methods simply return null.
+     */
+    private static final class TestFactory extends POIXMLFactory {
+
+        public TestFactory() {
+            // nothing to initialise
+        }
+
+        /**
+         * @since POI 3.14-Beta1
+         */
+        @Override
+        protected POIXMLDocumentPart createDocumentPart(
+                Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
+                throws SecurityException, NoSuchMethodException, InstantiationException,
+                IllegalAccessException, InvocationTargetException {
+            return null;
+        }
+
+        @Override
+        protected POIXMLRelation getDescriptor(String relationshipType) {
+            return null;
+        }
+    }
+
+ private static void traverse(POIXMLDocument doc) throws IOException{
+ HashMap<String,POIXMLDocumentPart> context = new HashMap<>();
+ for (RelationPart p : doc.getRelationParts()){
+ traverse(p, context);
+ }
+ }
+
+ /**
+ * Recursively traverse a OOXML document and assert that same logical parts have the same physical instances
+ */
+ private static void traverse(RelationPart rp, HashMap<String,POIXMLDocumentPart> context) throws IOException{
+ POIXMLDocumentPart dp = rp.getDocumentPart();
+ assertEquals(rp.getRelationship().getTargetURI().toString(), dp.getPackagePart().getPartName().getName());
+
+ context.put(dp.getPackagePart().getPartName().getName(), dp);
+ for(RelationPart p : dp.getRelationParts()){
+ assertNotNull(p.getRelationship().toString());
+
+ String uri = p.getDocumentPart().getPackagePart().getPartName().getURI().toString();
+ assertEquals(uri, p.getRelationship().getTargetURI().toString());
+ if (!context.containsKey(uri)) {
+ traverse(p, context);
+ } else {
+ POIXMLDocumentPart prev = context.get(uri);
+ assertSame("Duplicate POIXMLDocumentPart instance for targetURI=" + uri, prev, p.getDocumentPart());
+ }
+ }
+ }
+
+ /**
+ * Parses the given package, writes it to a temp file, checks that writing to a
+ * closed stream and writing a closed document both fail, and finally compares the
+ * relationships and parts of pkg1 with those of the package re-opened from the temp file.
+ * Both packages are closed at the end.
+ *
+ * @param pkg1 the package to read and write; closed by this method
+ */
+ public void assertReadWrite(OPCPackage pkg1) throws Exception {
+
+ OPCParser doc = new OPCParser(pkg1);
+ doc.parse(new TestFactory());
+
+ traverse(doc);
+
+ // write the parsed document to a temp file
+ File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
+ FileOutputStream out = new FileOutputStream(tmp);
+ doc.write(out);
+ out.close();
+
+ // Should not be able to write to an output stream that has been closed
+ try {
+ doc.write(out);
+ fail("Should not be able to write to an output stream that has been closed.");
+ } catch (final OpenXML4JRuntimeException e) {
+ // FIXME: A better exception class (IOException?) and message should be raised
+ // indicating that the document could not be written because the output stream is closed.
+ // see {@link org.apache.poi.openxml4j.opc.ZipPackage#saveImpl(java.io.OutputStream)}
+ if (e.getMessage().matches("Fail to save: an error occurs while saving the package : The part .+ failed to be saved in the stream with marshaller .+")) {
+ // expected
+ } else {
+ throw e;
+ }
+ }
+
+ // Should not be able to write a document that has been closed
+ doc.close();
+ try {
+ doc.write(new NullOutputStream());
+ fail("Should not be able to write a document that has been closed.");
+ } catch (final IOException e) {
+ if (e.getMessage().equals("Cannot write data, document seems to have been closed already")) {
+ // expected
+ } else {
+ throw e;
+ }
+ }
+
+ // Should be able to close a document multiple times, though subsequent closes will have no effect.
+ doc.close();
+
+
+ // re-open what was written to the temp file
+ @SuppressWarnings("resource")
+ OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
+ // NOTE(review): this parses pkg1 again rather than the freshly opened pkg2 -
+ // confirm whether the written copy was meant to be parsed and traversed here
+ doc = new OPCParser(pkg1);
+ try {
+ doc.parse(new TestFactory());
+ traverse(doc);
+
+ assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
+
+ ArrayList<PackagePart> l1 = pkg1.getParts();
+ ArrayList<PackagePart> l2 = pkg2.getParts();
+
+ // parts are compared pairwise by index in the two lists
+ assertEquals(l1.size(), l2.size());
+ for (int i=0; i < l1.size(); i++){
+ PackagePart p1 = l1.get(i);
+ PackagePart p2 = l2.get(i);
+
+ assertEquals(p1.getContentType(), p2.getContentType());
+ assertEquals(p1.hasRelationships(), p2.hasRelationships());
+ if(p1.hasRelationships()){
+ assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
+ }
+ assertEquals(p1.getPartName(), p2.getPartName());
+ }
+ } finally {
+ doc.close();
+ pkg1.close();
+ pkg2.close();
+ }
+ }
+
+    /** Read/write round trip for a PowerPoint sample with attachments. */
+    @Test
+    public void testPPTX() throws Exception {
+        final InputStream sample =
+                POIDataSamples.getSlideShowInstance().openResourceAsStream("PPTWithAttachments.pptm");
+        assertReadWrite(PackageHelper.open(sample));
+    }
+
+    /** Read/write round trip for an Excel sample with attachments. */
+    @Test
+    public void testXLSX() throws Exception {
+        final InputStream sample =
+                POIDataSamples.getSpreadSheetInstance().openResourceAsStream("ExcelWithAttachments.xlsm");
+        assertReadWrite(PackageHelper.open(sample));
+    }
+
+    /** Read/write round trip for a Word sample with attachments. */
+    @Test
+    public void testDOCX() throws Exception {
+        final InputStream sample =
+                POIDataSamples.getDocumentInstance().openResourceAsStream("WordWithAttachments.docx");
+        assertReadWrite(PackageHelper.open(sample));
+    }
+
+    /** Parses a Word sample and checks that every relation of the document is non-null. */
+    @Test
+    public void testRelationOrder() throws Exception {
+        final POIDataSamples samples = POIDataSamples.getDocumentInstance();
+        @SuppressWarnings("resource")
+        final OPCPackage wordPackage = PackageHelper.open(samples.openResourceAsStream("WordWithAttachments.docx"));
+        final OPCParser parser = new OPCParser(wordPackage);
+        try {
+            parser.parse(new TestFactory());
+
+            //TODO finish me
+            for (POIXMLDocumentPart relation : parser.getRelations()) {
+                assertNotNull(relation);
+            }
+        } finally {
+            parser.close();
+        }
+    }
+
+    /**
+     * Checks getNextPartNumber for non-indexed relations (document, workbook) and
+     * indexed ones (headers, worksheets) on a Word sample.
+     */
+    @Test
+    public void testGetNextPartNumber() throws Exception {
+        final POIDataSamples samples = POIDataSamples.getDocumentInstance();
+        @SuppressWarnings("resource")
+        final OPCPackage wordPackage = PackageHelper.open(samples.openResourceAsStream("WordWithAttachments.docx"));
+        final OPCParser parser = new OPCParser(wordPackage);
+        try {
+            parser.parse(new TestFactory());
+
+            // Non-indexed parts: the Word document is taken ...
+            assertEquals(-1, parser.getNextPartNumber(XWPFRelation.DOCUMENT, 0));
+            assertEquals(-1, parser.getNextPartNumber(XWPFRelation.DOCUMENT, -1));
+            assertEquals(-1, parser.getNextPartNumber(XWPFRelation.DOCUMENT, 99));
+            // ... while the Excel workbook is not
+            assertEquals(0, parser.getNextPartNumber(XSSFRelation.WORKBOOK, 0));
+            assertEquals(0, parser.getNextPartNumber(XSSFRelation.WORKBOOK, -1));
+            assertEquals(0, parser.getNextPartNumber(XSSFRelation.WORKBOOK, 99));
+
+            // Indexed parts: the sample has 2 headers
+            assertEquals(0, parser.getNextPartNumber(XWPFRelation.HEADER, 0));
+            assertEquals(3, parser.getNextPartNumber(XWPFRelation.HEADER, -1));
+            assertEquals(3, parser.getNextPartNumber(XWPFRelation.HEADER, 1));
+            assertEquals(8, parser.getNextPartNumber(XWPFRelation.HEADER, 8));
+
+            // Indexed parts: the sample has no Excel sheets
+            assertEquals(0, parser.getNextPartNumber(XSSFRelation.WORKSHEET, 0));
+            assertEquals(1, parser.getNextPartNumber(XSSFRelation.WORKSHEET, -1));
+            assertEquals(1, parser.getNextPartNumber(XSSFRelation.WORKSHEET, 1));
+        } finally {
+            parser.close();
+        }
+    }
+
+    /**
+     * Calls the commit/save hooks and the relation accessors on a document part
+     * created with the no-argument constructor, passing null where a relation is expected.
+     */
+    @Test
+    public void testCommitNullPart() throws IOException, InvalidFormatException {
+        final POIXMLDocumentPart emptyPart = new POIXMLDocumentPart();
+
+        // commit/save hooks
+        emptyPart.prepareForCommit();
+        emptyPart.commit();
+        emptyPart.onSave(new HashSet<>());
+
+        // relation lookups and removals with null arguments
+        assertNull(emptyPart.getRelationById(null));
+        assertNull(emptyPart.getRelationId(null));
+        assertFalse(emptyPart.removeRelation(null, true));
+        emptyPart.removeRelation((POIXMLDocumentPart)null);
+
+        assertEquals("",emptyPart.toString());
+        emptyPart.onDocumentCreate();
+        //part.getTargetPart(null);
+    }
+
+    /**
+     * Opens a Visio sample as a document with the Visio core relation and checks that
+     * its relation counter starts at 0. The document is closed in a finally block so a
+     * failing assertion does not leave it open.
+     */
+    @Test
+    public void testVSDX() throws Exception {
+        POIDataSamples pds = POIDataSamples.getDiagramInstance();
+        @SuppressWarnings("resource")
+        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
+        POIXMLDocument part = new OPCParser(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
+
+        try {
+            assertNotNull(part);
+            assertEquals(0, part.getRelationCounter());
+        } finally {
+            part.close();
+        }
+    }
+
+    /**
+     * Creates a document part for the Visio core relation of a sample package and
+     * checks that its relation counter starts at 0. The package is closed in a finally
+     * block, matching testInvalidCoreRel, so it is released even when the part cannot
+     * be created or an assertion fails.
+     */
+    @Test
+    public void testVSDXPart() throws IOException {
+        POIDataSamples pds = POIDataSamples.getDiagramInstance();
+        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
+
+        try {
+            POIXMLDocumentPart part = new POIXMLDocumentPart(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
+
+            assertNotNull(part);
+            assertEquals(0, part.getRelationCounter());
+        } finally {
+            open.close();
+        }
+    }
+
+ /**
+  * Creating a part with a core relationship type that does not exist in the
+  * package must be rejected with a {@link POIXMLException}.
+  */
+ @Test(expected=POIXMLException.class)
+ public void testInvalidCoreRel() throws IOException {
+     OPCPackage visioPackage = PackageHelper.open(
+             POIDataSamples.getDiagramInstance().openResourceAsStream("test.vsdx"));
+
+     try {
+         // the constructor is expected to throw, the instance itself is never used
+         new POIXMLDocumentPart(visioPackage, "somethingillegal");
+     } finally {
+         visioPackage.close();
+     }
+ }
+
+ @Test
+ public void testOSGIClassLoading() {
+ // the schema type loader is cached per thread in POIXMLTypeLoader.
+ // So create a new Thread and change the context class loader (which would normally be used)
+ // to not contain the OOXML classes
+ Runnable run = new Runnable() {
+ public void run() {
+ InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
+ XMLSlideShow ppt = null;
+ try {
+ ppt = new XMLSlideShow(is);
+ ppt.getSlides().get(0).getShapes();
+ } catch (IOException e) {
+ fail("failed to load XMLSlideShow");
+ } finally {
+ IOUtils.closeQuietly(ppt);
+ IOUtils.closeQuietly(is);
+ }
+ }
+ };
+
+ Thread thread = Thread.currentThread();
+ ClassLoader cl = thread.getContextClassLoader();
+ UncaughtHandler uh = new UncaughtHandler();
+
+ // check schema type loading and check if we could run in an OOM
+ Thread ta[] = new Thread[30];
+ for (int j=0; j<10; j++) {
+ for (int i=0; i<ta.length; i++) {
+ ta[i] = new Thread(run);
+ ta[i].setContextClassLoader(cl.getParent());
+ ta[i].setUncaughtExceptionHandler(uh);
+ ta[i].start();
+ }
+ for (int i=0; i<ta.length; i++) {
+ try {
+ ta[i].join();
+ } catch (InterruptedException e) {
+ fail("failed to join thread");
+ }
+ }
+ }
+ assertFalse(uh.hasException());
+ }
+
+ /**
+  * Remembers the most recent throwable that escaped a thread this handler
+  * is installed on, so a test can check afterwards whether any thread failed.
+  */
+ private static class UncaughtHandler implements UncaughtExceptionHandler {
+     Throwable e;
+
+     @Override
+     public synchronized void uncaughtException(Thread thread, Throwable failure) {
+         e = failure;
+     }
+
+     /** @return <code>true</code> once a throwable has been recorded */
+     public synchronized boolean hasException() {
+         return null != e;
+     }
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.Date;
+
+import org.apache.poi.ooxml.POIXMLProperties.CoreProperties;
+import org.apache.poi.openxml4j.util.Nullable;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LocaleUtil;
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Test setting extended and custom OOXML properties
+ */
+public final class TestPOIXMLProperties {
+ private XWPFDocument sampleDoc;
+ private XWPFDocument sampleNoThumb;
+ private POIXMLProperties _props;
+ private CoreProperties _coreProperties;
+
+ /**
+  * Opens the two sample documents used by the tests and caches the properties
+  * (and core properties) of "documentProperties.docx".
+  *
+  * @throws IOException if a sample document cannot be opened
+  */
+ @Before
+ public void setUp() throws IOException {
+     sampleDoc = XWPFTestDataSamples.openSampleDocument("documentProperties.docx");
+     sampleNoThumb = XWPFTestDataSamples.openSampleDocument("SampleDoc.docx");
+     assertNotNull(sampleDoc);
+     assertNotNull(sampleNoThumb);
+     _props = sampleDoc.getProperties();
+     // verify before the first dereference, otherwise a null would surface as an NPE
+     assertNotNull(_props);
+     _coreProperties = _props.getCoreProperties();
+ }
+
+ /**
+  * Closes both sample documents opened in {@link #setUp()}.
+  *
+  * @throws Exception if closing one of the documents fails
+  */
+ @After
+ public void closeResources() throws Exception {
+     try {
+         sampleDoc.close();
+     } finally {
+         // still release the second document when closing the first one fails
+         sampleNoThumb.close();
+     }
+ }
+
+ /**
+  * Sets the application name and version on the extended properties of a new
+  * workbook and checks that both survive a write/read round trip, via the
+  * usermodel accessors as well as via the underlying XML bean.
+  */
+ @Test
+ public void testWorkbookExtendedProperties() throws Exception {
+     final String appVersion = "3.5 beta";
+     final String application = "POI";
+
+     XSSFWorkbook original = new XSSFWorkbook();
+     POIXMLProperties originalProps = original.getProperties();
+     assertNotNull(originalProps);
+
+     POIXMLProperties.ExtendedProperties originalExtended = originalProps.getExtendedProperties();
+     org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+             originalBean = originalExtended.getUnderlyingProperties();
+     originalBean.setApplication(application);
+     originalBean.setAppVersion(appVersion);
+
+     XSSFWorkbook reloaded = XSSFTestDataSamples.writeOutAndReadBack(original);
+     original.close();
+     // the round trip must hand back a different workbook instance
+     assertTrue(original != reloaded);
+
+     POIXMLProperties reloadedProps = reloaded.getProperties();
+     assertNotNull(reloadedProps);
+     POIXMLProperties.ExtendedProperties reloadedExtended = reloadedProps.getExtendedProperties();
+
+     // usermodel view
+     assertEquals(application, reloadedExtended.getApplication());
+     assertEquals(appVersion, reloadedExtended.getAppVersion());
+
+     // underlying XML bean view
+     org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+             reloadedBean = reloadedExtended.getUnderlyingProperties();
+     assertEquals(application, reloadedBean.getApplication());
+     assertEquals(appVersion, reloadedBean.getAppVersion());
+
+     reloaded.close();
+ }
+
+
+ /**
+  * Test usermodel API for setting custom properties: adds a string, an int,
+  * a double and a boolean property, verifies that a duplicate name is rejected
+  * and checks what is stored after a write/read round trip.
+  */
+ @Test
+ public void testCustomProperties() throws Exception {
+     // format id expected on every custom property checked below
+     final String fmtId = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}";
+
+     POIXMLDocument wb1 = new XSSFWorkbook();
+
+     POIXMLProperties.CustomProperties custom = wb1.getProperties().getCustomProperties();
+     custom.addProperty("test-1", "string val");
+     custom.addProperty("test-2", 1974);
+     custom.addProperty("test-3", 36.6);
+     // adding a duplicate must fail
+     try {
+         custom.addProperty("test-3", 36.6);
+         fail("expected exception");
+     } catch(IllegalArgumentException e){
+         assertEquals("A property with this name already exists in the custom properties", e.getMessage());
+     }
+     custom.addProperty("test-4", true);
+
+     POIXMLDocument wb2 = XSSFTestDataSamples.writeOutAndReadBack((XSSFWorkbook)wb1);
+     wb1.close();
+     org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties stored =
+             wb2.getProperties().getCustomProperties().getUnderlyingProperties();
+     assertEquals(4, stored.sizeOfPropertyArray());
+
+     // the pids checked below start at 2 and increase by one per property
+     org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty prop;
+
+     prop = stored.getPropertyArray(0);
+     assertEquals(fmtId, prop.getFmtid());
+     assertEquals("test-1", prop.getName());
+     assertEquals("string val", prop.getLpwstr());
+     assertEquals(2, prop.getPid());
+
+     prop = stored.getPropertyArray(1);
+     assertEquals(fmtId, prop.getFmtid());
+     assertEquals("test-2", prop.getName());
+     assertEquals(1974, prop.getI4());
+     assertEquals(3, prop.getPid());
+
+     prop = stored.getPropertyArray(2);
+     assertEquals(fmtId, prop.getFmtid());
+     assertEquals("test-3", prop.getName());
+     assertEquals(36.6, prop.getR8(), 0);
+     assertEquals(4, prop.getPid());
+
+     prop = stored.getPropertyArray(3);
+     assertEquals(fmtId, prop.getFmtid());
+     assertEquals("test-4", prop.getName());
+     assertEquals(true, prop.getBool());
+     assertEquals(5, prop.getPid());
+
+     wb2.close();
+ }
+
+ /**
+  * Checks the core properties stored in "documentProperties.docx" and that a
+  * newly set content status can be read back.
+  */
+ @Test
+ public void testDocumentProperties() {
+     String category = _coreProperties.getCategory();
+     assertEquals("test", category);
+     String contentStatus = "Draft";
+     _coreProperties.setContentStatus(contentStatus);
+     // read the value back through the getter; asserting on the local variable
+     // would pass regardless of what the setter did
+     assertEquals(contentStatus, _coreProperties.getContentStatus());
+     Date created = _coreProperties.getCreated();
+     // the original file contains a following value: 2009-07-20T13:12:00Z
+     assertTrue(dateTimeEqualToUTCString(created, "2009-07-20T13:12:00Z"));
+     String creator = _coreProperties.getCreator();
+     assertEquals("Paolo Mottadelli", creator);
+     String subject = _coreProperties.getSubject();
+     assertEquals("Greetings", subject);
+     String title = _coreProperties.getTitle();
+     assertEquals("Hello World", title);
+ }
+
+ /**
+  * Sets the creation date on a new document and verifies that the very same
+  * date is returned both immediately and after a write/read round trip.
+  */
+ @Test
+ public void testTransitiveSetters() throws IOException {
+     XWPFDocument original = new XWPFDocument();
+     CoreProperties coreProps = original.getProperties().getCoreProperties();
+
+     Date expected = LocaleUtil.getLocaleCalendar(2010, 6, 15, 10, 0, 0).getTime();
+     coreProps.setCreated(new Nullable<>(expected));
+     assertEquals(expected, coreProps.getCreated());
+
+     XWPFDocument reloaded = XWPFTestDataSamples.writeOutAndReadBack(original);
+     original.close();
+
+     Date afterRoundTrip = reloaded.getProperties().getCoreProperties().getCreated();
+     assertEquals(expected, afterRoundTrip);
+     reloaded.close();
+ }
+
+ @Test
+ public void testGetSetRevision() {
+ String revision = _coreProperties.getRevision();
+ assertTrue("Revision number is 1", Integer.parseInt(revision) > 1);
+ _coreProperties.setRevision("20");
+ assertEquals("20", _coreProperties.getRevision());
+ _coreProperties.setRevision("20xx");
+ assertEquals("20", _coreProperties.getRevision());
+ }
+
+ /**
+  * Reads the "last modified by" user stored in the sample document and
+  * checks that a new value can be set and read back.
+  */
+ @Test
+ public void testLastModifiedByUserProperty() {
+     assertEquals("Paolo Mottadelli", _coreProperties.getLastModifiedByUser());
+
+     final String replacement = "Test User";
+     _coreProperties.setLastModifiedByUser(replacement);
+     assertEquals(replacement, _coreProperties.getLastModifiedByUser());
+ }
+
+ /**
+  * Formats the given date in UTC as <code>yyyy-MM-ddTHH:mm:ssZ</code> (year
+  * unpadded, all other fields zero-padded to two digits) and compares the
+  * result with the given string.
+  *
+  * @param dateTime the date to format
+  * @param utcString the expected textual representation
+  * @return <code>true</code> if the formatted date equals <code>utcString</code>
+  */
+ public static boolean dateTimeEqualToUTCString(Date dateTime, String utcString) {
+     Calendar cal = LocaleUtil.getLocaleCalendar(LocaleUtil.TIMEZONE_UTC);
+     cal.setTimeInMillis(dateTime.getTime());
+
+     StringBuilder formatted = new StringBuilder();
+     formatted.append(cal.get(Calendar.YEAR)).append("-");
+     // Calendar months are zero-based
+     formatted.append(zeroPad(cal.get(Calendar.MONTH) + 1)).append("-");
+     formatted.append(zeroPad(cal.get(Calendar.DAY_OF_MONTH))).append("T");
+     formatted.append(zeroPad(cal.get(Calendar.HOUR_OF_DAY))).append(":");
+     formatted.append(zeroPad(cal.get(Calendar.MINUTE))).append(":");
+     formatted.append(zeroPad(cal.get(Calendar.SECOND))).append("Z");
+
+     return utcString.equals(formatted.toString());
+ }
+
+ /**
+ * Checks the thumbnail accessors on a document that has a thumbnail and on one
+ * that has none, then adds a thumbnail to the latter and replaces its data.
+ * Currently disabled, see the reason given in the annotation below.
+ */
+ @Ignore("Fails to add some of the thumbnails, needs more investigation")
+ @Test
+ public void testThumbnails() throws Exception {
+ POIXMLProperties noThumbProps = sampleNoThumb.getProperties();
+
+ // _props belongs to documentProperties.docx, noThumbProps to SampleDoc.docx
+ assertNotNull(_props.getThumbnailPart());
+ assertNull(noThumbProps.getThumbnailPart());
+
+ assertNotNull(_props.getThumbnailFilename());
+ assertNull(noThumbProps.getThumbnailFilename());
+
+ assertNotNull(_props.getThumbnailImage());
+ assertNull(noThumbProps.getThumbnailImage());
+
+ assertEquals("/thumbnail.jpeg", _props.getThumbnailFilename());
+
+
+ // Adding / changing
+ ByteArrayInputStream imageData = new ByteArrayInputStream(new byte[1]);
+ noThumbProps.setThumbnail("Testing.png", imageData);
+ assertNotNull(noThumbProps.getThumbnailPart());
+ assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
+ assertNotNull(noThumbProps.getThumbnailImage());
+ assertEquals(1, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
+
+ // setting a thumbnail again is expected to swap the image data (now 2 bytes)
+ // while the filename asserted below stays "/Testing.png"
+ imageData = new ByteArrayInputStream(new byte[2]);
+ noThumbProps.setThumbnail("Testing2.png", imageData);
+ assertNotNull(noThumbProps.getThumbnailPart());
+ assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
+ assertNotNull(noThumbProps.getThumbnailImage());
+ assertEquals(2, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
+ }
+
+ /**
+  * Renders a number as text, prefixing single-digit values (0 to 9) with a "0".
+  * Negative values and values above 9 are returned without padding.
+  *
+  * @param i the number to render
+  * @return the (possibly zero-padded) decimal representation
+  */
+ private static String zeroPad(long i) {
+     String digits = String.valueOf(i);
+     boolean singleDigit = !(i < 0 || i > 9);
+     return singleDigit ? "0" + digits : digits;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.security.AccessController;
+import java.security.CodeSource;
+import java.security.PrivilegedAction;
+import java.security.ProtectionDomain;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Vector;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.StringUtil;
+import org.apache.poi.util.SuppressForbidden;
+import org.junit.Test;
+import org.junit.internal.TextListener;
+import org.junit.runner.Description;
+import org.junit.runner.JUnitCore;
+import org.junit.runner.Result;
+
+/**
+ * Build a 'lite' version of the ooxml-schemas.jar
+ *
+ * @author Yegor Kozlov
+ */
+public final class OOXMLLite {
+ private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb");
+
+ /**
+ * Destination directory to copy filtered classes
+ */
+ private File _destDest;
+
+ /**
+ * Directory with the compiled ooxml tests
+ */
+ private File _testDir;
+
+ /**
+ * Reference to the ooxml-schemas.jar
+ */
+ private File _ooxmlJar;
+
+
+ /**
+ * Creates a builder; the three paths are only wrapped into {@link File}
+ * objects here, nothing is read or written yet.
+ *
+ * @param dest destination directory for the filtered classes
+ * @param test directory with the compiled ooxml tests
+ * @param ooxmlJar path of the ooxml-schemas.jar
+ */
+ OOXMLLite(String dest, String test, String ooxmlJar) {
+ _destDest = new File(dest);
+ _testDir = new File(test);
+ _ooxmlJar = new File(ooxmlJar);
+ }
+
+ /**
+  * Command line entry point. Prints some memory statistics, parses the
+  * options and runs {@link #build()}.
+  * <p>
+  * Recognized options, each followed by its value: <code>-dest</code>
+  * (destination directory), <code>-test</code> (directory with the compiled
+  * tests) and <code>-ooxml</code> (path of the ooxml-schemas.jar). Unknown
+  * arguments are ignored.
+  *
+  * @param args the command line arguments
+  * @throws IOException if building the lite jar content fails
+  * @throws IllegalArgumentException if an option has no value or one of the
+  *         three options is missing
+  */
+ public static void main(String[] args) throws IOException {
+     System.out.println("Free memory (bytes): " +
+             Runtime.getRuntime().freeMemory());
+     long maxMemory = Runtime.getRuntime().maxMemory();
+     System.out.println("Maximum memory (bytes): " +
+             (maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory));
+     System.out.println("Total memory (bytes): " +
+             Runtime.getRuntime().totalMemory());
+
+     String dest = null, test = null, ooxml = null;
+
+     for (int i = 0; i < args.length; i++) {
+         String option = args[i];
+         switch (option) {
+             case "-dest":
+             case "-test":
+             case "-ooxml":
+                 // an option given as last argument has no value to consume
+                 if (i + 1 >= args.length) {
+                     throw new IllegalArgumentException("Missing value for option " + option);
+                 }
+                 String value = args[++i];
+                 if ("-dest".equals(option)) {
+                     dest = value;
+                 } else if ("-test".equals(option)) {
+                     test = value;
+                 } else {
+                     ooxml = value;
+                 }
+                 break;
+             default:
+                 // unknown arguments are skipped, as before
+                 break;
+         }
+     }
+
+     // fail with a readable message instead of a NullPointerException from new File(null)
+     if (dest == null || test == null || ooxml == null) {
+         throw new IllegalArgumentException(
+                 "Usage: OOXMLLite -dest <directory> -test <directory> -ooxml <ooxml-schemas.jar>");
+     }
+
+     OOXMLLite builder = new OOXMLLite(dest, test, ooxml);
+     builder.build();
+ }
+
+ /**
+  * Builds the content of the 'lite' jar in three steps:
+  * <ol>
+  * <li>collect the test classes below the test directory and run them with JUnit,</li>
+  * <li>copy every class that was loaded from the ooxml-schemas.jar (plus the
+  *     member classes of loaded interfaces) to the destination directory,</li>
+  * <li>copy the compiled <code>.xsb</code> schema resources from the jar.</li>
+  * </ol>
+  *
+  * @throws IOException if copying a class or resource fails
+  * @throws RuntimeException if the test run was not successful
+  */
+ void build() throws IOException {
+     List<Class<?>> lst = new ArrayList<>();
+     //collect unit tests
+     String exclude = StringUtil.join("|",
+             "BaseTestXWorkbook",
+             "BaseTestXSheet",
+             "BaseTestXRow",
+             "BaseTestXCell",
+             "BaseTestXSSFPivotTable",
+             "TestSXSSFWorkbook\\$\\d",
+             "TestUnfixedBugs",
+             "MemoryUsage",
+             "TestDataProvider",
+             "TestDataSamples",
+             "All.+Tests",
+             "ZipFileAssert",
+             "AesZipFileZipEntrySource",
+             "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource",
+             "PkiTestUtils",
+             "TestCellFormatPart\\$\\d",
+             "TestSignatureInfo\\$\\d",
+             "TestCertificateEncryption\\$CertData",
+             "TestPOIXMLDocument\\$OPCParser",
+             "TestPOIXMLDocument\\$TestFactory",
+             "TestXSLFTextParagraph\\$DrawTextParagraphProxy",
+             "TestXSSFExportToXML\\$\\d",
+             "TestXSSFExportToXML\\$DummyEntityResolver",
+             "TestFormulaEvaluatorOnXSSF\\$Result",
+             "TestFormulaEvaluatorOnXSSF\\$SS",
+             "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result",
+             "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS",
+             "TestXSSFBugs\\$\\d",
+             "AddImageBench",
+             "AddImageBench_jmhType_B\\d",
+             "AddImageBench_benchCreatePicture_jmhTest",
+             "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream",
+             "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator",
+             "TestXSSFBReader\\$1",
+             "TestXSSFBReader\\$TestSheetHandler",
+             "TestFormulaEvaluatorOnXSSF\\$1",
+             "TestMultiSheetFormulaEvaluatorOnXSSF\\$1",
+             "TestZipPackagePropertiesMarshaller\\$1",
+             "SLCommonUtils",
+             "TestPPTX2PNG\\$1",
+             "TestMatrixFormulasFromXMLSpreadsheet\\$1",
+             "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator",
+             "TestPOIXMLDocument\\$UncaughtHandler",
+             "TestOleShape\\$Api",
+             "TestOleShape\\$1",
+             "TestPOIXMLDocument\\$1",
+             "TestXMLSlideShow\\$1",
+             "TestXMLSlideShow\\$BufAccessBAOS",
+             "TestXDDFChart\\$1",
+             "TestOOXMLLister\\$1",
+             "TestOOXMLPrettyPrint\\$1"
+     );
+     System.out.println("Collecting unit tests from " + _testDir);
+     collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class");
+     System.out.println("Found " + lst.size() + " classes");
+
+     //run tests
+     JUnitCore jUnitCore = new JUnitCore();
+     jUnitCore.addListener(new TextListener(System.out) {
+         private final Set<String> classes = new HashSet<>();
+         private int count;
+
+         @Override
+         public void testStarted(Description description) {
+             // count how many test-classes we already saw
+             classes.add(description.getClassName());
+             count++;
+             // print a progress line every 100 started tests
+             if(count % 100 == 0) {
+                 System.out.println();
+                 System.out.println(classes.size() + "/" + lst.size() + ": " + description.getDisplayName());
+             }
+
+             super.testStarted(description);
+         }
+     });
+     Result result = jUnitCore.run(lst.toArray(new Class<?>[0]));
+     if (!result.wasSuccessful()) {
+         throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar");
+     }
+
+     //see what classes from the ooxml-schemas.jar are loaded
+     System.out.println("Copying classes to " + _destDest);
+     Map<String, Class<?>> classes = getLoadedClasses(_ooxmlJar.getName());
+     for (Class<?> cls : classes.values()) {
+         copyClassFile(cls);
+
+         if(cls.isInterface()){
+             /// Copy classes and interfaces declared as members of this class
+             for(Class<?> fc : cls.getDeclaredClasses()){
+                 copyClassFile(fc);
+             }
+         }
+     }
+
+     //finally copy the compiled .xsb files
+     System.out.println("Copying .xsb resources");
+     try (JarFile jar = new JarFile(_ooxmlJar)) {
+         for (Enumeration<JarEntry> e = jar.entries(); e.hasMoreElements(); ) {
+             JarEntry je = e.nextElement();
+             if (SCHEMA_PATTERN.matcher(je.getName()).matches()) {
+                 File destFile = new File(_destDest, je.getName());
+                 // close the entry stream right after the copy
+                 try (java.io.InputStream in = jar.getInputStream(je)) {
+                     IOUtils.copy(in, destFile);
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+  * Copies the <code>.class</code> resource of the given class below the destination directory.
+  * The resource stream is closed here; NOTE(review): IOUtils.copy(InputStream, File)
+  * is presumably not responsible for closing its source - confirm.
+  *
+  * @param cls the class whose class file is copied
+  * @throws IOException if the class file cannot be copied
+  */
+ private void copyClassFile(Class<?> cls) throws IOException {
+     String classRef = cls.getName().replace('.', '/') + ".class";
+     File destFile = new File(_destDest, classRef);
+     try (java.io.InputStream in = cls.getResourceAsStream('/' + classRef)) {
+         IOUtils.copy(in, destFile);
+     }
+ }
+
+ /**
+  * Tells whether the given class, or its direct superclass, declares at least
+  * one method annotated with {@link Test}. A message is printed for classes
+  * where no such method is found.
+  *
+  * @param testclass the class to inspect
+  * @return <code>true</code> if an annotated method was found
+  */
+ private static boolean checkForTestAnnotation(Class<?> testclass) {
+     // Only the class itself and its direct superclass are inspected; walking
+     // further up (abstract base classes with derived tests) is not done.
+     Class<?>[] candidates = { testclass, testclass.getSuperclass() };
+     for (Class<?> candidate : candidates) {
+         if (candidate == null) {
+             continue;
+         }
+         for (Method method : candidate.getDeclaredMethods()) {
+             if (method.isAnnotationPresent(Test.class)) {
+                 return true;
+             }
+         }
+     }
+
+     System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation");
+     return false;
+ }
+
+ /**
+  * Recursively collect test classes from the supplied directory.
+  * A file is turned into a class name relative to <code>root</code>, filtered
+  * by the two patterns, loaded, and added if it derives from TestCase or has
+  * a <code>@Test</code> method.
+  *
+  * @param root the base directory that class names are computed relative to
+  * @param arg the directory (or file) to search in
+  * @param out output
+  * @param ptrn the pattern (regexp) a class file name has to match
+  * @param exclude the pattern (regexp) of class file names to leave out
+  */
+ private static void collectTests(File root, File arg, List<Class<?>> out, String ptrn, String exclude) {
+     if (arg.isDirectory()) {
+         File[] children = arg.listFiles();
+         if (children == null) {
+             return;
+         }
+         for (File child : children) {
+             collectTests(root, child, out, ptrn, exclude);
+         }
+         return;
+     }
+
+     // path below root, with separators turned into dots: "org.apache...Foo.class"
+     String relative = arg.getAbsolutePath().substring(root.getAbsolutePath().length() + 1);
+     String fileName = relative.replace(File.separator, ".");
+     if (!fileName.matches(ptrn) || fileName.matches(exclude)) {
+         return;
+     }
+     //ignore inner classes defined in tests
+     if (fileName.indexOf('$') != -1) {
+         System.out.println("Inner class " + fileName + " not included");
+         return;
+     }
+
+     String className = fileName.replace(".class", "");
+     try {
+         Class<?> candidate = Class.forName(className);
+         boolean isTest = TestCase.class.isAssignableFrom(candidate)
+                 || checkForTestAnnotation(candidate);
+         if (isTest) {
+             out.add(candidate);
+         }
+     } catch (Throwable e) { // NOSONAR
+         System.out.println("Class " + className + " is not in classpath");
+     }
+ }
+
+ /**
+ * Lists the classes loaded so far by the system class loader whose code
+ * source location contains the given text. The classes are read via
+ * reflection from the private <code>classes</code> field of {@link ClassLoader}.
+ *
+ * @param ptrn the text (plain substring, not a regexp) that the code source
+ * location of a class has to contain; build() passes the jar file name
+ * @return the classes loaded by the system class loader keyed by class name
+ * @throws RuntimeException if the <code>classes</code> field cannot be accessed
+ */
+ @SuppressWarnings("unchecked")
+ private static Map<String, Class<?>> getLoadedClasses(String ptrn) {
+ // make the field accessible, we defer this from static initialization to here to
+ // allow JDKs which do not have this field (e.g. IBM JDK) to at least load the class
+ // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550
+ final Field _classes = AccessController.doPrivileged(new PrivilegedAction<Field>() {
+ @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)")
+ public Field run() {
+ try {
+ Field fld = ClassLoader.class.getDeclaredField("classes");
+ fld.setAccessible(true);
+ return fld;
+ } catch (Exception e) {
+ // field missing or not accessible on this JDK
+ throw new RuntimeException(e);
+ }
+
+ }
+ });
+
+ ClassLoader appLoader = ClassLoader.getSystemClassLoader();
+ try {
+ // NOTE(review): the cast assumes the field holds a Vector of classes - confirm for the JDK in use
+ Vector<Class<?>> classes = (Vector<Class<?>>) _classes.get(appLoader);
+ Map<String, Class<?>> map = new HashMap<>();
+ for (Class<?> cls : classes) {
+ // e.g. proxy-classes, ...
+ // skip classes for which no code source location can be determined
+ ProtectionDomain pd = cls.getProtectionDomain();
+ if (pd == null) continue;
+ CodeSource cs = pd.getCodeSource();
+ if (cs == null) continue;
+ URL loc = cs.getLocation();
+ if (loc == null) continue;
+
+ // keep only classes whose location mentions the requested text
+ String jar = loc.toString();
+ if (jar.contains(ptrn)) {
+ map.put(cls.getName(), cls);
+ }
+ }
+ return map;
+ } catch (IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+
+import javax.xml.XMLConstants;
+
+import org.junit.Test;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+public class TestSAXHelper {
+ @Test
+ public void testXMLReader() throws Exception {
+ XMLReader reader = SAXHelper.newXMLReader();
+ assertNotSame(reader, SAXHelper.newXMLReader());
+ assertTrue(reader.getFeature(XMLConstants.FEATURE_SECURE_PROCESSING));
+ assertEquals(SAXHelper.IGNORING_ENTITY_RESOLVER, reader.getEntityResolver());
+ assertNotNull(reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
+ assertEquals("4096", reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
+ assertNotNull(reader.getProperty("http://apache.org/xml/properties/security-manager"));
+
+ reader.parse(new InputSource(new ByteArrayInputStream("<xml></xml>".getBytes("UTF-8"))));
+ }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.util;
-
-import javax.xml.XMLConstants;
-
-import org.junit.Test;
-import org.xml.sax.InputSource;
-import org.xml.sax.XMLReader;
-
-import java.io.ByteArrayInputStream;
-
-import static org.junit.Assert.*;
-
-public class TestSAXHelper {
- @Test
- public void testXMLReader() throws Exception {
- XMLReader reader = SAXHelper.newXMLReader();
- assertNotSame(reader, SAXHelper.newXMLReader());
- assertTrue(reader.getFeature(XMLConstants.FEATURE_SECURE_PROCESSING));
- assertEquals(SAXHelper.IGNORING_ENTITY_RESOLVER, reader.getEntityResolver());
- assertNotNull(reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
- assertEquals("4096", reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
- assertNotNull(reader.getProperty("http://apache.org/xml/properties/security-manager"));
-
- reader.parse(new InputSource(new ByteArrayInputStream("<xml></xml>".getBytes("UTF-8"))));
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.File;
-import java.io.OutputStream;
-
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-
-
-/**
- * This holds the common functionality for all read-only
- * POI Document classes, i.e. ones which don't support writing.
- *
- * @since POI 3.15 beta 3
- */
-public abstract class POIReadOnlyDocument extends POIDocument {
- public POIReadOnlyDocument(DirectoryNode dir) {
- super(dir);
- }
- public POIReadOnlyDocument(NPOIFSFileSystem fs) {
- super(fs);
- }
- public POIReadOnlyDocument(OPOIFSFileSystem fs) {
- super(fs);
- }
- public POIReadOnlyDocument(POIFSFileSystem fs) {
- super(fs);
- }
-
- /**
- * Note - writing is not yet supported for this file format, sorry.
- *
- * @throws IllegalStateException If you call the method, as writing is not supported
- */
- @Override
- public void write() {
- throw new IllegalStateException("Writing is not yet implemented for this Document Format");
- }
- /**
- * Note - writing is not yet supported for this file format, sorry.
- *
- * @throws IllegalStateException If you call the method, as writing is not supported
- */
- @Override
- public void write(File file) {
- throw new IllegalStateException("Writing is not yet implemented for this Document Format");
- }
- /**
- * Note - writing is not yet supported for this file format, sorry.
- *
- * @throws IllegalStateException If you call the method, as writing is not supported
- */
- @Override
- public void write(OutputStream out) {
- throw new IllegalStateException("Writing is not yet implemented for this Document Format");
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.ByteArrayInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hslf.usermodel.HSLFSlideShow;
-import org.apache.poi.hsmf.MAPIMessage;
-import org.apache.poi.hsmf.datatypes.AttachmentChunks;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hwpf.OldWordFileFormatException;
-import org.apache.poi.hwpf.extractor.Word6Extractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.sl.usermodel.SlideShowFactory;
-
-/**
- * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
- * {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
- * no Scratchpad jar (though without functionality!)
- * <p>Note - should not be used standalone, always use via the other
- * two classes</p>
- */
-@SuppressWarnings("WeakerAccess")
-public class OLE2ScratchpadExtractorFactory {
-    // Entries that mark a directory as an Outlook message; values saved as plain text (PtypString).
-    // The first short (0x1000, 0x0047, 0x0037) refers to the Property ID (see [MS-OXPROPS].pdf),
-    // the second short (0x001E, 0x001F) refers to the type of data stored in this entry
-    // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
-    // @see org.apache.poi.hsmf.Types.MAPIType
-    private static final String[] OUTLOOK_ENTRY_NAMES = {
-        "__substg1.0_1000001E", //PidTagBody ASCII
-        "__substg1.0_1000001F", //PidTagBody Unicode
-        "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
-        "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
-        "__substg1.0_0037001E", //PidTagSubject ASCII
-        "__substg1.0_0037001F", //PidTagSubject Unicode
-    };
-
-    /**
-     * Look for certain entries in the stream, to figure
-     * out what format is desired
-     * Note - doesn't check for core-supported formats!
-     * Note - doesn't check for OOXML-supported formats
-     *
-     * @param poifsDir the directory to probe for format-specific entries
-     * @return an extractor for the first format recognised; Word is checked first,
-     *         then PowerPoint, Visio, Publisher and finally Outlook
-     * @throws IOException propagated from the construction of the extractor
-     * @throws IllegalArgumentException if none of the known entries is present
-     */
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
-        if (poifsDir.hasEntry("WordDocument")) {
-            // Old or new style word document?
-            try {
-                return new WordExtractor(poifsDir);
-            } catch (OldWordFileFormatException e) {
-                return new Word6Extractor(poifsDir);
-            }
-        }
-
-        if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
-            return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
-        }
-
-        if (poifsDir.hasEntry("VisioDocument")) {
-            return new VisioTextExtractor(poifsDir);
-        }
-
-        if (poifsDir.hasEntry("Quill")) {
-            return new PublisherTextExtractor(poifsDir);
-        }
-
-        for (String entryName : OUTLOOK_ENTRY_NAMES) {
-            if (poifsDir.hasEntry(entryName)) {
-                return new OutlookTextExtactor(poifsDir);
-            }
-        }
-
-        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
-    }
-
-    /**
-     * Collects the embedded documents of the file behind the given extractor.
-     * Nothing is returned; the results are appended to the two supplied lists,
-     * which are left untouched if there are no embedded documents.
-     * Only Word and Outlook extractors are handled at present.
-     *
-     * @param ext the extractor whose document is searched
-     * @param dirs receives every embedded resource that is stored as a POIFS entry
-     * @param nonPOIFS receives a stream over the raw bytes of every other embedded resource
-     * @throws IOException declared for callers; not raised by the visible code paths themselves
-     * @throws IllegalStateException if the extractor has no root directory
-     */
-    public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
-        // Find all the embedded directories
-        DirectoryEntry root = ext.getRoot();
-        if (root == null) {
-            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-        }
-
-        if (ext instanceof WordExtractor) {
-            addWordObjectPoolEntries(root, dirs);
-        } else if (ext instanceof OutlookTextExtactor) {
-            addOutlookAttachments((OutlookTextExtactor) ext, dirs, nonPOIFS);
-        }
-        // TODO PowerPoint: tricky, embedded objects are not stored directly in poifs
-    }
-
-    /** Adds the "_..." children of the root's ObjectPool directory, if there is one, to dirs. */
-    private static void addWordObjectPoolEntries(DirectoryEntry root, List<Entry> dirs) {
-        Entry pool;
-        try {
-            pool = root.getEntry("ObjectPool");
-        } catch (FileNotFoundException e) {
-            // No ObjectPool entry simply means nothing is embedded
-            return;
-        }
-        if (!(pool instanceof DirectoryEntry)) {
-            // Guard the cast: an entry of that name which is not a directory has no children
-            return;
-        }
-        Iterator<Entry> it = ((DirectoryEntry) pool).getEntries();
-        while (it.hasNext()) {
-            Entry entry = it.next();
-            if (entry.getName().startsWith("_")) {
-                dirs.add(entry);
-            }
-        }
-    }
-
-    /** Adds each attachment of the message, as raw bytes when present, else as its directory. */
-    private static void addOutlookAttachments(OutlookTextExtactor ext, List<Entry> dirs, List<InputStream> nonPOIFS) {
-        // Stored in the Attachment blocks
-        MAPIMessage msg = ext.getMAPIMessage();
-        for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
-            if (attachment.getAttachData() != null) {
-                byte[] data = attachment.getAttachData().getValue();
-                nonPOIFS.add(new ByteArrayInputStream(data));
-            } else if (attachment.getAttachmentDirectory() != null) {
-                dirs.add(attachment.getAttachmentDirectory().getDirectory());
-            }
-        }
-    }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor.ole2;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.extractor.OLE2ExtractorFactory;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.sl.usermodel.SlideShowFactory;
+
+/**
+ * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
+ * {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
+ * no Scratchpad jar (though without functionality!)
+ * <p>Note - should not be used standalone, always use via the other
+ * two classes</p>
+ */
+@SuppressWarnings("WeakerAccess")
+public class OLE2ScratchpadExtractorFactory {
+    // Entries that mark a directory as an Outlook message; values saved as plain text (PtypString).
+    // The first short (0x1000, 0x0047, 0x0037) refers to the Property ID (see [MS-OXPROPS].pdf),
+    // the second short (0x001E, 0x001F) refers to the type of data stored in this entry
+    // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
+    // @see org.apache.poi.hsmf.Types.MAPIType
+    private static final String[] OUTLOOK_ENTRY_NAMES = {
+        "__substg1.0_1000001E", //PidTagBody ASCII
+        "__substg1.0_1000001F", //PidTagBody Unicode
+        "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
+        "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
+        "__substg1.0_0037001E", //PidTagSubject ASCII
+        "__substg1.0_0037001F", //PidTagSubject Unicode
+    };
+
+    /**
+     * Look for certain entries in the stream, to figure
+     * out what format is desired
+     * Note - doesn't check for core-supported formats!
+     * Note - doesn't check for OOXML-supported formats
+     *
+     * @param poifsDir the directory to probe for format-specific entries
+     * @return an extractor for the first format recognised; Word is checked first,
+     *         then PowerPoint, Visio, Publisher and finally Outlook
+     * @throws IOException propagated from the construction of the extractor
+     * @throws IllegalArgumentException if none of the known entries is present
+     */
+    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
+        if (poifsDir.hasEntry("WordDocument")) {
+            // Old or new style word document?
+            try {
+                return new WordExtractor(poifsDir);
+            } catch (OldWordFileFormatException e) {
+                return new Word6Extractor(poifsDir);
+            }
+        }
+
+        if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
+            return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
+        }
+
+        if (poifsDir.hasEntry("VisioDocument")) {
+            return new VisioTextExtractor(poifsDir);
+        }
+
+        if (poifsDir.hasEntry("Quill")) {
+            return new PublisherTextExtractor(poifsDir);
+        }
+
+        for (String entryName : OUTLOOK_ENTRY_NAMES) {
+            if (poifsDir.hasEntry(entryName)) {
+                return new OutlookTextExtactor(poifsDir);
+            }
+        }
+
+        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+    }
+
+    /**
+     * Collects the embedded documents of the file behind the given extractor.
+     * Nothing is returned; the results are appended to the two supplied lists,
+     * which are left untouched if there are no embedded documents.
+     * Only Word and Outlook extractors are handled at present.
+     *
+     * @param ext the extractor whose document is searched
+     * @param dirs receives every embedded resource that is stored as a POIFS entry
+     * @param nonPOIFS receives a stream over the raw bytes of every other embedded resource
+     * @throws IOException declared for callers; not raised by the visible code paths themselves
+     * @throws IllegalStateException if the extractor has no root directory
+     */
+    public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+        // Find all the embedded directories
+        DirectoryEntry root = ext.getRoot();
+        if (root == null) {
+            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+        }
+
+        if (ext instanceof WordExtractor) {
+            addWordObjectPoolEntries(root, dirs);
+        } else if (ext instanceof OutlookTextExtactor) {
+            addOutlookAttachments((OutlookTextExtactor) ext, dirs, nonPOIFS);
+        }
+        // TODO PowerPoint: tricky, embedded objects are not stored directly in poifs
+    }
+
+    /** Adds the "_..." children of the root's ObjectPool directory, if there is one, to dirs. */
+    private static void addWordObjectPoolEntries(DirectoryEntry root, List<Entry> dirs) {
+        Entry pool;
+        try {
+            pool = root.getEntry("ObjectPool");
+        } catch (FileNotFoundException e) {
+            // No ObjectPool entry simply means nothing is embedded
+            return;
+        }
+        if (!(pool instanceof DirectoryEntry)) {
+            // Guard the cast: an entry of that name which is not a directory has no children
+            return;
+        }
+        Iterator<Entry> it = ((DirectoryEntry) pool).getEntries();
+        while (it.hasNext()) {
+            Entry entry = it.next();
+            if (entry.getName().startsWith("_")) {
+                dirs.add(entry);
+            }
+        }
+    }
+
+    /** Adds each attachment of the message, as raw bytes when present, else as its directory. */
+    private static void addOutlookAttachments(OutlookTextExtactor ext, List<Entry> dirs, List<InputStream> nonPOIFS) {
+        // Stored in the Attachment blocks
+        MAPIMessage msg = ext.getMAPIMessage();
+        for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
+            if (attachment.getAttachData() != null) {
+                byte[] data = attachment.getAttachData().getValue();
+                nonPOIFS.add(new ByteArrayInputStream(data));
+            } else if (attachment.getAttachmentDirectory() != null) {
+                dirs.add(attachment.getAttachmentDirectory().getDirectory());
+            }
+        }
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.converter;
-
-import org.apache.poi.util.Beta;
-
-/**
- * Empty subclass of AbstractExcelUtils; it declares no members of its own.
- */
-@Beta
-public class ExcelToFoUtils extends AbstractExcelUtils {
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.hssf.usermodel;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.hssf.record.BOFRecord;
-import org.apache.poi.hssf.record.DimensionsRecord;
-import org.apache.poi.hssf.record.EOFRecord;
-import org.apache.poi.hssf.record.FooterRecord;
-import org.apache.poi.hssf.record.HCenterRecord;
-import org.apache.poi.hssf.record.HeaderRecord;
-import org.apache.poi.hssf.record.PrintSetupRecord;
-import org.apache.poi.hssf.record.ProtectRecord;
-import org.apache.poi.hssf.record.Record;
-import org.apache.poi.hssf.record.RecordBase;
-import org.apache.poi.hssf.record.SCLRecord;
-import org.apache.poi.hssf.record.UnknownRecord;
-import org.apache.poi.hssf.record.VCenterRecord;
-import org.apache.poi.hssf.record.chart.AreaFormatRecord;
-import org.apache.poi.hssf.record.chart.AxisLineFormatRecord;
-import org.apache.poi.hssf.record.chart.AxisOptionsRecord;
-import org.apache.poi.hssf.record.chart.AxisParentRecord;
-import org.apache.poi.hssf.record.chart.AxisRecord;
-import org.apache.poi.hssf.record.chart.AxisUsedRecord;
-import org.apache.poi.hssf.record.chart.BarRecord;
-import org.apache.poi.hssf.record.chart.BeginRecord;
-import org.apache.poi.hssf.record.chart.CategorySeriesAxisRecord;
-import org.apache.poi.hssf.record.chart.ChartFormatRecord;
-import org.apache.poi.hssf.record.chart.ChartRecord;
-import org.apache.poi.hssf.record.chart.ChartTitleFormatRecord;
-import org.apache.poi.hssf.record.chart.DataFormatRecord;
-import org.apache.poi.hssf.record.chart.DefaultDataLabelTextPropertiesRecord;
-import org.apache.poi.hssf.record.chart.EndRecord;
-import org.apache.poi.hssf.record.chart.FontBasisRecord;
-import org.apache.poi.hssf.record.chart.FontIndexRecord;
-import org.apache.poi.hssf.record.chart.FrameRecord;
-import org.apache.poi.hssf.record.chart.LegendRecord;
-import org.apache.poi.hssf.record.chart.LineFormatRecord;
-import org.apache.poi.hssf.record.chart.LinkedDataRecord;
-import org.apache.poi.hssf.record.chart.PlotAreaRecord;
-import org.apache.poi.hssf.record.chart.PlotGrowthRecord;
-import org.apache.poi.hssf.record.chart.SeriesIndexRecord;
-import org.apache.poi.hssf.record.chart.SeriesRecord;
-import org.apache.poi.hssf.record.chart.SeriesTextRecord;
-import org.apache.poi.hssf.record.chart.SeriesToChartGroupRecord;
-import org.apache.poi.hssf.record.chart.SheetPropertiesRecord;
-import org.apache.poi.hssf.record.chart.TextRecord;
-import org.apache.poi.hssf.record.chart.TickRecord;
-import org.apache.poi.hssf.record.chart.UnitsRecord;
-import org.apache.poi.hssf.record.chart.ValueRangeRecord;
-import org.apache.poi.ss.formula.ptg.Area3DPtg;
-import org.apache.poi.ss.formula.ptg.AreaPtgBase;
-import org.apache.poi.ss.formula.ptg.Ptg;
-import org.apache.poi.ss.util.CellRangeAddress;
-import org.apache.poi.ss.util.CellRangeAddressBase;
-
-/**
- * Has methods for construction of a chart object.
- *
- * @author Glen Stampoultzis (glens at apache.org)
- */
-public final class HSSFChart {
- /** Sheet handed to the constructor. */
- private HSSFSheet sheet;
- /** Record backing the X/Y/width/height accessors of this chart. */
- private ChartRecord chartRecord;
-
- /** Legend of the chart; assigned by getSheetCharts when a LegendRecord is met. */
- private LegendRecord legendRecord;
- /** Assigned by getSheetCharts but not read within the visible code. */
- @SuppressWarnings("unused")
- private ChartTitleFormatRecord chartTitleFormat;
- /** Text record holding the chart title; null when the chart has none. */
- private SeriesTextRecord chartTitleText;
- /** Value range records in the order they were read; indexed by axis in setValueRange. */
- private List<ValueRangeRecord> valueRanges = new ArrayList<>();
-
- /** Chart kind; stays Unknown until a record with a matching sid is read. */
- private HSSFChartType type = HSSFChartType.Unknown;
-
- /** Series of this chart in the order they were read. */
- private List<HSSFSeries> series = new ArrayList<>();
-
- /**
-  * Chart kinds, each paired with the record sid that identifies it.
-  * Unknown carries the sid 0.
-  */
- public enum HSSFChartType {
-     Area(0x101A),
-     Bar(0x1017),
-     Line(0x1018),
-     Pie(0x1019),
-     Scatter(0x101B),
-     Unknown(0);
-
-     /** Record sid associated with this chart kind. */
-     private final short sid;
-
-     HSSFChartType(int sid) {
-         this.sid = (short) sid;
-     }
-
-     /** @return the record sid associated with this chart kind */
-     public short getSid() {
-         return sid;
-     }
- }
-
- /**
-  * Wraps a chart record that belongs to the given sheet.
-  *
-  * @param sheet the sheet to remember for this chart
-  * @param chartRecord the record backing the position and size accessors
-  */
- private HSSFChart(HSSFSheet sheet, ChartRecord chartRecord) {
-     this.sheet = sheet;
-     this.chartRecord = chartRecord;
- }
-
- /**
- * Creates a bar chart. API needs some work. :)
- * <p>
- * NOTE: Does not yet work... checking it in just so others
- * can take a look.
- * <p>
- * Assembles a fixed sequence of records, passes the list to
- * parentSheet.insertChartRecords and then calls workbook.insertChartRecord().
- * The order of the records below is significant and must not be rearranged.
- *
- * @param workbook the workbook on which insertChartRecord() is invoked
- * @param parentSheet the sheet that receives the assembled records
- */
- public void createBarChart( HSSFWorkbook workbook, HSSFSheet parentSheet )
- {
-
- List<Record> records = new ArrayList<>();
- // Drawing object and OBJ record are emitted as raw bytes
- records.add( createMSDrawingObjectRecord() );
- records.add( createOBJRecord() );
- // Chart sub-stream preamble: BOF, header/footer, centering and print setup
- records.add( createBOFRecord() );
- records.add(new HeaderRecord(""));
- records.add(new FooterRecord(""));
- records.add( createHCenterRecord() );
- records.add( createVCenterRecord() );
- records.add( createPrintSetupRecord() );
- // unknown 33
- records.add( createFontBasisRecord1() );
- records.add( createFontBasisRecord2() );
- records.add(new ProtectRecord(false));
- records.add( createUnitsRecord() );
- // The chart record itself; its Begin is closed by the End just before the dimensions record
- records.add( createChartRecord( 0, 0, 30434904, 19031616 ) );
- records.add( createBeginRecord() );
- records.add( createSCLRecord( (short) 1, (short) 1 ) );
- records.add( createPlotGrowthRecord( 65536, 65536 ) );
- records.add( createFrameRecord1() );
- records.add( createBeginRecord() );
- records.add( createLineFormatRecord(true) );
- records.add( createAreaFormatRecord1() );
- records.add( createEndRecord() );
- // Single series with its title, values and categories links
- records.add( createSeriesRecord() );
- records.add( createBeginRecord() );
- records.add( createTitleLinkedDataRecord() );
- records.add( createValuesLinkedDataRecord() );
- records.add( createCategoriesLinkedDataRecord() );
- records.add( createDataFormatRecord() );
- // records.add(createBeginRecord());
- // unknown
- // records.add(createEndRecord());
- records.add( createSeriesToChartGroupRecord() );
- records.add( createEndRecord() );
- records.add( createSheetPropsRecord() );
- records.add( createDefaultTextRecord( DefaultDataLabelTextPropertiesRecord.CATEGORY_DATA_TYPE_ALL_TEXT_CHARACTERISTIC ) );
- records.add( createAllTextRecord() );
- records.add( createBeginRecord() );
- // unknown
- records.add( createFontIndexRecord( 5 ) );
- records.add( createDirectLinkRecord() );
- records.add( createEndRecord() );
- records.add( createDefaultTextRecord( (short) 3 ) ); // eek, undocumented text type
- records.add( createUnknownTextRecord() );
- records.add( createBeginRecord() );
- records.add( createFontIndexRecord( (short) 6 ) );
- records.add( createDirectLinkRecord() );
- records.add( createEndRecord() );
-
- records.add( createAxisUsedRecord( (short) 1 ) );
- // Appends the axis parent block and everything nested inside it
- createAxisRecords( records );
-
- records.add( createEndRecord() );
- records.add( createDimensionsRecord() );
- // Series indexes are written in the order 2, 1, 3
- records.add( createSeriesIndexRecord(2) );
- records.add( createSeriesIndexRecord(1) );
- records.add( createSeriesIndexRecord(3) );
- records.add(EOFRecord.instance);
-
-
-
- parentSheet.insertChartRecords( records );
- workbook.insertChartRecord();
- }
-
- /**
-  * Returns all the charts for the given sheet.
-  *
-  * NOTE: You won't be able to do very much with
-  *  these charts yet, as this is very limited support
-  *
-  * @param sheet the sheet whose records are scanned
-  * @return one HSSFChart per ChartRecord, in record order; empty if the sheet has none
-  */
- public static HSSFChart[] getSheetCharts(HSSFSheet sheet) {
-     List<HSSFChart> found = new ArrayList<>();
-     HSSFChart current = null;
-     HSSFSeries currentSeries = null;
-
-     for (RecordBase rec : sheet.getSheet().getRecords()) {
-         // A ChartRecord opens a new chart; linked data goes to the series seen last
-         if (rec instanceof ChartRecord) {
-             currentSeries = null;
-             current = new HSSFChart(sheet, (ChartRecord) rec);
-             found.add(current);
-         } else if (rec instanceof LinkedDataRecord && currentSeries != null) {
-             currentSeries.insertData((LinkedDataRecord) rec);
-         }
-
-         // Everything below needs a chart to attach to
-         if (current == null) {
-             continue;
-         }
-
-         if (rec instanceof LegendRecord) {
-             current.legendRecord = (LegendRecord) rec;
-         } else if (rec instanceof SeriesRecord) {
-             currentSeries = new HSSFSeries((SeriesRecord) rec);
-             current.series.add(currentSeries);
-         } else if (rec instanceof ChartTitleFormatRecord) {
-             current.chartTitleFormat = (ChartTitleFormatRecord) rec;
-         } else if (rec instanceof SeriesTextRecord) {
-             // Applies to a series, unless we've seen a legend already
-             SeriesTextRecord text = (SeriesTextRecord) rec;
-             if (current.legendRecord != null || current.series.isEmpty()) {
-                 current.chartTitleText = text;
-             } else {
-                 HSSFSeries newest = current.series.get(current.series.size() - 1);
-                 newest.seriesTitleText = text;
-             }
-         } else if (rec instanceof ValueRangeRecord) {
-             current.valueRanges.add((ValueRangeRecord) rec);
-         } else if (rec instanceof Record) {
-             // Any other record may identify the chart kind through its sid
-             Record typed = (Record) rec;
-             for (HSSFChartType candidate : HSSFChartType.values()) {
-                 if (candidate != HSSFChartType.Unknown && typed.getSid() == candidate.getSid()) {
-                     current.type = candidate;
-                     break;
-                 }
-             }
-         }
-     }
-
-     return found.toArray(new HSSFChart[0]);
- }
-
- /** Get the X offset of the chart */
- public int getChartX() { return chartRecord.getX(); }
- /** Get the Y offset of the chart */
- public int getChartY() { return chartRecord.getY(); }
- /** Get the width of the chart. {@link ChartRecord} */
- public int getChartWidth() { return chartRecord.getWidth(); }
- /** Get the height of the chart. {@link ChartRecord} */
- public int getChartHeight() { return chartRecord.getHeight(); }
-
- /** Sets the X offset of the chart */
- public void setChartX(int x) { chartRecord.setX(x); }
- /** Sets the Y offset of the chart */
- public void setChartY(int y) { chartRecord.setY(y); }
- /** Sets the width of the chart. {@link ChartRecord} */
- public void setChartWidth(int width) { chartRecord.setWidth(width); }
- /** Sets the height of the chart. {@link ChartRecord} */
- public void setChartHeight(int height) { chartRecord.setHeight(height); }
-
- /**
-  * @return the series of this chart, copied into a new array
-  */
- public HSSFSeries[] getSeries() {
-     return series.toArray(new HSSFSeries[0]);
- }
-
- /**
-  * @return the text of the chart's title record, or null when the chart has no title
-  */
- public String getChartTitle() {
-     return (chartTitleText == null) ? null : chartTitleText.getText();
- }
-
- /**
-  * Replaces the text of an existing chart title.
-  * TODO - add in the records if there is no title yet
-  *
-  * @param title the new title text
-  * @throws IllegalStateException if the chart has no title record to change
-  */
- public void setChartTitle(String title) {
-     if (chartTitleText == null) {
-         throw new IllegalStateException("No chart title found to change");
-     }
-     chartTitleText.setText(title);
- }
-
- /**
-  * Set value range (basic Axis Options).
-  * For each of the four values: Double.NaN switches the setting to automatic,
-  * null leaves it unchanged, anything else is stored as given.
-  *
-  * @param axisIndex 0 - primary axis, 1 - secondary axis; used as an index into the
-  *        list of value ranges read for this chart, so an index outside that list
-  *        makes the list lookup throw IndexOutOfBoundsException
-  * @param minimum minimum value
-  * @param maximum maximum value
-  * @param majorUnit major unit value
-  * @param minorUnit minor unit value
-  */
- public void setValueRange( int axisIndex, Double minimum, Double maximum, Double majorUnit, Double minorUnit){
-     ValueRangeRecord range = valueRanges.get(axisIndex);
-     if (range == null) {
-         return;
-     }
-
-     if (minimum != null) {
-         range.setAutomaticMinimum(minimum.isNaN());
-         range.setMinimumAxisValue(minimum);
-     }
-
-     if (maximum != null) {
-         range.setAutomaticMaximum(maximum.isNaN());
-         range.setMaximumAxisValue(maximum);
-     }
-
-     if (majorUnit != null) {
-         range.setAutomaticMajor(majorUnit.isNaN());
-         range.setMajorIncrement(majorUnit);
-     }
-
-     if (minorUnit != null) {
-         range.setAutomaticMinor(minorUnit.isNaN());
-         range.setMinorIncrement(minorUnit);
-     }
- }
-
- /**
-  * @param index the series index, narrowed to a short before it is stored
-  * @return a new SeriesIndexRecord carrying that index
-  */
- private SeriesIndexRecord createSeriesIndexRecord( int index ) {
-     SeriesIndexRecord indexRecord = new SeriesIndexRecord();
-     indexRecord.setIndex((short) index);
-     return indexRecord;
- }
-
- /**
-  * @return a new DimensionsRecord spanning rows 0 to 31 and columns 0 to 1
-  */
- private DimensionsRecord createDimensionsRecord() {
-     DimensionsRecord dimensions = new DimensionsRecord();
-     dimensions.setFirstRow(0);
-     dimensions.setLastRow(31);
-     dimensions.setFirstCol((short) 0);
-     dimensions.setLastCol((short) 1);
-     return dimensions;
- }
-
- /**
-  * @return a new HCenterRecord with horizontal centering switched off
-  */
- private HCenterRecord createHCenterRecord() {
-     HCenterRecord hCenter = new HCenterRecord();
-     hCenter.setHCenter(false);
-     return hCenter;
- }
-
- /**
-  * @return a new VCenterRecord with vertical centering switched off
-  */
- private VCenterRecord createVCenterRecord() {
-     VCenterRecord vCenter = new VCenterRecord();
-     vCenter.setVCenter(false);
-     return vCenter;
- }
-
- /**
-  * @return a new PrintSetupRecord filled with the fixed values used for the bar chart
-  */
- private PrintSetupRecord createPrintSetupRecord() {
-     PrintSetupRecord setup = new PrintSetupRecord();
-     setup.setPaperSize((short) 0);
-     setup.setScale((short) 18);
-     setup.setPageStart((short) 1);
-     setup.setFitWidth((short) 1);
-     setup.setFitHeight((short) 1);
-     setup.setLeftToRight(false);
-     setup.setLandscape(false);
-     setup.setValidSettings(true);
-     setup.setNoColor(false);
-     setup.setDraft(false);
-     setup.setNotes(false);
-     setup.setNoOrientation(false);
-     setup.setUsePage(false);
-     setup.setHResolution((short) 0);
-     setup.setVResolution((short) 0);
-     setup.setHeaderMargin(0.5);
-     setup.setFooterMargin(0.5);
-     // 15 copies - the reason for this value is not known
-     setup.setCopies((short) 15);
-     return setup;
- }
-
- /**
-  * @return a new FontBasisRecord with fixed basis values, pointing at font table index 5
-  */
- private FontBasisRecord createFontBasisRecord1() {
-     FontBasisRecord basis = new FontBasisRecord();
-     basis.setXBasis((short) 9120);
-     basis.setYBasis((short) 5640);
-     basis.setHeightBasis((short) 200);
-     basis.setScale((short) 0);
-     basis.setIndexToFontTable((short) 5);
-     return basis;
- }
-
- /**
-  * @return the record built by createFontBasisRecord1, re-pointed at font table index 6
-  */
- private FontBasisRecord createFontBasisRecord2() {
-     FontBasisRecord basis = createFontBasisRecord1();
-     basis.setIndexToFontTable((short) 6);
-     return basis;
- }
-
- /**
-  * @return a new BOFRecord with fixed version, type, build and history values
-  */
- private BOFRecord createBOFRecord() {
-     BOFRecord bof = new BOFRecord();
-     bof.setVersion((short) 600);
-     bof.setType((short) 20);
-     bof.setBuild((short) 0x1CFE);
-     bof.setBuildYear((short) 1997);
-     bof.setHistoryBitMask(0x40C9);
-     bof.setRequiredVersion(106);
-     return bof;
- }
-
- /**
- * Builds the record with sid 0x005D from a fixed, hand-copied payload of 26 bytes.
- * The bytes are not interpreted here; they are passed through as an UnknownRecord.
- *
- * @return the UnknownRecord wrapping the raw payload
- */
- private UnknownRecord createOBJRecord()
- {
- // Raw record body; do not reformat or reorder
- byte[] data = {
- (byte) 0x15, (byte) 0x00, (byte) 0x12, (byte) 0x00, (byte) 0x05, (byte) 0x00, (byte) 0x02, (byte) 0x00, (byte) 0x11, (byte) 0x60, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0xB8, (byte) 0x03,
- (byte) 0x87, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
- };
-
- return new UnknownRecord( (short) 0x005D, data );
- }
-
- /**
- * Builds the record with sid 0x00EC from a fixed, hand-copied byte payload.
- * The bytes are not interpreted here; they are passed through as an UnknownRecord.
- *
- * @return the UnknownRecord wrapping the raw payload
- */
- private UnknownRecord createMSDrawingObjectRecord()
- {
- // Since we haven't created this object yet we'll just put in the raw
- // form for the moment.
-
- // Raw record body; do not reformat or reorder
- byte[] data = {
- (byte)0x0F, (byte)0x00, (byte)0x02, (byte)0xF0, (byte)0xC0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0x00, (byte)0x08, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x0F, (byte)0x00, (byte)0x03, (byte)0xF0, (byte)0xA8, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x28, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x01, (byte)0x00, (byte)0x09, (byte)0xF0, (byte)0x10, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x02, (byte)0x00, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x05, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x70, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x92, (byte)0x0C, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x0A, (byte)0x00, (byte)0x00, (byte)0x93, (byte)0x00, (byte)0x0B, (byte)0xF0, (byte)0x36, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x7F, (byte)0x00, (byte)0x04, (byte)0x01, (byte)0x04, (byte)0x01, (byte)0xBF, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x81, (byte)0x01, (byte)0x4E, (byte)0x00,
- (byte)0x00, (byte)0x08, (byte)0x83, (byte)0x01, (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xBF, (byte)0x01, (byte)0x10, (byte)0x00, (byte)0x11, (byte)0x00, (byte)0xC0, (byte)0x01,
- (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xFF, (byte)0x01, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x3F, (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x00,
- (byte)0xBF, (byte)0x03, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0xF0, (byte)0x12, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
- (byte)0x04, (byte)0x00, (byte)0xC0, (byte)0x02, (byte)0x0A, (byte)0x00, (byte)0xF4, (byte)0x00, (byte)0x0E, (byte)0x00, (byte)0x66, (byte)0x01, (byte)0x20, (byte)0x00, (byte)0xE9, (byte)0x00,
- (byte)0x00, (byte)0x00, (byte)0x11, (byte)0xF0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00
- };
-
- return new UnknownRecord((short)0x00EC, data);
- }
-
- /**
- * Appends the axis parent record and everything nested inside it to the given list.
- * The Begin and End records added here are balanced (seven of each), and the
- * order of the records is significant.
- *
- * @param records the list the records are appended to, in place
- */
- private void createAxisRecords( List<Record> records )
- {
- records.add( createAxisParentRecord() );
- records.add( createBeginRecord() );
- // Category (X) axis
- records.add( createAxisRecord( AxisRecord.AXIS_TYPE_CATEGORY_OR_X_AXIS ) );
- records.add( createBeginRecord() );
- records.add( createCategorySeriesAxisRecord() );
- records.add( createAxisOptionsRecord() );
- records.add( createTickRecord1() );
- records.add( createEndRecord() );
- // Value axis
- records.add( createAxisRecord( AxisRecord.AXIS_TYPE_VALUE_AXIS ) );
- records.add( createBeginRecord() );
- records.add( createValueRangeRecord() );
- records.add( createTickRecord2() );
- records.add( createAxisLineFormatRecord( AxisLineFormatRecord.AXIS_TYPE_MAJOR_GRID_LINE ) );
- records.add( createLineFormatRecord(false) );
- records.add( createEndRecord() );
- // Plot area and its frame
- records.add( createPlotAreaRecord() );
- records.add( createFrameRecord2() );
- records.add( createBeginRecord() );
- records.add( createLineFormatRecord2() );
- records.add( createAreaFormatRecord2() );
- records.add( createEndRecord() );
- // Chart format, containing the bar settings, the legend and the legend's text
- records.add( createChartFormatRecord() );
- records.add( createBeginRecord() );
- records.add( createBarRecord() );
- // unknown 1022
- records.add( createLegendRecord() );
- records.add( createBeginRecord() );
- // unknown 104f
- records.add( createTextRecord() );
- records.add( createBeginRecord() );
- // unknown 104f
- records.add( createLinkedDataRecord() );
- // Closes, innermost first: text, legend, chart format, axis parent
- records.add( createEndRecord() );
- records.add( createEndRecord() );
- records.add( createEndRecord() );
- records.add( createEndRecord() );
- }
-
- /**
-  * @return a new LinkedDataRecord of link type title-or-text with a direct
-  *         reference, no custom number format and no formula
-  */
- private LinkedDataRecord createLinkedDataRecord() {
-     LinkedDataRecord link = new LinkedDataRecord();
-     link.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
-     link.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
-     link.setCustomNumberFormat(false);
-     link.setIndexNumberFmtRecord((short) 0);
-     link.setFormulaOfLink(null);
-     return link;
- }
-
- /**
-  * @return a new TextRecord, centred both ways, filled with the fixed values
-  *         used for the bar chart
-  */
- private TextRecord createTextRecord() {
-     TextRecord text = new TextRecord();
-     text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
-     text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
-     text.setDisplayMode((short) 1);
-     text.setRgbColor(0x00000000);
-     text.setX(-37);
-     text.setY(-60);
-     text.setWidth(0);
-     text.setHeight(0);
-     text.setAutoColor(true);
-     text.setShowKey(false);
-     text.setShowValue(false);
-     text.setVertical(false);
-     text.setAutoGeneratedText(true);
-     text.setGenerated(true);
-     text.setAutoLabelDeleted(false);
-     text.setAutoBackground(true);
-     text.setRotation((short) 0);
-     text.setShowCategoryLabelAsPercentage(false);
-     text.setShowValueAsPercentage(false);
-     text.setShowBubbleSizes(false);
-     text.setShowLabel(false);
-     text.setIndexOfColorValue((short) 77);
-     text.setDataLabelPlacement((short) 0);
-     text.setTextRotation((short) 0);
-     return text;
- }
-
- /**
-  * @return a new LegendRecord of type right with medium spacing, fixed position
-  *         and size values, and automatic positioning switched on
-  */
- private LegendRecord createLegendRecord() {
-     LegendRecord legend = new LegendRecord();
-     legend.setXAxisUpperLeft(3542);
-     legend.setYAxisUpperLeft(1566);
-     legend.setXSize(437);
-     legend.setYSize(213);
-     legend.setType(LegendRecord.TYPE_RIGHT);
-     legend.setSpacing(LegendRecord.SPACING_MEDIUM);
-     legend.setAutoPosition(true);
-     legend.setAutoSeries(true);
-     legend.setAutoXPositioning(true);
-     legend.setAutoYPositioning(true);
-     legend.setVertical(true);
-     legend.setDataTable(false);
-     return legend;
- }
-
- /**
-  * @return a new BarRecord with bar space 0, category space 150 and every
-  *         flag (horizontal, stacked, percentage, shadow) switched off
-  */
- private BarRecord createBarRecord() {
-     BarRecord bar = new BarRecord();
-     bar.setBarSpace((short) 0);
-     bar.setCategorySpace((short) 150);
-     bar.setHorizontal(false);
-     bar.setStacked(false);
-     bar.setDisplayAsPercentage(false);
-     bar.setShadow(false);
-     return bar;
- }
-
- /**
-  * @return a new ChartFormatRecord with position and size all zero and
-  *         vary-display-pattern switched off
-  */
- private ChartFormatRecord createChartFormatRecord() {
-     ChartFormatRecord format = new ChartFormatRecord();
-     format.setXPosition(0);
-     format.setYPosition(0);
-     format.setWidth(0);
-     format.setHeight(0);
-     format.setVaryDisplayPattern(false);
-     return format;
- }
-
- /**
-  * @return a new PlotAreaRecord, created with its no-argument constructor
-  */
- private PlotAreaRecord createPlotAreaRecord() {
-     PlotAreaRecord plotArea = new PlotAreaRecord();
-     return plotArea;
- }
-
- /**
-  * @param format the value stored as the axis type of the record
-  * @return a new AxisLineFormatRecord carrying that axis type
-  */
- private AxisLineFormatRecord createAxisLineFormatRecord( short format ) {
-     AxisLineFormatRecord lineFormat = new AxisLineFormatRecord();
-     lineFormat.setAxisType(format);
-     return lineFormat;
- }
-
- /**
-  * @return a new ValueRangeRecord with all values zero and every automatic
-  *         setting switched on
-  */
- private ValueRangeRecord createValueRangeRecord() {
-     ValueRangeRecord range = new ValueRangeRecord();
-     range.setMinimumAxisValue(0.0);
-     range.setMaximumAxisValue(0.0);
-     range.setMajorIncrement(0);
-     range.setMinorIncrement(0);
-     range.setCategoryAxisCross(0);
-     range.setAutomaticMinimum(true);
-     range.setAutomaticMaximum(true);
-     range.setAutomaticMajor(true);
-     range.setAutomaticMinor(true);
-     range.setAutomaticCategoryCrossing(true);
-     range.setLogarithmicScale(false);
-     range.setValuesInReverse(false);
-     range.setCrossCategoryAxisAtMaximum(false);
-     // The purpose of the reserved flag is not known
-     range.setReserved(true);
-     return range;
- }
-
- /**
-  * Creates the first tick record variant, populated with the fixed
-  * values assigned below (its {@code zero3} field is set to 45).
-  *
-  * @return a newly created and populated {@link TickRecord}
-  */
- private TickRecord createTickRecord1() {
-     final TickRecord tick = new TickRecord();
-     tick.setMajorTickType((byte) 2);
-     tick.setMinorTickType((byte) 0);
-     tick.setLabelPosition((byte) 3);
-     tick.setBackground((byte) 1);
-     tick.setLabelColorRgb(0);
-     tick.setZero1((short) 0);
-     tick.setZero2((short) 0);
-     tick.setZero3((short) 45);
-     tick.setAutorotate(true);
-     tick.setAutoTextBackground(true);
-     tick.setRotation((short) 0);
-     // NOTE(review): repeats the setAutorotate(true) call above; presumably redundant
-     tick.setAutorotate(true);
-     tick.setTickColor((short) 77);
-     return tick;
- }
-
- /**
-  * Creates the second tick record variant: the result of
-  * {@link #createTickRecord1()} with its {@code zero3} field reset to 0.
-  *
-  * @return a newly created and populated {@link TickRecord}
-  */
- private TickRecord createTickRecord2() {
-     final TickRecord tick = createTickRecord1();
-     tick.setZero3((short) 0);
-     return tick;
- }
-
- /**
-  * Creates an axis options record with the fixed category, unit and
-  * crossing-point values assigned below and every boolean option enabled.
-  *
-  * @return a newly created and populated {@link AxisOptionsRecord}
-  */
- private AxisOptionsRecord createAxisOptionsRecord() {
-     final AxisOptionsRecord axisOptions = new AxisOptionsRecord();
-     axisOptions.setMinimumCategory((short) -28644);
-     axisOptions.setMaximumCategory((short) -28715);
-     axisOptions.setMajorUnitValue((short) 2);
-     axisOptions.setMajorUnit((short) 0);
-     axisOptions.setMinorUnitValue((short) 1);
-     axisOptions.setMinorUnit((short) 0);
-     axisOptions.setBaseUnit((short) 0);
-     axisOptions.setCrossingPoint((short) -28644);
-     axisOptions.setDefaultMinimum(true);
-     axisOptions.setDefaultMaximum(true);
-     axisOptions.setDefaultMajor(true);
-     axisOptions.setDefaultMinorUnit(true);
-     axisOptions.setIsDate(true);
-     axisOptions.setDefaultBase(true);
-     axisOptions.setDefaultCross(true);
-     axisOptions.setDefaultDateSettings(true);
-     return axisOptions;
- }
-
- /**
-  * Creates a category/series axis record with crossing point, label
-  * frequency and tick mark frequency all set to 1, value-axis crossing
-  * enabled and the other two flags disabled.
-  *
-  * @return a newly created and populated {@link CategorySeriesAxisRecord}
-  */
- private CategorySeriesAxisRecord createCategorySeriesAxisRecord() {
-     final CategorySeriesAxisRecord categoryAxis = new CategorySeriesAxisRecord();
-     categoryAxis.setCrossingPoint((short) 1);
-     categoryAxis.setLabelFrequency((short) 1);
-     categoryAxis.setTickMarkFrequency((short) 1);
-     categoryAxis.setValueAxisCrossing(true);
-     categoryAxis.setCrossesFarRight(false);
-     categoryAxis.setReversed(false);
-     return categoryAxis;
- }
-
- /**
-  * Creates an axis record of the given type.
-  *
-  * @param axisType value passed to {@code setAxisType}
-  * @return a newly created {@link AxisRecord}
-  */
- private AxisRecord createAxisRecord(short axisType) {
-     final AxisRecord axis = new AxisRecord();
-     axis.setAxisType(axisType);
-     return axis;
- }
-
- /**
-  * Creates an axis parent record of type
-  * {@code AxisParentRecord.AXIS_TYPE_MAIN} with the fixed position and
-  * size assigned below.
-  *
-  * @return a newly created and populated {@link AxisParentRecord}
-  */
- private AxisParentRecord createAxisParentRecord() {
-     final AxisParentRecord axisParent = new AxisParentRecord();
-     axisParent.setAxisType(AxisParentRecord.AXIS_TYPE_MAIN);
-     axisParent.setX(479);
-     axisParent.setY(221);
-     axisParent.setWidth(2995);
-     axisParent.setHeight(2902);
-     return axisParent;
- }
-
- /**
-  * Creates an axis-used record carrying the given axis count.
-  *
-  * @param numAxis value passed to {@code setNumAxis}
-  * @return a newly created {@link AxisUsedRecord}
-  */
- private AxisUsedRecord createAxisUsedRecord(short numAxis) {
-     final AxisUsedRecord axisUsed = new AxisUsedRecord();
-     axisUsed.setNumAxis(numAxis);
-     return axisUsed;
- }
-
- /**
-  * Creates a linked data record of link type "title or text" with a
-  * direct reference, no custom number format and a {@code null} formula.
-  *
-  * @return a newly created and populated {@link LinkedDataRecord}
-  */
- private LinkedDataRecord createDirectLinkRecord() {
-     final LinkedDataRecord link = new LinkedDataRecord();
-     link.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
-     link.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
-     link.setCustomNumberFormat(false);
-     link.setIndexNumberFmtRecord((short) 0);
-     link.setFormulaOfLink(null);
-     return link;
- }
-
- /**
-  * Creates a font index record for the given index.
-  *
-  * @param index font index; narrowed to {@code short} before being stored
-  * @return a newly created {@link FontIndexRecord}
-  */
- private FontIndexRecord createFontIndexRecord(int index) {
-     final FontIndexRecord fontIndex = new FontIndexRecord();
-     fontIndex.setFontIndex((short) index);
-     return fontIndex;
- }
-
- /**
-  * Creates a centred, transparent text record with "show value" enabled
-  * and data label placement 0; all other fields take the fixed values
-  * assigned below.
-  *
-  * @return a newly created and populated {@link TextRecord}
-  */
- private TextRecord createAllTextRecord() {
-     final TextRecord text = new TextRecord();
-     text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
-     text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
-     text.setDisplayMode(TextRecord.DISPLAY_MODE_TRANSPARENT);
-     text.setRgbColor(0);
-     text.setX(-37);
-     text.setY(-60);
-     text.setWidth(0);
-     text.setHeight(0);
-     text.setAutoColor(true);
-     text.setShowKey(false);
-     text.setShowValue(true);
-     text.setVertical(false);
-     text.setAutoGeneratedText(true);
-     text.setGenerated(true);
-     text.setAutoLabelDeleted(false);
-     text.setAutoBackground(true);
-     text.setRotation((short) 0);
-     text.setShowCategoryLabelAsPercentage(false);
-     text.setShowValueAsPercentage(false);
-     text.setShowBubbleSizes(false);
-     text.setShowLabel(false);
-     text.setIndexOfColorValue((short) 77);
-     text.setDataLabelPlacement((short) 0);
-     text.setTextRotation((short) 0);
-     return text;
- }
-
- /**
-  * Creates a centred, transparent text record with "show value" disabled
-  * and data label placement 11088; all other fields take the fixed
-  * values assigned below.
-  *
-  * @return a newly created and populated {@link TextRecord}
-  */
- private TextRecord createUnknownTextRecord() {
-     final TextRecord text = new TextRecord();
-     text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
-     text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
-     text.setDisplayMode(TextRecord.DISPLAY_MODE_TRANSPARENT);
-     text.setRgbColor(0);
-     text.setX(-37);
-     text.setY(-60);
-     text.setWidth(0);
-     text.setHeight(0);
-     text.setAutoColor(true);
-     text.setShowKey(false);
-     text.setShowValue(false);
-     text.setVertical(false);
-     text.setAutoGeneratedText(true);
-     text.setGenerated(true);
-     text.setAutoLabelDeleted(false);
-     text.setAutoBackground(true);
-     text.setRotation((short) 0);
-     text.setShowCategoryLabelAsPercentage(false);
-     text.setShowValueAsPercentage(false);
-     text.setShowBubbleSizes(false);
-     text.setShowLabel(false);
-     text.setIndexOfColorValue((short) 77);
-     text.setDataLabelPlacement((short) 11088);
-     text.setTextRotation((short) 0);
-     return text;
- }
-
- /**
-  * Creates a default data label text properties record for the given
-  * category data type.
-  *
-  * @param categoryDataType value passed to {@code setCategoryDataType}
-  * @return a newly created {@link DefaultDataLabelTextPropertiesRecord}
-  */
- private DefaultDataLabelTextPropertiesRecord createDefaultTextRecord(short categoryDataType) {
-     final DefaultDataLabelTextPropertiesRecord defaultText = new DefaultDataLabelTextPropertiesRecord();
-     defaultText.setCategoryDataType(categoryDataType);
-     return defaultText;
- }
-
- /**
-  * Creates a sheet properties record with "plot visible only" and
-  * "default plot dimensions" enabled and the remaining flags disabled.
-  *
-  * @return a newly created and populated {@link SheetPropertiesRecord}
-  */
- private SheetPropertiesRecord createSheetPropsRecord() {
-     final SheetPropertiesRecord sheetProps = new SheetPropertiesRecord();
-     sheetProps.setChartTypeManuallyFormatted(false);
-     sheetProps.setPlotVisibleOnly(true);
-     sheetProps.setDoNotSizeWithWindow(false);
-     sheetProps.setDefaultPlotDimensions(true);
-     sheetProps.setAutoPlotArea(false);
-     return sheetProps;
- }
-
- /**
-  * Creates a series-to-chart-group record using its no-argument constructor.
-  *
-  * @return a new {@link SeriesToChartGroupRecord}
-  */
- private SeriesToChartGroupRecord createSeriesToChartGroupRecord() {
-     final SeriesToChartGroupRecord seriesToChartGroup = new SeriesToChartGroupRecord();
-     return seriesToChartGroup;
- }
-
- /**
-  * Creates a data format record with point number -1, series index and
-  * series number 0, and Excel 4 colours disabled.
-  *
-  * @return a newly created and populated {@link DataFormatRecord}
-  */
- private DataFormatRecord createDataFormatRecord() {
-     final DataFormatRecord dataFormat = new DataFormatRecord();
-     dataFormat.setPointNumber((short) -1);
-     dataFormat.setSeriesIndex((short) 0);
-     dataFormat.setSeriesNumber((short) 0);
-     dataFormat.setUseExcel4Colors(false);
-     return dataFormat;
- }
-
- /**
-  * Creates the linked data record for the categories: a worksheet
-  * reference whose formula is a single {@link Area3DPtg} built from the
-  * fixed arguments below.
-  *
-  * @return a newly created and populated {@link LinkedDataRecord}
-  */
- private LinkedDataRecord createCategoriesLinkedDataRecord() {
-     final LinkedDataRecord categories = new LinkedDataRecord();
-     categories.setLinkType(LinkedDataRecord.LINK_TYPE_CATEGORIES);
-     categories.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_WORKSHEET);
-     categories.setCustomNumberFormat(false);
-     categories.setIndexNumberFmtRecord((short) 0);
-     final Area3DPtg area = new Area3DPtg(0, 31, 1, 1, false, false, false, false, 0);
-     final Ptg[] formula = { area };
-     categories.setFormulaOfLink(formula);
-     return categories;
- }
-
- /**
-  * Creates the linked data record for the values: a worksheet reference
-  * whose formula is a single {@link Area3DPtg} built from the fixed
-  * arguments below.
-  *
-  * @return a newly created and populated {@link LinkedDataRecord}
-  */
- private LinkedDataRecord createValuesLinkedDataRecord() {
-     final LinkedDataRecord values = new LinkedDataRecord();
-     values.setLinkType(LinkedDataRecord.LINK_TYPE_VALUES);
-     values.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_WORKSHEET);
-     values.setCustomNumberFormat(false);
-     values.setIndexNumberFmtRecord((short) 0);
-     final Area3DPtg area = new Area3DPtg(0, 31, 0, 0, false, false, false, false, 0);
-     final Ptg[] formula = { area };
-     values.setFormulaOfLink(formula);
-     return values;
- }
-
- /**
-  * Creates the linked data record for the title: link type "title or
-  * text", direct reference, no custom number format and a {@code null}
-  * formula.
-  *
-  * @return a newly created and populated {@link LinkedDataRecord}
-  */
- private LinkedDataRecord createTitleLinkedDataRecord() {
-     final LinkedDataRecord title = new LinkedDataRecord();
-     title.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
-     title.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
-     title.setCustomNumberFormat(false);
-     title.setIndexNumberFmtRecord((short) 0);
-     title.setFormulaOfLink(null);
-     return title;
- }
-
- /**
-  * Creates a series record with numeric category, value and bubble data
-  * types, 32 categories, 31 values and no bubble values.
-  *
-  * @return a newly created and populated {@link SeriesRecord}
-  */
- private SeriesRecord createSeriesRecord() {
-     final SeriesRecord seriesRecord = new SeriesRecord();
-     seriesRecord.setCategoryDataType(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC);
-     seriesRecord.setValuesDataType(SeriesRecord.VALUES_DATA_TYPE_NUMERIC);
-     seriesRecord.setNumCategories((short) 32);
-     seriesRecord.setNumValues((short) 31);
-     seriesRecord.setBubbleSeriesType(SeriesRecord.BUBBLE_SERIES_TYPE_NUMERIC);
-     seriesRecord.setNumBubbleValues((short) 0);
-     return seriesRecord;
- }
-
- /**
-  * Creates an end record using its no-argument constructor.
-  *
-  * @return a new {@link EndRecord}
-  */
- private EndRecord createEndRecord() {
-     final EndRecord end = new EndRecord();
-     return end;
- }
-
- /**
-  * Creates the first area format variant: automatic formatting with
-  * foreground 16777215, background 0 and colour indexes 78/77.
-  *
-  * @return a newly created and populated {@link AreaFormatRecord}
-  */
- private AreaFormatRecord createAreaFormatRecord1() {
-     final AreaFormatRecord areaFormat = new AreaFormatRecord();
-     areaFormat.setForegroundColor(16777215); // RGB colour
-     areaFormat.setBackgroundColor(0); // RGB colour
-     areaFormat.setPattern((short) 1); // TODO: add pattern constants to the record
-     areaFormat.setAutomatic(true);
-     areaFormat.setInvert(false);
-     areaFormat.setForecolorIndex((short) 78);
-     areaFormat.setBackcolorIndex((short) 77);
-     return areaFormat;
- }
-
- /**
-  * Creates the second area format variant: non-automatic formatting with
-  * foreground 0x00c0c0c0, background 0x00000000 and colour indexes 22/79.
-  *
-  * @return a newly created and populated {@link AreaFormatRecord}
-  */
- private AreaFormatRecord createAreaFormatRecord2() {
-     final AreaFormatRecord areaFormat = new AreaFormatRecord();
-     areaFormat.setForegroundColor(0x00c0c0c0);
-     areaFormat.setBackgroundColor(0x00000000);
-     areaFormat.setPattern((short) 1);
-     areaFormat.setAutomatic(false);
-     areaFormat.setInvert(false);
-     areaFormat.setForecolorIndex((short) 22);
-     areaFormat.setBackcolorIndex((short) 79);
-     return areaFormat;
- }
-
- /**
-  * Creates an automatic, solid line format record with colour 0,
-  * weight -1 and palette index 77.
-  *
-  * @param drawTicks value passed to {@code setDrawTicks}
-  * @return a newly created and populated {@link LineFormatRecord}
-  */
- private LineFormatRecord createLineFormatRecord(boolean drawTicks) {
-     final LineFormatRecord lineFormat = new LineFormatRecord();
-     lineFormat.setLineColor(0);
-     lineFormat.setLinePattern(LineFormatRecord.LINE_PATTERN_SOLID);
-     lineFormat.setWeight((short) -1);
-     lineFormat.setAuto(true);
-     lineFormat.setDrawTicks(drawTicks);
-     lineFormat.setColourPaletteIndex((short) 77); // which colour this index denotes is unknown
-     return lineFormat;
- }
-
- /**
-  * Creates a non-automatic line format record with colour 0x00808080,
-  * pattern 0, weight 0, no ticks and palette index 23.
-  *
-  * @return a newly created and populated {@link LineFormatRecord}
-  */
- private LineFormatRecord createLineFormatRecord2() {
-     final LineFormatRecord lineFormat = new LineFormatRecord();
-     lineFormat.setLineColor(0x00808080);
-     lineFormat.setLinePattern((short) 0);
-     lineFormat.setWeight((short) 0);
-     lineFormat.setAuto(false);
-     lineFormat.setDrawTicks(false);
-     lineFormat.setUnknown(false);
-     lineFormat.setColourPaletteIndex((short) 23);
-     return lineFormat;
- }
-
- /**
-  * Creates a regular-border frame record with auto-size disabled and
-  * auto-position enabled.
-  *
-  * @return a newly created and populated {@link FrameRecord}
-  */
- private FrameRecord createFrameRecord1() {
-     final FrameRecord frame = new FrameRecord();
-     frame.setBorderType(FrameRecord.BORDER_TYPE_REGULAR);
-     frame.setAutoSize(false);
-     frame.setAutoPosition(true);
-     return frame;
- }
-
- /**
-  * Creates a regular-border frame record with both auto-size and
-  * auto-position enabled.
-  *
-  * @return a newly created and populated {@link FrameRecord}
-  */
- private FrameRecord createFrameRecord2() {
-     final FrameRecord frame = new FrameRecord();
-     frame.setBorderType(FrameRecord.BORDER_TYPE_REGULAR);
-     frame.setAutoSize(true);
-     frame.setAutoPosition(true);
-     return frame;
- }
-
- /**
-  * Creates a plot growth record with the given scales.
-  *
-  * @param horizScale value passed to {@code setHorizontalScale}
-  * @param vertScale  value passed to {@code setVerticalScale}
-  * @return a newly created {@link PlotGrowthRecord}
-  */
- private PlotGrowthRecord createPlotGrowthRecord(int horizScale, int vertScale) {
-     final PlotGrowthRecord plotGrowth = new PlotGrowthRecord();
-     plotGrowth.setHorizontalScale(horizScale);
-     plotGrowth.setVerticalScale(vertScale);
-     return plotGrowth;
- }
-
- /**
-  * Creates an SCL record from the given fraction.
-  *
-  * @param numerator   value passed to {@code setNumerator}
-  * @param denominator value passed to {@code setDenominator}
-  * @return a newly created {@link SCLRecord}
-  */
- private SCLRecord createSCLRecord(short numerator, short denominator) {
-     final SCLRecord scl = new SCLRecord();
-     // the denominator is assigned before the numerator
-     scl.setDenominator(denominator);
-     scl.setNumerator(numerator);
-     return scl;
- }
-
- /**
-  * Creates a begin record using its no-argument constructor.
-  *
-  * @return a new {@link BeginRecord}
-  */
- private BeginRecord createBeginRecord() {
-     final BeginRecord begin = new BeginRecord();
-     return begin;
- }
-
- /**
-  * Creates a chart record with the given position and size.
-  *
-  * @param x      value passed to {@code setX}
-  * @param y      value passed to {@code setY}
-  * @param width  value passed to {@code setWidth}
-  * @param height value passed to {@code setHeight}
-  * @return a newly created {@link ChartRecord}
-  */
- private ChartRecord createChartRecord(int x, int y, int width, int height) {
-     final ChartRecord chart = new ChartRecord();
-     chart.setX(x);
-     chart.setY(y);
-     chart.setWidth(width);
-     chart.setHeight(height);
-     return chart;
- }
-
- /**
-  * Creates a units record with its units value set to 0.
-  *
-  * @return a newly created {@link UnitsRecord}
-  */
- private UnitsRecord createUnitsRecord() {
-     final UnitsRecord units = new UnitsRecord();
-     units.setUnits((short) 0);
-     return units;
- }
-
-
- /**
-  * A series in a chart. Wraps the series record together with the
-  * optional title text record and the linked data records that were
-  * attached to it.
-  */
- public static class HSSFSeries {
-     private SeriesRecord series;
-     private SeriesTextRecord seriesTitleText;
-     private LinkedDataRecord dataName;
-     private LinkedDataRecord dataValues;
-     private LinkedDataRecord dataCategoryLabels;
-     private LinkedDataRecord dataSecondaryCategoryLabels;
-
-     /**
-      * @param series the series record backing this object
-      */
-     /* package */ HSSFSeries(SeriesRecord series) {
-         this.series = series;
-     }
-
-     /**
-      * Stores the given linked data record in the field that corresponds
-      * to its link type.
-      *
-      * @param data the record to attach
-      * @throws IllegalStateException if the link type is not one of the
-      *         four handled types
-      */
-     /* package */ void insertData(LinkedDataRecord data) {
-         switch (data.getLinkType()) {
-             case LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT:
-                 this.dataName = data;
-                 break;
-             case LinkedDataRecord.LINK_TYPE_VALUES:
-                 this.dataValues = data;
-                 break;
-             case LinkedDataRecord.LINK_TYPE_CATEGORIES:
-                 this.dataCategoryLabels = data;
-                 break;
-             case LinkedDataRecord.LINK_TYPE_SECONDARY_CATEGORIES:
-                 this.dataSecondaryCategoryLabels = data;
-                 break;
-             default:
-                 throw new IllegalStateException("Invalid link type: " + data.getLinkType());
-         }
-     }
-
-     /**
-      * @param seriesTitleText the record holding this series' title
-      */
-     /* package */ void setSeriesTitleText(SeriesTextRecord seriesTitleText) {
-         this.seriesTitleText = seriesTitleText;
-     }
-
-     /**
-      * @return the number of values stored in the series record
-      */
-     public short getNumValues() {
-         return series.getNumValues();
-     }
-
-     /**
-      * See {@link SeriesRecord}
-      *
-      * @return the values data type stored in the series record
-      */
-     public short getValueType() {
-         return series.getValuesDataType();
-     }
-
-     /**
-      * Returns the series' title, if there is one,
-      * or null if not
-      */
-     public String getSeriesTitle() {
-         return (seriesTitleText == null) ? null : seriesTitleText.getText();
-     }
-
-     /**
-      * Changes the series' title, but only if there
-      * was one already.
-      * TODO - add in the records if not
-      *
-      * @throws IllegalStateException if the series has no title record
-      */
-     public void setSeriesTitle(String title) {
-         if (seriesTitleText == null) {
-             throw new IllegalStateException("No series title found to change");
-         }
-         seriesTitleText.setText(title);
-     }
-
-     /**
-      * @return record with data names
-      */
-     public LinkedDataRecord getDataName() {
-         return dataName;
-     }
-
-     /**
-      * @return record with data values
-      */
-     public LinkedDataRecord getDataValues() {
-         return dataValues;
-     }
-
-     /**
-      * @return record with data category labels
-      */
-     public LinkedDataRecord getDataCategoryLabels() {
-         return dataCategoryLabels;
-     }
-
-     /**
-      * @return record with data secondary category labels
-      */
-     public LinkedDataRecord getDataSecondaryCategoryLabels() {
-         return dataSecondaryCategoryLabels;
-     }
-
-     /**
-      * @return record with series
-      */
-     public SeriesRecord getSeries() {
-         return series;
-     }
-
-     /**
-      * Reads the cell range referenced by the given record. The bounds of
-      * the last area token in the formula win; if the formula has no area
-      * token the range is (0, 0, 0, 0).
-      *
-      * @param linkedDataRecord the record to inspect, may be {@code null}
-      * @return the referenced range, or {@code null} if the record is {@code null}
-      */
-     private CellRangeAddressBase getCellRange(LinkedDataRecord linkedDataRecord) {
-         if (linkedDataRecord == null) {
-             return null;
-         }
-
-         int rowFrom = 0;
-         int rowTo = 0;
-         int colFrom = 0;
-         int colTo = 0;
-
-         for (Ptg token : linkedDataRecord.getFormulaOfLink()) {
-             if (!(token instanceof AreaPtgBase)) {
-                 continue;
-             }
-             AreaPtgBase area = (AreaPtgBase) token;
-             rowFrom = area.getFirstRow();
-             rowTo = area.getLastRow();
-             colFrom = area.getFirstColumn();
-             colTo = area.getLastColumn();
-         }
-
-         return new CellRangeAddress(rowFrom, rowTo, colFrom, colTo);
-     }
-
-     /**
-      * @return the cell range of the values record, or {@code null} if
-      *         there is no values record
-      */
-     public CellRangeAddressBase getValuesCellRange() {
-         return getCellRange(dataValues);
-     }
-
-     /**
-      * @return the cell range of the category labels record, or
-      *         {@code null} if there is no category labels record
-      */
-     public CellRangeAddressBase getCategoryLabelsCellRange() {
-         return getCellRange(dataCategoryLabels);
-     }
-
-     /**
-      * Points every area token of the record's formula at the given range
-      * and replaces the formula with just those area tokens (tokens of
-      * other kinds are not carried over).
-      *
-      * @param linkedDataRecord the record to update, may be {@code null}
-      * @param range the new range
-      * @return the number of cells in the range (rows times columns), or
-      *         {@code null} if the record is {@code null}
-      */
-     private Integer setVerticalCellRange(LinkedDataRecord linkedDataRecord,
-                                          CellRangeAddressBase range) {
-         if (linkedDataRecord == null) {
-             return null;
-         }
-
-         List<Ptg> areaTokens = new ArrayList<>();
-
-         int rows = (range.getLastRow() - range.getFirstRow()) + 1;
-         int cols = (range.getLastColumn() - range.getFirstColumn()) + 1;
-
-         for (Ptg token : linkedDataRecord.getFormulaOfLink()) {
-             if (!(token instanceof AreaPtgBase)) {
-                 continue;
-             }
-             AreaPtgBase area = (AreaPtgBase) token;
-             area.setFirstRow(range.getFirstRow());
-             area.setLastRow(range.getLastRow());
-             area.setFirstColumn(range.getFirstColumn());
-             area.setLastColumn(range.getLastColumn());
-             areaTokens.add(area);
-         }
-
-         Ptg[] formula = areaTokens.toArray(new Ptg[areaTokens.size()]);
-         linkedDataRecord.setFormulaOfLink(formula);
-
-         return rows * cols;
-     }
-
-     /**
-      * Updates the values record to reference the given range and stores
-      * the resulting cell count (narrowed to {@code short}) in the series
-      * record. Does nothing if there is no values record.
-      */
-     public void setValuesCellRange(CellRangeAddressBase range) {
-         Integer cellCount = setVerticalCellRange(dataValues, range);
-         if (cellCount != null) {
-             series.setNumValues((short) cellCount.intValue());
-         }
-     }
-
-     /**
-      * Updates the category labels record to reference the given range
-      * and stores the resulting cell count (narrowed to {@code short}) in
-      * the series record. Does nothing if there is no category labels record.
-      */
-     public void setCategoryLabelsCellRange(CellRangeAddressBase range) {
-         Integer cellCount = setVerticalCellRange(dataCategoryLabels, range);
-         if (cellCount != null) {
-             series.setNumCategories((short) cellCount.intValue());
-         }
-     }
- }
-
- /**
-  * Adds a series to this chart by cloning the records of the first
-  * series found after this chart's {@code ChartRecord} and inserting the
-  * clones into the sheet's record list behind the last series of the
-  * chart. The cloned data format record gets the number of existing
-  * series as its series index and series number.
-  *
-  * @return the user model object for the new series, or {@code null} if
-  *         the chart has no complete series to copy or the copied
-  *         records contain no series record
-  * @throws Exception declared by the signature
-  */
- public HSSFSeries createSeries() throws Exception {
-     ArrayList<RecordBase> template = new ArrayList<>();
-     boolean templateComplete = false;
-
-     int position = 0;
-     int depth = 0;
-     int chartPosition = -1;
-     int chartDepth = -1;
-     int openSeriesDepth = -1;
-     int lastSeriesEnd = -1;
-     int seriesCount = 0;
-     final List<RecordBase> records = sheet.getSheet().getRecords();
-
-     /* store first series as template and find last series index */
-     for (final RecordBase current : records) {
-
-         // incremented before the record is examined, so it is one past
-         // the record's zero-based index
-         position++;
-
-         if (current instanceof BeginRecord) {
-             depth++;
-         } else if (current instanceof EndRecord) {
-             depth--;
-
-             if (openSeriesDepth == depth) {
-                 // this end record closes the series that is currently open
-                 openSeriesDepth = -1;
-                 lastSeriesEnd = position;
-                 if (!templateComplete) {
-                     template.add(current);
-                     templateComplete = true;
-                 }
-             }
-
-             if (chartDepth == depth) {
-                 // back at the nesting level of the chart record: stop scanning
-                 break;
-             }
-         }
-
-         if (current instanceof ChartRecord) {
-             if (current == chartRecord) {
-                 chartPosition = position;
-                 chartDepth = depth;
-             }
-         } else if (current instanceof SeriesRecord) {
-             if (chartPosition != -1) {
-                 seriesCount++;
-                 openSeriesDepth = depth;
-             }
-         }
-
-         if (openSeriesDepth != -1 && !templateComplete) {
-             template.add(current);
-         }
-     }
-
-     /* check if a series was found */
-     if (lastSeriesEnd == -1) {
-         return null;
-     }
-
-     /* next index in the records list where the new series can be inserted */
-     position = lastSeriesEnd + 1;
-
-     HSSFSeries created = null;
-
-     /* duplicate record of the template series */
-     ArrayList<RecordBase> copies = new ArrayList<>();
-     for (final RecordBase original : template) {
-
-         Record copy = null;
-
-         if (original instanceof BeginRecord) {
-             copy = new BeginRecord();
-         } else if (original instanceof EndRecord) {
-             copy = new EndRecord();
-         } else if (original instanceof SeriesRecord) {
-             SeriesRecord seriesCopy = (SeriesRecord) ((SeriesRecord) original).clone();
-             created = new HSSFSeries(seriesCopy);
-             copy = seriesCopy;
-         } else if (original instanceof LinkedDataRecord) {
-             LinkedDataRecord linkCopy = ((LinkedDataRecord) original).clone();
-             if (created != null) {
-                 created.insertData(linkCopy);
-             }
-             copy = linkCopy;
-         } else if (original instanceof DataFormatRecord) {
-             DataFormatRecord formatCopy = ((DataFormatRecord) original).clone();
-
-             formatCopy.setSeriesIndex((short) seriesCount);
-             formatCopy.setSeriesNumber((short) seriesCount);
-
-             copy = formatCopy;
-         } else if (original instanceof SeriesTextRecord) {
-             SeriesTextRecord textCopy = (SeriesTextRecord) ((SeriesTextRecord) original).clone();
-             if (created != null) {
-                 created.setSeriesTitleText(textCopy);
-             }
-             copy = textCopy;
-         } else if (original instanceof Record) {
-             copy = (Record) ((Record) original).clone();
-         }
-
-         if (copy != null) {
-             copies.add(copy);
-         }
-     }
-
-     /* check if a user model series object was created */
-     if (created == null) {
-         return null;
-     }
-
-     /* transfer series to record list */
-     for (final RecordBase copy : copies) {
-         records.add(position++, copy);
-     }
-
-     return created;
- }
-
- /**
-  * Removes the records of the given series from the sheet's record list
-  * and renumbers the data format records of the other series of this
-  * chart that are visited during the scan.
-  *
-  * @param remSeries the series to remove; matched by identity of its
-  *        series record
-  * @return {@code true} if the series was found and its closing end
-  *         record was removed, {@code false} otherwise
-  */
- public boolean removeSeries(HSSFSeries remSeries) {
-     int depth = 0;
-     int chartDepth = -1;
-     int targetSeriesDepth = -1;
-     int keptSeriesIdx = -1;
-     boolean removing = false;
-     boolean insideChart = false;
-     boolean removed = false;
-     final List<RecordBase> records = sheet.getSheet().getRecords();
-
-     /* walk the records, dropping those that belong to the series to remove */
-     Iterator<RecordBase> cursor = records.iterator();
-     while (cursor.hasNext()) {
-         RecordBase current = cursor.next();
-
-         if (current instanceof BeginRecord) {
-             depth++;
-         } else if (current instanceof EndRecord) {
-             depth--;
-
-             if (targetSeriesDepth == depth) {
-                 targetSeriesDepth = -1;
-
-                 if (removing) {
-                     // closing end record of the removed series
-                     removing = false;
-                     removed = true;
-                     cursor.remove();
-                 }
-             }
-
-             if (chartDepth == depth) {
-                 // back at the nesting level of the chart record: stop scanning
-                 break;
-             }
-         }
-
-         if (current instanceof ChartRecord) {
-             if (current == chartRecord) {
-                 chartDepth = depth;
-                 insideChart = true;
-             }
-         } else if (current instanceof SeriesRecord) {
-             if (insideChart) {
-                 if (remSeries.series == current) {
-                     targetSeriesDepth = depth;
-                     removing = true;
-                 } else {
-                     keptSeriesIdx++;
-                 }
-             }
-         } else if (current instanceof DataFormatRecord) {
-             if (insideChart && !removing) {
-                 DataFormatRecord format = (DataFormatRecord) current;
-                 format.setSeriesIndex((short) keptSeriesIdx);
-                 format.setSeriesNumber((short) keptSeriesIdx);
-             }
-         }
-
-         if (removing) {
-             cursor.remove();
-         }
-     }
-
-     return removed;
- }
-
- /**
-  * @return the value of this chart's {@code type} field
-  */
- public HSSFChartType getType() {
-     return this.type;
- }
-}
--- /dev/null
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.dev;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Locale;
+import java.util.Properties;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+
+import org.apache.poi.util.XMLHelper;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+/**
+ * Command line tool that turns XML record definitions into Java sources.
+ * Every {@code *_record.xml} / {@code *_type.xml} file of the definitions
+ * directory is run through an XSL style sheet to produce the record class
+ * and, unless it already exists, a matching test class.
+ *
+ *@author andy
+ *@since May 10, 2002
+ */
+public class RecordGenerator {
+    /**
+     * The main program for the RecordGenerator class. Expects exactly four
+     * arguments (definitions directory, style sheet directory, destination
+     * source path, test source path); otherwise a usage message is printed.
+     *
+     *@param  args           The command line arguments
+     *@exception  Exception  if generation fails
+     */
+    public static void main(String[] args)
+    throws Exception {
+        // Force load so that we don't start generating records and realise this hasn't compiled yet.
+        Class.forName("org.apache.poi.generator.FieldIterator");
+
+        if (args.length != 4) {
+            System.out.println("Usage:");
+            // use the real class name so the hint cannot drift from the package declaration
+            System.out.println("  java " + RecordGenerator.class.getName()
+                    + " RECORD_DEFINITIONS RECORD_STYLES DEST_SRC_PATH TEST_SRC_PATH");
+        } else {
+            generateRecords(args[0], args[1], args[2], args[3]);
+        }
+    }
+
+
+    /**
+     * Generates a record class and, if missing, a test class for every
+     * definition file found directly in the definitions directory.
+     *
+     * @param defintionsDir  directory containing the XML definition files
+     * @param recordStyleDir directory containing the XSL style sheets
+     * @param destSrcPathDir root directory for the generated record sources
+     * @param testSrcPathDir root directory for the generated test sources
+     * @throws Exception if a definition cannot be parsed, a directory cannot
+     *         be created or a transformation fails
+     */
+    private static void generateRecords(String defintionsDir, String recordStyleDir, String destSrcPathDir, String testSrcPathDir)
+    throws Exception {
+        File[] definitionsFiles = new File(defintionsDir).listFiles();
+        if (definitionsFiles == null) {
+            System.err.println(defintionsDir+" is not a directory.");
+            return;
+        }
+
+        for (File file : definitionsFiles) {
+            if (!isDefinitionFile(file)) {
+                continue;
+            }
+
+            // Get record name and package
+            DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory();
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            Document document = builder.parse(file);
+            Element record = document.getDocumentElement();
+            String extendstg = requiredChildText(record, "extends", file);
+            String suffix = requiredChildText(record, "suffix", file);
+            String recordName = requiredAttribute(record, "name", file);
+            String packageName = requiredAttribute(record, "package", file).replace('.', '/');
+            String styleBase = recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT);
+
+            // Generate record
+            String destinationPath = destSrcPathDir + "/" + packageName;
+            ensureDirectory(destinationPath);
+            String destinationFilepath = destinationPath + "/" + recordName + suffix + ".java";
+            transform(file, new File(destinationFilepath), new File(styleBase + ".xsl"));
+            System.out.println("Generated " + suffix + ": " + destinationFilepath);
+
+            // Generate test (if not already generated)
+            destinationPath = testSrcPathDir + "/" + packageName;
+            ensureDirectory(destinationPath);
+            destinationFilepath = destinationPath + "/Test" + recordName + suffix + ".java";
+            if (!new File(destinationFilepath).exists()) {
+                transform(file, new File(destinationFilepath), new File(styleBase + "_test.xsl"));
+                System.out.println("Generated test: " + destinationFilepath);
+            } else {
+                System.out.println("Skipped test generation: " + destinationFilepath);
+            }
+        }
+    }
+
+    /**
+     * Tells whether the given file is a regular file named like a record
+     * or type definition.
+     */
+    private static boolean isDefinitionFile(File file) {
+        String name = file.getName();
+        return file.isFile() && (name.endsWith("_record.xml") || name.endsWith("_type.xml"));
+    }
+
+    /**
+     * Makes sure the given directory exists, creating it and any missing
+     * parents if necessary. An already existing directory is accepted:
+     * {@link File#mkdirs()} returns {@code false} in that case, which must
+     * not be treated as a failure because several definitions usually
+     * share one package directory.
+     *
+     * @throws IOException if the directory does not exist and cannot be created
+     */
+    private static void ensureDirectory(String path) throws IOException {
+        File directory = new File(path);
+        if (directory.isDirectory()) {
+            return;
+        }
+        // re-check after a failed mkdirs() in case the directory appeared in the meantime
+        if (!directory.mkdirs() && !directory.isDirectory()) {
+            throw new IOException("Could not create directory " + directory);
+        }
+        System.out.println("Created destination directory: " + path);
+    }
+
+    /**
+     * Returns the value of the first child node of the first element with
+     * the given tag name below {@code record}.
+     *
+     * @throws IllegalArgumentException if there is no such element or it is empty
+     */
+    private static String requiredChildText(Element record, String tagName, File source) {
+        org.w3c.dom.Node element = record.getElementsByTagName(tagName).item(0);
+        if (element == null || element.getFirstChild() == null) {
+            throw new IllegalArgumentException(
+                    "Missing or empty <" + tagName + "> element in " + source);
+        }
+        return element.getFirstChild().getNodeValue();
+    }
+
+    /**
+     * Returns the value of the given attribute of {@code record}.
+     *
+     * @throws IllegalArgumentException if the attribute is absent
+     */
+    private static String requiredAttribute(Element record, String attributeName, File source) {
+        if (!record.hasAttribute(attributeName)) {
+            throw new IllegalArgumentException(
+                    "Missing attribute '" + attributeName + "' on root element of " + source);
+        }
+        return record.getAttribute(attributeName);
+    }
+
+    /**
+     * <p>Executes an XSL transformation. This process transforms an XML input
+     * file into a text output file controlled by an XSLT specification.</p>
+     *
+     * @param in the XML input file
+     * @param out the text output file
+     * @param xslt the XSLT specification, i.e. an XSL style sheet
+     * @throws FileNotFoundException declared by the signature
+     * @throws TransformerException if the style sheet cannot be compiled
+     *         or the transformation fails
+     */
+    private static void transform(final File in, final File out, final File xslt)
+    throws FileNotFoundException, TransformerException
+    {
+        final StreamSource ss = new StreamSource(xslt);
+        final TransformerFactory tf = TransformerFactory.newInstance();
+        final Transformer t;
+        try
+        {
+            t = tf.newTransformer(ss);
+        }
+        catch (TransformerException ex)
+        {
+            // name the offending style sheet before propagating the error
+            System.err.println("Error compiling XSL style sheet " + xslt);
+            throw ex;
+        }
+        final Properties p = new Properties();
+        p.setProperty(OutputKeys.METHOD, "text");
+        t.setOutputProperties(p);
+        final Result result = new StreamResult(out);
+        t.transform(new StreamSource(in), result);
+    }
+
+}