From: Andreas Beeker Date: Sun, 27 May 2018 21:59:18 +0000 (+0000) Subject: #62355 - unsplit packages - 1 - moved classes X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=54f191b613833e874bf7e65f57d9d9d4ef4bace8;p=poi.git #62355 - unsplit packages - 1 - moved classes git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1832358 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java deleted file mode 100644 index 0fccf71c4a..0000000000 --- a/src/java/org/apache/poi/POIOLE2TextExtractor.java +++ /dev/null @@ -1,112 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi; - -import org.apache.poi.hpsf.DocumentSummaryInformation; -import org.apache.poi.hpsf.SummaryInformation; -import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; -import org.apache.poi.poifs.filesystem.DirectoryEntry; - -/** - * Common Parent for OLE2 based Text Extractors - * of POI Documents, such as .doc, .xls - * You will typically find the implementation of - * a given format's text extractor under - * org.apache.poi.[format].extractor . - * - * @see org.apache.poi.hssf.extractor.ExcelExtractor - * @see org.apache.poi.hslf.extractor.PowerPointExtractor - * @see org.apache.poi.hdgf.extractor.VisioTextExtractor - * @see org.apache.poi.hwpf.extractor.WordExtractor - */ -public abstract class POIOLE2TextExtractor extends POITextExtractor { - /** The POIDocument that's open */ - protected POIDocument document; - - /** - * Creates a new text extractor for the given document - * - * @param document The POIDocument to use in this extractor. - */ - public POIOLE2TextExtractor(POIDocument document) { - this.document = document; - - // Ensure any underlying resources, such as open files, - // will get cleaned up if the user calls #close() - setFilesystem(document); - } - - /** - * Creates a new text extractor, using the same - * document as another text extractor. Normally - * only used by properties extractors. - * - * @param otherExtractor the extractor which document to be used - */ - protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) { - this.document = otherExtractor.document; - } - - /** - * Returns the document information metadata for the document - * - * @return The Document Summary Information or null - * if it could not be read for this document. - */ - public DocumentSummaryInformation getDocSummaryInformation() { - return document.getDocumentSummaryInformation(); - } - /** - * Returns the summary information metadata for the document. 
- * - * @return The Summary information for the document or null - * if it could not be read for this document. - */ - public SummaryInformation getSummaryInformation() { - return document.getSummaryInformation(); - } - - /** - * Returns an HPSF powered text extractor for the - * document properties metadata, such as title and author. - * - * @return an instance of POIExtractor that can extract meta-data. - */ - @Override - public POITextExtractor getMetadataTextExtractor() { - return new HPSFPropertiesExtractor(this); - } - - /** - * Return the underlying DirectoryEntry of this document. - * - * @return the DirectoryEntry that is associated with the POIDocument of this extractor. - */ - public DirectoryEntry getRoot() { - return document.getDirectory(); - } - - /** - * Return the underlying POIDocument - * - * @return the underlying POIDocument - */ - @Override - public POIDocument getDocument() { - return document; - } -} \ No newline at end of file diff --git a/src/java/org/apache/poi/POIReadOnlyDocument.java b/src/java/org/apache/poi/POIReadOnlyDocument.java new file mode 100644 index 0000000000..3b3eca588d --- /dev/null +++ b/src/java/org/apache/poi/POIReadOnlyDocument.java @@ -0,0 +1,75 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi; + +import java.io.File; +import java.io.OutputStream; + +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + + +/** + * This holds the common functionality for all read-only + * POI Document classes, i.e. ones which don't support writing. + * + * @since POI 3.15 beta 3 + */ +public abstract class POIReadOnlyDocument extends POIDocument { + public POIReadOnlyDocument(DirectoryNode dir) { + super(dir); + } + public POIReadOnlyDocument(NPOIFSFileSystem fs) { + super(fs); + } + public POIReadOnlyDocument(OPOIFSFileSystem fs) { + super(fs); + } + public POIReadOnlyDocument(POIFSFileSystem fs) { + super(fs); + } + + /** + * Note - writing is not yet supported for this file format, sorry. + * + * @throws IllegalStateException If you call the method, as writing is not supported + */ + @Override + public void write() { + throw new IllegalStateException("Writing is not yet implemented for this Document Format"); + } + /** + * Note - writing is not yet supported for this file format, sorry. + * + * @throws IllegalStateException If you call the method, as writing is not supported + */ + @Override + public void write(File file) { + throw new IllegalStateException("Writing is not yet implemented for this Document Format"); + } + /** + * Note - writing is not yet supported for this file format, sorry. 
+ * + * @throws IllegalStateException If you call the method, as writing is not supported + */ + @Override + public void write(OutputStream out) { + throw new IllegalStateException("Writing is not yet implemented for this Document Format"); + } +} diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java deleted file mode 100644 index 55d0832f16..0000000000 --- a/src/java/org/apache/poi/POITextExtractor.java +++ /dev/null @@ -1,82 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import java.io.Closeable; -import java.io.IOException; - -/** - * Common Parent for Text Extractors - * of POI Documents. - * You will typically find the implementation of - * a given format's text extractor under - * org.apache.poi.[format].extractor . 
- * - * @see org.apache.poi.hssf.extractor.ExcelExtractor - * @see org.apache.poi.hslf.extractor.PowerPointExtractor - * @see org.apache.poi.hdgf.extractor.VisioTextExtractor - * @see org.apache.poi.hwpf.extractor.WordExtractor - */ -public abstract class POITextExtractor implements Closeable { - private Closeable fsToClose; - - /** - * Retrieves all the text from the document. - * How cells, paragraphs etc are separated in the text - * is implementation specific - see the javadocs for - * a specific project for details. - * @return All the text from the document - */ - public abstract String getText(); - - /** - * Returns another text extractor, which is able to - * output the textual content of the document - * metadata / properties, such as author and title. - * - * @return the metadata and text extractor - */ - public abstract POITextExtractor getMetadataTextExtractor(); - - /** - * Used to ensure file handle cleanup. - * - * @param fs filesystem to close - */ - public void setFilesystem(Closeable fs) { - fsToClose = fs; - } - - /** - * Allows to free resources of the Extractor as soon as - * it is not needed any more. This may include closing - * open file handles and freeing memory. - * - * The Extractor cannot be used after close has been called. - */ - @Override - public void close() throws IOException { - if(fsToClose != null) { - fsToClose.close(); - } - } - - /** - * @return the processed document - */ - public abstract Object getDocument(); -} diff --git a/src/java/org/apache/poi/dev/RecordGenerator.java b/src/java/org/apache/poi/dev/RecordGenerator.java deleted file mode 100644 index 585003c526..0000000000 --- a/src/java/org/apache/poi/dev/RecordGenerator.java +++ /dev/null @@ -1,160 +0,0 @@ - -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.dev; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Locale; -import java.util.Properties; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Result; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.stream.StreamResult; -import javax.xml.transform.stream.StreamSource; - -import org.apache.poi.util.XMLHelper; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -/** - * Description of the Class - * - *@author andy - *@since May 10, 2002 - */ -public class RecordGenerator { - /** - * The main program for the RecordGenerator class - * - *@param args The command line arguments - *@exception Exception Description of the Exception - */ - public static void main(String[] args) - throws Exception { - // Force load so that we don't start generating records and realise this hasn't compiled yet. 
- Class.forName("org.apache.poi.generator.FieldIterator"); - - if (args.length != 4) { - System.out.println("Usage:"); - System.out.println(" java org.apache.poi.hssf.util.RecordGenerator RECORD_DEFINTIONS RECORD_STYLES DEST_SRC_PATH TEST_SRC_PATH"); - } else { - generateRecords(args[0], args[1], args[2], args[3]); - } - } - - - private static void generateRecords(String defintionsDir, String recordStyleDir, String destSrcPathDir, String testSrcPathDir) - throws Exception { - File definitionsFiles[] = new File(defintionsDir).listFiles(); - if (definitionsFiles == null) { - System.err.println(defintionsDir+" is not a directory."); - return; - } - - for (File file : definitionsFiles) { - if (file.isFile() && - (file.getName().endsWith("_record.xml") || - file.getName().endsWith("_type.xml") - ) - ) { - // Get record name and package - DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory(); - DocumentBuilder builder = factory.newDocumentBuilder(); - Document document = builder.parse(file); - Element record = document.getDocumentElement(); - String extendstg = record.getElementsByTagName("extends").item(0).getFirstChild().getNodeValue(); - String suffix = record.getElementsByTagName("suffix").item(0).getFirstChild().getNodeValue(); - String recordName = record.getAttributes().getNamedItem("name").getNodeValue(); - String packageName = record.getAttributes().getNamedItem("package").getNodeValue(); - packageName = packageName.replace('.', '/'); - - // Generate record - String destinationPath = destSrcPathDir + "/" + packageName; - File destinationPathFile = new File(destinationPath); - if(!destinationPathFile.mkdirs()) { - throw new IOException("Could not create directory " + destinationPathFile); - } else { - System.out.println("Created destination directory: " + destinationPath); - } - String destinationFilepath = destinationPath + "/" + recordName + suffix + ".java"; - transform(file, new File(destinationFilepath), - new File(recordStyleDir + "/" + 
extendstg.toLowerCase(Locale.ROOT) + ".xsl")); - System.out.println("Generated " + suffix + ": " + destinationFilepath); - - // Generate test (if not already generated) - destinationPath = testSrcPathDir + "/" + packageName; - destinationPathFile = new File(destinationPath); - if(!destinationPathFile.mkdirs()) { - throw new IOException("Could not create directory " + destinationPathFile); - } else { - System.out.println("Created destination directory: " + destinationPath); - } - destinationFilepath = destinationPath + "/Test" + recordName + suffix + ".java"; - if (!new File(destinationFilepath).exists()) { - String temp = (recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT) + "_test.xsl"); - transform(file, new File(destinationFilepath), new File(temp)); - System.out.println("Generated test: " + destinationFilepath); - } else { - System.out.println("Skipped test generation: " + destinationFilepath); - } - } - } - } - - - - /** - *

<p>Executes an XSL transformation. This process transforms an XML input - * file into a text output file controlled by an XSLT specification.</p>

- * - * @param in the XML input file - * @param out the text output file - * @param xslt the XSLT specification, i.e. an XSL style sheet - * @throws FileNotFoundException - * @throws TransformerException - */ - private static void transform(final File in, final File out, final File xslt) - throws FileNotFoundException, TransformerException - { - final StreamSource ss = new StreamSource(xslt); - final TransformerFactory tf = TransformerFactory.newInstance(); - final Transformer t; - try - { - t = tf.newTransformer(ss); - } - catch (TransformerException ex) - { - System.err.println("Error compiling XSL style sheet " + xslt); - throw ex; - } - final Properties p = new Properties(); - p.setProperty(OutputKeys.METHOD, "text"); - t.setOutputProperties(p); - final Result result = new StreamResult(out); - t.transform(new StreamSource(in), result); - } - -} diff --git a/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java b/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java new file mode 100644 index 0000000000..465de40375 --- /dev/null +++ b/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java @@ -0,0 +1,113 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.extractor; + +import org.apache.poi.POIDocument; +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.SummaryInformation; +import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; +import org.apache.poi.poifs.filesystem.DirectoryEntry; + +/** + * Common Parent for OLE2 based Text Extractors + * of POI Documents, such as .doc, .xls + * You will typically find the implementation of + * a given format's text extractor under + * org.apache.poi.[format].extractor . + * + * @see org.apache.poi.hssf.extractor.ExcelExtractor + * @see org.apache.poi.hslf.extractor.PowerPointExtractor + * @see org.apache.poi.hdgf.extractor.VisioTextExtractor + * @see org.apache.poi.hwpf.extractor.WordExtractor + */ +public abstract class POIOLE2TextExtractor extends POITextExtractor { + /** The POIDocument that's open */ + protected POIDocument document; + + /** + * Creates a new text extractor for the given document + * + * @param document The POIDocument to use in this extractor. + */ + public POIOLE2TextExtractor(POIDocument document) { + this.document = document; + + // Ensure any underlying resources, such as open files, + // will get cleaned up if the user calls #close() + setFilesystem(document); + } + + /** + * Creates a new text extractor, using the same + * document as another text extractor. Normally + * only used by properties extractors. + * + * @param otherExtractor the extractor which document to be used + */ + protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) { + this.document = otherExtractor.document; + } + + /** + * Returns the document information metadata for the document + * + * @return The Document Summary Information or null + * if it could not be read for this document. 
+ */ + public DocumentSummaryInformation getDocSummaryInformation() { + return document.getDocumentSummaryInformation(); + } + /** + * Returns the summary information metadata for the document. + * + * @return The Summary information for the document or null + * if it could not be read for this document. + */ + public SummaryInformation getSummaryInformation() { + return document.getSummaryInformation(); + } + + /** + * Returns an HPSF powered text extractor for the + * document properties metadata, such as title and author. + * + * @return an instance of POIExtractor that can extract meta-data. + */ + @Override + public POITextExtractor getMetadataTextExtractor() { + return new HPSFPropertiesExtractor(this); + } + + /** + * Return the underlying DirectoryEntry of this document. + * + * @return the DirectoryEntry that is associated with the POIDocument of this extractor. + */ + public DirectoryEntry getRoot() { + return document.getDirectory(); + } + + /** + * Return the underlying POIDocument + * + * @return the underlying POIDocument + */ + @Override + public POIDocument getDocument() { + return document; + } +} \ No newline at end of file diff --git a/src/java/org/apache/poi/extractor/POITextExtractor.java b/src/java/org/apache/poi/extractor/POITextExtractor.java new file mode 100644 index 0000000000..e32adcb12c --- /dev/null +++ b/src/java/org/apache/poi/extractor/POITextExtractor.java @@ -0,0 +1,82 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.extractor; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Common Parent for Text Extractors + * of POI Documents. + * You will typically find the implementation of + * a given format's text extractor under + * org.apache.poi.[format].extractor . + * + * @see org.apache.poi.hssf.extractor.ExcelExtractor + * @see org.apache.poi.hslf.extractor.PowerPointExtractor + * @see org.apache.poi.hdgf.extractor.VisioTextExtractor + * @see org.apache.poi.hwpf.extractor.WordExtractor + */ +public abstract class POITextExtractor implements Closeable { + private Closeable fsToClose; + + /** + * Retrieves all the text from the document. + * How cells, paragraphs etc are separated in the text + * is implementation specific - see the javadocs for + * a specific project for details. + * @return All the text from the document + */ + public abstract String getText(); + + /** + * Returns another text extractor, which is able to + * output the textual content of the document + * metadata / properties, such as author and title. + * + * @return the metadata and text extractor + */ + public abstract POITextExtractor getMetadataTextExtractor(); + + /** + * Used to ensure file handle cleanup. + * + * @param fs filesystem to close + */ + public void setFilesystem(Closeable fs) { + fsToClose = fs; + } + + /** + * Allows to free resources of the Extractor as soon as + * it is not needed any more. This may include closing + * open file handles and freeing memory. 
+ * + * The Extractor cannot be used after close has been called. + */ + @Override + public void close() throws IOException { + if(fsToClose != null) { + fsToClose.close(); + } + } + + /** + * @return the processed document + */ + public abstract Object getDocument(); +} diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFChart.java b/src/java/org/apache/poi/hssf/usermodel/HSSFChart.java new file mode 100644 index 0000000000..f91781dc64 --- /dev/null +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFChart.java @@ -0,0 +1,1371 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hssf.usermodel; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.poi.hssf.record.BOFRecord; +import org.apache.poi.hssf.record.DimensionsRecord; +import org.apache.poi.hssf.record.EOFRecord; +import org.apache.poi.hssf.record.FooterRecord; +import org.apache.poi.hssf.record.HCenterRecord; +import org.apache.poi.hssf.record.HeaderRecord; +import org.apache.poi.hssf.record.PrintSetupRecord; +import org.apache.poi.hssf.record.ProtectRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.hssf.record.RecordBase; +import org.apache.poi.hssf.record.SCLRecord; +import org.apache.poi.hssf.record.UnknownRecord; +import org.apache.poi.hssf.record.VCenterRecord; +import org.apache.poi.hssf.record.chart.AreaFormatRecord; +import org.apache.poi.hssf.record.chart.AxisLineFormatRecord; +import org.apache.poi.hssf.record.chart.AxisOptionsRecord; +import org.apache.poi.hssf.record.chart.AxisParentRecord; +import org.apache.poi.hssf.record.chart.AxisRecord; +import org.apache.poi.hssf.record.chart.AxisUsedRecord; +import org.apache.poi.hssf.record.chart.BarRecord; +import org.apache.poi.hssf.record.chart.BeginRecord; +import org.apache.poi.hssf.record.chart.CategorySeriesAxisRecord; +import org.apache.poi.hssf.record.chart.ChartFormatRecord; +import org.apache.poi.hssf.record.chart.ChartRecord; +import org.apache.poi.hssf.record.chart.ChartTitleFormatRecord; +import org.apache.poi.hssf.record.chart.DataFormatRecord; +import org.apache.poi.hssf.record.chart.DefaultDataLabelTextPropertiesRecord; +import org.apache.poi.hssf.record.chart.EndRecord; +import org.apache.poi.hssf.record.chart.FontBasisRecord; +import org.apache.poi.hssf.record.chart.FontIndexRecord; +import org.apache.poi.hssf.record.chart.FrameRecord; +import org.apache.poi.hssf.record.chart.LegendRecord; +import 
org.apache.poi.hssf.record.chart.LineFormatRecord; +import org.apache.poi.hssf.record.chart.LinkedDataRecord; +import org.apache.poi.hssf.record.chart.PlotAreaRecord; +import org.apache.poi.hssf.record.chart.PlotGrowthRecord; +import org.apache.poi.hssf.record.chart.SeriesIndexRecord; +import org.apache.poi.hssf.record.chart.SeriesRecord; +import org.apache.poi.hssf.record.chart.SeriesTextRecord; +import org.apache.poi.hssf.record.chart.SeriesToChartGroupRecord; +import org.apache.poi.hssf.record.chart.SheetPropertiesRecord; +import org.apache.poi.hssf.record.chart.TextRecord; +import org.apache.poi.hssf.record.chart.TickRecord; +import org.apache.poi.hssf.record.chart.UnitsRecord; +import org.apache.poi.hssf.record.chart.ValueRangeRecord; +import org.apache.poi.ss.formula.ptg.Area3DPtg; +import org.apache.poi.ss.formula.ptg.AreaPtgBase; +import org.apache.poi.ss.formula.ptg.Ptg; +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.ss.util.CellRangeAddressBase; + +/** + * Has methods for construction of a chart object. 
+ * + * @author Glen Stampoultzis (glens at apache.org) + */ +public final class HSSFChart { + private HSSFSheet sheet; + private ChartRecord chartRecord; + + private LegendRecord legendRecord; + @SuppressWarnings("unused") + private ChartTitleFormatRecord chartTitleFormat; + private SeriesTextRecord chartTitleText; + private List valueRanges = new ArrayList<>(); + + private HSSFChartType type = HSSFChartType.Unknown; + + private List series = new ArrayList<>(); + + public enum HSSFChartType { + Area { + @Override + public short getSid() { + return 0x101A; + } + }, + Bar { + @Override + public short getSid() { + return 0x1017; + } + }, + Line { + @Override + public short getSid() { + return 0x1018; + } + }, + Pie { + @Override + public short getSid() { + return 0x1019; + } + }, + Scatter { + @Override + public short getSid() { + return 0x101B; + } + }, + Unknown { + @Override + public short getSid() { + return 0; + } + }; + + public abstract short getSid(); + } + + private HSSFChart(HSSFSheet sheet, ChartRecord chartRecord) { + this.chartRecord = chartRecord; + this.sheet = sheet; + } + + /** + * Creates a bar chart. API needs some work. :) + *

+ * NOTE: Does not yet work... checking it in just so others + * can take a look. + */ + public void createBarChart( HSSFWorkbook workbook, HSSFSheet parentSheet ) + { + + List records = new ArrayList<>(); + records.add( createMSDrawingObjectRecord() ); + records.add( createOBJRecord() ); + records.add( createBOFRecord() ); + records.add(new HeaderRecord("")); + records.add(new FooterRecord("")); + records.add( createHCenterRecord() ); + records.add( createVCenterRecord() ); + records.add( createPrintSetupRecord() ); + // unknown 33 + records.add( createFontBasisRecord1() ); + records.add( createFontBasisRecord2() ); + records.add(new ProtectRecord(false)); + records.add( createUnitsRecord() ); + records.add( createChartRecord( 0, 0, 30434904, 19031616 ) ); + records.add( createBeginRecord() ); + records.add( createSCLRecord( (short) 1, (short) 1 ) ); + records.add( createPlotGrowthRecord( 65536, 65536 ) ); + records.add( createFrameRecord1() ); + records.add( createBeginRecord() ); + records.add( createLineFormatRecord(true) ); + records.add( createAreaFormatRecord1() ); + records.add( createEndRecord() ); + records.add( createSeriesRecord() ); + records.add( createBeginRecord() ); + records.add( createTitleLinkedDataRecord() ); + records.add( createValuesLinkedDataRecord() ); + records.add( createCategoriesLinkedDataRecord() ); + records.add( createDataFormatRecord() ); + // records.add(createBeginRecord()); + // unknown + // records.add(createEndRecord()); + records.add( createSeriesToChartGroupRecord() ); + records.add( createEndRecord() ); + records.add( createSheetPropsRecord() ); + records.add( createDefaultTextRecord( DefaultDataLabelTextPropertiesRecord.CATEGORY_DATA_TYPE_ALL_TEXT_CHARACTERISTIC ) ); + records.add( createAllTextRecord() ); + records.add( createBeginRecord() ); + // unknown + records.add( createFontIndexRecord( 5 ) ); + records.add( createDirectLinkRecord() ); + records.add( createEndRecord() ); + records.add( createDefaultTextRecord( 
(short) 3 ) ); // eek, undocumented text type + records.add( createUnknownTextRecord() ); + records.add( createBeginRecord() ); + records.add( createFontIndexRecord( (short) 6 ) ); + records.add( createDirectLinkRecord() ); + records.add( createEndRecord() ); + + records.add( createAxisUsedRecord( (short) 1 ) ); + createAxisRecords( records ); + + records.add( createEndRecord() ); + records.add( createDimensionsRecord() ); + records.add( createSeriesIndexRecord(2) ); + records.add( createSeriesIndexRecord(1) ); + records.add( createSeriesIndexRecord(3) ); + records.add(EOFRecord.instance); + + + + parentSheet.insertChartRecords( records ); + workbook.insertChartRecord(); + } + + /** + * Returns all the charts for the given sheet. + * + * NOTE: You won't be able to do very much with + * these charts yet, as this is very limited support + */ + public static HSSFChart[] getSheetCharts(HSSFSheet sheet) { + List charts = new ArrayList<>(); + HSSFChart lastChart = null; + HSSFSeries lastSeries = null; + // Find records of interest + List records = sheet.getSheet().getRecords(); + for(RecordBase r : records) { + + if(r instanceof ChartRecord) { + lastSeries = null; + lastChart = new HSSFChart(sheet,(ChartRecord)r); + charts.add(lastChart); + } else if (r instanceof LinkedDataRecord) { + LinkedDataRecord linkedDataRecord = (LinkedDataRecord) r; + if (lastSeries != null) { + lastSeries.insertData(linkedDataRecord); + } + } + + if (lastChart == null) { + continue; + } + + if (r instanceof LegendRecord) { + lastChart.legendRecord = (LegendRecord)r; + } else if(r instanceof SeriesRecord) { + HSSFSeries series = new HSSFSeries( (SeriesRecord)r ); + lastChart.series.add(series); + lastSeries = series; + } else if(r instanceof ChartTitleFormatRecord) { + lastChart.chartTitleFormat = (ChartTitleFormatRecord)r; + } else if(r instanceof SeriesTextRecord) { + // Applies to a series, unless we've seen a legend already + SeriesTextRecord str = (SeriesTextRecord)r; + 
if(lastChart.legendRecord == null && lastChart.series.size() > 0) { + HSSFSeries series = lastChart.series.get(lastChart.series.size()-1); + series.seriesTitleText = str; + } else { + lastChart.chartTitleText = str; + } + } else if(r instanceof ValueRangeRecord){ + lastChart.valueRanges.add((ValueRangeRecord)r); + } else if (r instanceof Record) { + Record record = (Record) r; + for (HSSFChartType type : HSSFChartType.values()) { + if (type == HSSFChartType.Unknown) { + continue; + } + if (record.getSid() == type.getSid()) { + lastChart.type = type; + break; + } + } + } + } + + return charts.toArray( new HSSFChart[charts.size()] ); + } + + /** Get the X offset of the chart */ + public int getChartX() { return chartRecord.getX(); } + /** Get the Y offset of the chart */ + public int getChartY() { return chartRecord.getY(); } + /** Get the width of the chart. {@link ChartRecord} */ + public int getChartWidth() { return chartRecord.getWidth(); } + /** Get the height of the chart. {@link ChartRecord} */ + public int getChartHeight() { return chartRecord.getHeight(); } + + /** Sets the X offset of the chart */ + public void setChartX(int x) { chartRecord.setX(x); } + /** Sets the Y offset of the chart */ + public void setChartY(int y) { chartRecord.setY(y); } + /** Sets the width of the chart. {@link ChartRecord} */ + public void setChartWidth(int width) { chartRecord.setWidth(width); } + /** Sets the height of the chart. {@link ChartRecord} */ + public void setChartHeight(int height) { chartRecord.setHeight(height); } + + /** + * Returns the series of the chart + */ + public HSSFSeries[] getSeries() { + return series.toArray(new HSSFSeries[series.size()]); + } + + /** + * Returns the chart's title, if there is one, + * or null if not + */ + public String getChartTitle() { + if(chartTitleText != null) { + return chartTitleText.getText(); + } + return null; + } + + /** + * Changes the chart's title, but only if there + * was one already. 
+ * TODO - add in the records if not + */ + public void setChartTitle(String title) { + if(chartTitleText != null) { + chartTitleText.setText(title); + } else { + throw new IllegalStateException("No chart title found to change"); + } + } + + /** + * Set value range (basic Axis Options) + * @param axisIndex 0 - primary axis, 1 - secondary axis + * @param minimum minimum value; Double.NaN - automatic; null - no change + * @param maximum maximum value; Double.NaN - automatic; null - no change + * @param majorUnit major unit value; Double.NaN - automatic; null - no change + * @param minorUnit minor unit value; Double.NaN - automatic; null - no change + */ + public void setValueRange( int axisIndex, Double minimum, Double maximum, Double majorUnit, Double minorUnit){ + ValueRangeRecord valueRange = valueRanges.get( axisIndex ); + if( valueRange == null ) return; + if( minimum != null ){ + valueRange.setAutomaticMinimum(minimum.isNaN()); + valueRange.setMinimumAxisValue(minimum); + } + if( maximum != null ){ + valueRange.setAutomaticMaximum(maximum.isNaN()); + valueRange.setMaximumAxisValue(maximum); + } + if( majorUnit != null ){ + valueRange.setAutomaticMajor(majorUnit.isNaN()); + valueRange.setMajorIncrement(majorUnit); + } + if( minorUnit != null ){ + valueRange.setAutomaticMinor(minorUnit.isNaN()); + valueRange.setMinorIncrement(minorUnit); + } + } + + private SeriesIndexRecord createSeriesIndexRecord( int index ) + { + SeriesIndexRecord r = new SeriesIndexRecord(); + r.setIndex((short)index); + return r; + } + + private DimensionsRecord createDimensionsRecord() + { + DimensionsRecord r = new DimensionsRecord(); + r.setFirstRow(0); + r.setLastRow(31); + r.setFirstCol((short)0); + r.setLastCol((short)1); + return r; + } + + private HCenterRecord createHCenterRecord() + { + HCenterRecord r = new HCenterRecord(); + r.setHCenter(false); + return r; + } + + private VCenterRecord createVCenterRecord() + { + VCenterRecord r = new VCenterRecord(); + r.setVCenter(false); + 
return r; + } + + private PrintSetupRecord createPrintSetupRecord() + { + PrintSetupRecord r = new PrintSetupRecord(); + r.setPaperSize((short)0); + r.setScale((short)18); + r.setPageStart((short)1); + r.setFitWidth((short)1); + r.setFitHeight((short)1); + r.setLeftToRight(false); + r.setLandscape(false); + r.setValidSettings(true); + r.setNoColor(false); + r.setDraft(false); + r.setNotes(false); + r.setNoOrientation(false); + r.setUsePage(false); + r.setHResolution((short)0); + r.setVResolution((short)0); + r.setHeaderMargin(0.5); + r.setFooterMargin(0.5); + r.setCopies((short)15); // what the ?? + return r; + } + + private FontBasisRecord createFontBasisRecord1() + { + FontBasisRecord r = new FontBasisRecord(); + r.setXBasis((short)9120); + r.setYBasis((short)5640); + r.setHeightBasis((short)200); + r.setScale((short)0); + r.setIndexToFontTable((short)5); + return r; + } + + private FontBasisRecord createFontBasisRecord2() + { + FontBasisRecord r = createFontBasisRecord1(); + r.setIndexToFontTable((short)6); + return r; + } + + private BOFRecord createBOFRecord() + { + BOFRecord r = new BOFRecord(); + r.setVersion((short)600); + r.setType((short)20); + r.setBuild((short)0x1CFE); + r.setBuildYear((short)1997); + r.setHistoryBitMask(0x40C9); + r.setRequiredVersion(106); + return r; + } + + private UnknownRecord createOBJRecord() + { + byte[] data = { + (byte) 0x15, (byte) 0x00, (byte) 0x12, (byte) 0x00, (byte) 0x05, (byte) 0x00, (byte) 0x02, (byte) 0x00, (byte) 0x11, (byte) 0x60, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0xB8, (byte) 0x03, + (byte) 0x87, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, + }; + + return new UnknownRecord( (short) 0x005D, data ); + } + + private UnknownRecord createMSDrawingObjectRecord() + { + // Since we haven't created this object yet we'll just put in the raw + // form for the moment. 
+ + byte[] data = { + (byte)0x0F, (byte)0x00, (byte)0x02, (byte)0xF0, (byte)0xC0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0x00, (byte)0x08, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x0F, (byte)0x00, (byte)0x03, (byte)0xF0, (byte)0xA8, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x28, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x01, (byte)0x00, (byte)0x09, (byte)0xF0, (byte)0x10, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x02, (byte)0x00, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x05, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x70, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x92, (byte)0x0C, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x0A, (byte)0x00, (byte)0x00, (byte)0x93, (byte)0x00, (byte)0x0B, (byte)0xF0, (byte)0x36, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x7F, (byte)0x00, (byte)0x04, (byte)0x01, (byte)0x04, (byte)0x01, (byte)0xBF, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x81, (byte)0x01, (byte)0x4E, (byte)0x00, + (byte)0x00, (byte)0x08, (byte)0x83, (byte)0x01, (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xBF, (byte)0x01, (byte)0x10, (byte)0x00, (byte)0x11, (byte)0x00, (byte)0xC0, (byte)0x01, + (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xFF, (byte)0x01, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x3F, (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x00, + (byte)0xBF, (byte)0x03, (byte)0x00, 
(byte)0x00, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0xF0, (byte)0x12, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, + (byte)0x04, (byte)0x00, (byte)0xC0, (byte)0x02, (byte)0x0A, (byte)0x00, (byte)0xF4, (byte)0x00, (byte)0x0E, (byte)0x00, (byte)0x66, (byte)0x01, (byte)0x20, (byte)0x00, (byte)0xE9, (byte)0x00, + (byte)0x00, (byte)0x00, (byte)0x11, (byte)0xF0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 + }; + + return new UnknownRecord((short)0x00EC, data); + } + + private void createAxisRecords( List records ) + { + records.add( createAxisParentRecord() ); + records.add( createBeginRecord() ); + records.add( createAxisRecord( AxisRecord.AXIS_TYPE_CATEGORY_OR_X_AXIS ) ); + records.add( createBeginRecord() ); + records.add( createCategorySeriesAxisRecord() ); + records.add( createAxisOptionsRecord() ); + records.add( createTickRecord1() ); + records.add( createEndRecord() ); + records.add( createAxisRecord( AxisRecord.AXIS_TYPE_VALUE_AXIS ) ); + records.add( createBeginRecord() ); + records.add( createValueRangeRecord() ); + records.add( createTickRecord2() ); + records.add( createAxisLineFormatRecord( AxisLineFormatRecord.AXIS_TYPE_MAJOR_GRID_LINE ) ); + records.add( createLineFormatRecord(false) ); + records.add( createEndRecord() ); + records.add( createPlotAreaRecord() ); + records.add( createFrameRecord2() ); + records.add( createBeginRecord() ); + records.add( createLineFormatRecord2() ); + records.add( createAreaFormatRecord2() ); + records.add( createEndRecord() ); + records.add( createChartFormatRecord() ); + records.add( createBeginRecord() ); + records.add( createBarRecord() ); + // unknown 1022 + records.add( createLegendRecord() ); + records.add( createBeginRecord() ); + // unknown 104f + records.add( createTextRecord() ); + records.add( createBeginRecord() ); + // unknown 104f + records.add( createLinkedDataRecord() ); + records.add( createEndRecord() ); + records.add( createEndRecord() ); + records.add( 
createEndRecord() ); + records.add( createEndRecord() ); + } + + private LinkedDataRecord createLinkedDataRecord() + { + LinkedDataRecord r = new LinkedDataRecord(); + r.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT); + r.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT); + r.setCustomNumberFormat(false); + r.setIndexNumberFmtRecord((short)0); + r.setFormulaOfLink(null); + return r; + } + + private TextRecord createTextRecord() + { + TextRecord r = new TextRecord(); + r.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER); + r.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER); + r.setDisplayMode((short)1); + r.setRgbColor(0x00000000); + r.setX(-37); + r.setY(-60); + r.setWidth(0); + r.setHeight(0); + r.setAutoColor(true); + r.setShowKey(false); + r.setShowValue(false); + r.setVertical(false); + r.setAutoGeneratedText(true); + r.setGenerated(true); + r.setAutoLabelDeleted(false); + r.setAutoBackground(true); + r.setRotation((short)0); + r.setShowCategoryLabelAsPercentage(false); + r.setShowValueAsPercentage(false); + r.setShowBubbleSizes(false); + r.setShowLabel(false); + r.setIndexOfColorValue((short)77); + r.setDataLabelPlacement((short)0); + r.setTextRotation((short)0); + return r; + } + + private LegendRecord createLegendRecord() + { + LegendRecord r = new LegendRecord(); + r.setXAxisUpperLeft(3542); + r.setYAxisUpperLeft(1566); + r.setXSize(437); + r.setYSize(213); + r.setType(LegendRecord.TYPE_RIGHT); + r.setSpacing(LegendRecord.SPACING_MEDIUM); + r.setAutoPosition(true); + r.setAutoSeries(true); + r.setAutoXPositioning(true); + r.setAutoYPositioning(true); + r.setVertical(true); + r.setDataTable(false); + return r; + } + + private BarRecord createBarRecord() + { + BarRecord r = new BarRecord(); + r.setBarSpace((short)0); + r.setCategorySpace((short)150); + r.setHorizontal(false); + r.setStacked(false); + r.setDisplayAsPercentage(false); + r.setShadow(false); + return r; + } + + private ChartFormatRecord 
createChartFormatRecord() + { + ChartFormatRecord r = new ChartFormatRecord(); + r.setXPosition(0); + r.setYPosition(0); + r.setWidth(0); + r.setHeight(0); + r.setVaryDisplayPattern(false); + return r; + } + + private PlotAreaRecord createPlotAreaRecord() + { + return new PlotAreaRecord( ); + } + + private AxisLineFormatRecord createAxisLineFormatRecord( short format ) + { + AxisLineFormatRecord r = new AxisLineFormatRecord(); + r.setAxisType( format ); + return r; + } + + private ValueRangeRecord createValueRangeRecord() + { + ValueRangeRecord r = new ValueRangeRecord(); + r.setMinimumAxisValue( 0.0 ); + r.setMaximumAxisValue( 0.0 ); + r.setMajorIncrement( 0 ); + r.setMinorIncrement( 0 ); + r.setCategoryAxisCross( 0 ); + r.setAutomaticMinimum( true ); + r.setAutomaticMaximum( true ); + r.setAutomaticMajor( true ); + r.setAutomaticMinor( true ); + r.setAutomaticCategoryCrossing( true ); + r.setLogarithmicScale( false ); + r.setValuesInReverse( false ); + r.setCrossCategoryAxisAtMaximum( false ); + r.setReserved( true ); // what's this do?? 
+ return r; + } + + private TickRecord createTickRecord1() + { + TickRecord r = new TickRecord(); + r.setMajorTickType( (byte) 2 ); + r.setMinorTickType( (byte) 0 ); + r.setLabelPosition( (byte) 3 ); + r.setBackground( (byte) 1 ); + r.setLabelColorRgb( 0 ); + r.setZero1( (short) 0 ); + r.setZero2( (short) 0 ); + r.setZero3( (short) 45 ); + r.setAutorotate( true ); + r.setAutoTextBackground( true ); + r.setRotation( (short) 0 ); + r.setAutorotate( true ); + r.setTickColor( (short) 77 ); + return r; + } + + private TickRecord createTickRecord2() + { + TickRecord r = createTickRecord1(); + r.setZero3((short)0); + return r; + } + + private AxisOptionsRecord createAxisOptionsRecord() + { + AxisOptionsRecord r = new AxisOptionsRecord(); + r.setMinimumCategory( (short) -28644 ); + r.setMaximumCategory( (short) -28715 ); + r.setMajorUnitValue( (short) 2 ); + r.setMajorUnit( (short) 0 ); + r.setMinorUnitValue( (short) 1 ); + r.setMinorUnit( (short) 0 ); + r.setBaseUnit( (short) 0 ); + r.setCrossingPoint( (short) -28644 ); + r.setDefaultMinimum( true ); + r.setDefaultMaximum( true ); + r.setDefaultMajor( true ); + r.setDefaultMinorUnit( true ); + r.setIsDate( true ); + r.setDefaultBase( true ); + r.setDefaultCross( true ); + r.setDefaultDateSettings( true ); + return r; + } + + private CategorySeriesAxisRecord createCategorySeriesAxisRecord() + { + CategorySeriesAxisRecord r = new CategorySeriesAxisRecord(); + r.setCrossingPoint( (short) 1 ); + r.setLabelFrequency( (short) 1 ); + r.setTickMarkFrequency( (short) 1 ); + r.setValueAxisCrossing( true ); + r.setCrossesFarRight( false ); + r.setReversed( false ); + return r; + } + + private AxisRecord createAxisRecord( short axisType ) + { + AxisRecord r = new AxisRecord(); + r.setAxisType( axisType ); + return r; + } + + private AxisParentRecord createAxisParentRecord() + { + AxisParentRecord r = new AxisParentRecord(); + r.setAxisType( AxisParentRecord.AXIS_TYPE_MAIN ); + r.setX( 479 ); + r.setY( 221 ); + r.setWidth( 2995 ); + 
r.setHeight( 2902 ); + return r; + } + + private AxisUsedRecord createAxisUsedRecord( short numAxis ) + { + AxisUsedRecord r = new AxisUsedRecord(); + r.setNumAxis( numAxis ); + return r; + } + + private LinkedDataRecord createDirectLinkRecord() + { + LinkedDataRecord r = new LinkedDataRecord(); + r.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT ); + r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT ); + r.setCustomNumberFormat( false ); + r.setIndexNumberFmtRecord( (short) 0 ); + r.setFormulaOfLink(null); + return r; + } + + private FontIndexRecord createFontIndexRecord( int index ) + { + FontIndexRecord r = new FontIndexRecord(); + r.setFontIndex( (short) index ); + return r; + } + + private TextRecord createAllTextRecord() + { + TextRecord r = new TextRecord(); + r.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER ); + r.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER ); + r.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT ); + r.setRgbColor( 0 ); + r.setX( -37 ); + r.setY( -60 ); + r.setWidth( 0 ); + r.setHeight( 0 ); + r.setAutoColor( true ); + r.setShowKey( false ); + r.setShowValue( true ); + r.setVertical( false ); + r.setAutoGeneratedText( true ); + r.setGenerated( true ); + r.setAutoLabelDeleted( false ); + r.setAutoBackground( true ); + r.setRotation( (short) 0 ); + r.setShowCategoryLabelAsPercentage( false ); + r.setShowValueAsPercentage( false ); + r.setShowBubbleSizes( false ); + r.setShowLabel( false ); + r.setIndexOfColorValue( (short) 77 ); + r.setDataLabelPlacement( (short) 0 ); + r.setTextRotation( (short) 0 ); + return r; + } + + private TextRecord createUnknownTextRecord() + { + TextRecord r = new TextRecord(); + r.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER ); + r.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER ); + r.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT ); + r.setRgbColor( 0 ); + r.setX( -37 ); + r.setY( -60 ); + r.setWidth( 0 ); + r.setHeight( 0 ); + 
r.setAutoColor( true ); + r.setShowKey( false ); + r.setShowValue( false ); + r.setVertical( false ); + r.setAutoGeneratedText( true ); + r.setGenerated( true ); + r.setAutoLabelDeleted( false ); + r.setAutoBackground( true ); + r.setRotation( (short) 0 ); + r.setShowCategoryLabelAsPercentage( false ); + r.setShowValueAsPercentage( false ); + r.setShowBubbleSizes( false ); + r.setShowLabel( false ); + r.setIndexOfColorValue( (short) 77 ); + r.setDataLabelPlacement( (short) 11088 ); + r.setTextRotation( (short) 0 ); + return r; + } + + private DefaultDataLabelTextPropertiesRecord createDefaultTextRecord( short categoryDataType ) + { + DefaultDataLabelTextPropertiesRecord r = new DefaultDataLabelTextPropertiesRecord(); + r.setCategoryDataType( categoryDataType ); + return r; + } + + private SheetPropertiesRecord createSheetPropsRecord() + { + SheetPropertiesRecord r = new SheetPropertiesRecord(); + r.setChartTypeManuallyFormatted( false ); + r.setPlotVisibleOnly( true ); + r.setDoNotSizeWithWindow( false ); + r.setDefaultPlotDimensions( true ); + r.setAutoPlotArea( false ); + return r; + } + + private SeriesToChartGroupRecord createSeriesToChartGroupRecord() + { + return new SeriesToChartGroupRecord(); + } + + private DataFormatRecord createDataFormatRecord() + { + DataFormatRecord r = new DataFormatRecord(); + r.setPointNumber( (short) -1 ); + r.setSeriesIndex( (short) 0 ); + r.setSeriesNumber( (short) 0 ); + r.setUseExcel4Colors( false ); + return r; + } + + private LinkedDataRecord createCategoriesLinkedDataRecord() + { + LinkedDataRecord r = new LinkedDataRecord(); + r.setLinkType( LinkedDataRecord.LINK_TYPE_CATEGORIES ); + r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET ); + r.setCustomNumberFormat( false ); + r.setIndexNumberFmtRecord( (short) 0 ); + Area3DPtg p = new Area3DPtg(0, 31, 1, 1, + false, false, false, false, 0); + r.setFormulaOfLink(new Ptg[] { p, }); + return r; + } + + private LinkedDataRecord createValuesLinkedDataRecord() + { + 
LinkedDataRecord r = new LinkedDataRecord(); + r.setLinkType( LinkedDataRecord.LINK_TYPE_VALUES ); + r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET ); + r.setCustomNumberFormat( false ); + r.setIndexNumberFmtRecord( (short) 0 ); + Area3DPtg p = new Area3DPtg(0, 31, 0, 0, + false, false, false, false, 0); + r.setFormulaOfLink(new Ptg[] { p, }); + return r; + } + + private LinkedDataRecord createTitleLinkedDataRecord() + { + LinkedDataRecord r = new LinkedDataRecord(); + r.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT ); + r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT ); + r.setCustomNumberFormat( false ); + r.setIndexNumberFmtRecord( (short) 0 ); + r.setFormulaOfLink(null); + return r; + } + + private SeriesRecord createSeriesRecord() + { + SeriesRecord r = new SeriesRecord(); + r.setCategoryDataType( SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC ); + r.setValuesDataType( SeriesRecord.VALUES_DATA_TYPE_NUMERIC ); + r.setNumCategories( (short) 32 ); + r.setNumValues( (short) 31 ); + r.setBubbleSeriesType( SeriesRecord.BUBBLE_SERIES_TYPE_NUMERIC ); + r.setNumBubbleValues( (short) 0 ); + return r; + } + + private EndRecord createEndRecord() + { + return new EndRecord(); + } + + private AreaFormatRecord createAreaFormatRecord1() + { + AreaFormatRecord r = new AreaFormatRecord(); + r.setForegroundColor( 16777215 ); // RGB Color + r.setBackgroundColor( 0 ); // RGB Color + r.setPattern( (short) 1 ); // TODO: Add Pattern constants to record + r.setAutomatic( true ); + r.setInvert( false ); + r.setForecolorIndex( (short) 78 ); + r.setBackcolorIndex( (short) 77 ); + return r; + } + + private AreaFormatRecord createAreaFormatRecord2() + { + AreaFormatRecord r = new AreaFormatRecord(); + r.setForegroundColor(0x00c0c0c0); + r.setBackgroundColor(0x00000000); + r.setPattern((short)1); + r.setAutomatic(false); + r.setInvert(false); + r.setForecolorIndex((short)22); + r.setBackcolorIndex((short)79); + return r; + } + + private LineFormatRecord 
createLineFormatRecord( boolean drawTicks ) + { + LineFormatRecord r = new LineFormatRecord(); + r.setLineColor( 0 ); + r.setLinePattern( LineFormatRecord.LINE_PATTERN_SOLID ); + r.setWeight( (short) -1 ); + r.setAuto( true ); + r.setDrawTicks( drawTicks ); + r.setColourPaletteIndex( (short) 77 ); // what colour is this? + return r; + } + + private LineFormatRecord createLineFormatRecord2() + { + LineFormatRecord r = new LineFormatRecord(); + r.setLineColor( 0x00808080 ); + r.setLinePattern( (short) 0 ); + r.setWeight( (short) 0 ); + r.setAuto( false ); + r.setDrawTicks( false ); + r.setUnknown( false ); + r.setColourPaletteIndex( (short) 23 ); + return r; + } + + private FrameRecord createFrameRecord1() + { + FrameRecord r = new FrameRecord(); + r.setBorderType( FrameRecord.BORDER_TYPE_REGULAR ); + r.setAutoSize( false ); + r.setAutoPosition( true ); + return r; + } + + private FrameRecord createFrameRecord2() + { + FrameRecord r = new FrameRecord(); + r.setBorderType( FrameRecord.BORDER_TYPE_REGULAR ); + r.setAutoSize( true ); + r.setAutoPosition( true ); + return r; + } + + private PlotGrowthRecord createPlotGrowthRecord( int horizScale, int vertScale ) + { + PlotGrowthRecord r = new PlotGrowthRecord(); + r.setHorizontalScale( horizScale ); + r.setVerticalScale( vertScale ); + return r; + } + + private SCLRecord createSCLRecord( short numerator, short denominator ) + { + SCLRecord r = new SCLRecord(); + r.setDenominator( denominator ); + r.setNumerator( numerator ); + return r; + } + + private BeginRecord createBeginRecord() + { + return new BeginRecord(); + } + + private ChartRecord createChartRecord( int x, int y, int width, int height ) + { + ChartRecord r = new ChartRecord(); + r.setX( x ); + r.setY( y ); + r.setWidth( width ); + r.setHeight( height ); + return r; + } + + private UnitsRecord createUnitsRecord() + { + UnitsRecord r = new UnitsRecord(); + r.setUnits( (short) 0 ); + return r; + } + + + /** + * A series in a chart + */ + public static class 
HSSFSeries { + private SeriesRecord series; + private SeriesTextRecord seriesTitleText; + private LinkedDataRecord dataName; + private LinkedDataRecord dataValues; + private LinkedDataRecord dataCategoryLabels; + private LinkedDataRecord dataSecondaryCategoryLabels; + + /* package */ HSSFSeries(SeriesRecord series) { + this.series = series; + } + + /* package */ void insertData(LinkedDataRecord data){ + switch(data.getLinkType()){ + + case LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT: + dataName = data; + break; + case LinkedDataRecord.LINK_TYPE_VALUES: + dataValues = data; + break; + case LinkedDataRecord.LINK_TYPE_CATEGORIES: + dataCategoryLabels = data; + break; + case LinkedDataRecord.LINK_TYPE_SECONDARY_CATEGORIES: + dataSecondaryCategoryLabels = data; + break; + default: + throw new IllegalStateException("Invalid link type: " + data.getLinkType()); + } + } + + /* package */ void setSeriesTitleText(SeriesTextRecord seriesTitleText) + { + this.seriesTitleText = seriesTitleText; + } + + public short getNumValues() { + return series.getNumValues(); + } + /** + * See {@link SeriesRecord} + */ + public short getValueType() { + return series.getValuesDataType(); + } + + /** + * Returns the series' title, if there is one, + * or null if not + */ + public String getSeriesTitle() { + if(seriesTitleText != null) { + return seriesTitleText.getText(); + } + return null; + } + + /** + * Changes the series' title, but only if there + * was one already. 
+ * TODO - add in the records if not + */ + public void setSeriesTitle(String title) { + if(seriesTitleText != null) { + seriesTitleText.setText(title); + } else { + throw new IllegalStateException("No series title found to change"); + } + } + + /** + * @return record with data names + */ + public LinkedDataRecord getDataName(){ + return dataName; + } + + /** + * @return record with data values + */ + public LinkedDataRecord getDataValues(){ + return dataValues; + } + + /** + * @return record with data category labels + */ + public LinkedDataRecord getDataCategoryLabels(){ + return dataCategoryLabels; + } + + /** + * @return record with data secondary category labels + */ + public LinkedDataRecord getDataSecondaryCategoryLabels() { + return dataSecondaryCategoryLabels; + } + + /** + * @return record with series + */ + public SeriesRecord getSeries() { + return series; + } + + private CellRangeAddressBase getCellRange(LinkedDataRecord linkedDataRecord) { + if (linkedDataRecord == null) + { + return null ; + } + + int firstRow = 0; + int lastRow = 0; + int firstCol = 0; + int lastCol = 0; + + for (Ptg ptg : linkedDataRecord.getFormulaOfLink()) { + if (ptg instanceof AreaPtgBase) { + AreaPtgBase areaPtg = (AreaPtgBase) ptg; + + firstRow = areaPtg.getFirstRow(); + lastRow = areaPtg.getLastRow(); + + firstCol = areaPtg.getFirstColumn(); + lastCol = areaPtg.getLastColumn(); + } + } + + return new CellRangeAddress(firstRow, lastRow, firstCol, lastCol); + } + + public CellRangeAddressBase getValuesCellRange() { + return getCellRange(dataValues); + } + + public CellRangeAddressBase getCategoryLabelsCellRange() { + return getCellRange(dataCategoryLabels); + } + + private Integer setVerticalCellRange(LinkedDataRecord linkedDataRecord, + CellRangeAddressBase range) { + if (linkedDataRecord == null) + { + return null; + } + + List ptgList = new ArrayList<>(); + + int rowCount = (range.getLastRow() - range.getFirstRow()) + 1; + int colCount = (range.getLastColumn() - 
range.getFirstColumn()) + 1; + + for (Ptg ptg : linkedDataRecord.getFormulaOfLink()) { + if (ptg instanceof AreaPtgBase) { + AreaPtgBase areaPtg = (AreaPtgBase) ptg; + + areaPtg.setFirstRow(range.getFirstRow()); + areaPtg.setLastRow(range.getLastRow()); + + areaPtg.setFirstColumn(range.getFirstColumn()); + areaPtg.setLastColumn(range.getLastColumn()); + ptgList.add(areaPtg); + } + } + + linkedDataRecord.setFormulaOfLink(ptgList.toArray(new Ptg[ptgList.size()])); + + return rowCount * colCount; + } + + public void setValuesCellRange(CellRangeAddressBase range) { + Integer count = setVerticalCellRange(dataValues, range); + if (count == null) + { + return; + } + + series.setNumValues((short)(int)count); + } + + public void setCategoryLabelsCellRange(CellRangeAddressBase range) { + Integer count = setVerticalCellRange(dataCategoryLabels, range); + if (count == null) + { + return; + } + + series.setNumCategories((short)(int)count); + } + } + + public HSSFSeries createSeries() throws Exception { + ArrayList seriesTemplate = new ArrayList<>(); + boolean seriesTemplateFilled = false; + + int idx = 0; + int deep = 0; + int chartRecordIdx = -1; + int chartDeep = -1; + int lastSeriesDeep = -1; + int endSeriesRecordIdx = -1; + int seriesIdx = 0; + final List records = sheet.getSheet().getRecords(); + + /* store first series as template and find last series index */ + for(final RecordBase record : records) { + + idx++; + + if (record instanceof BeginRecord) { + deep++; + } else if (record instanceof EndRecord) { + deep--; + + if (lastSeriesDeep == deep) { + lastSeriesDeep = -1; + endSeriesRecordIdx = idx; + if (!seriesTemplateFilled) { + seriesTemplate.add(record); + seriesTemplateFilled = true; + } + } + + if (chartDeep == deep) { + break; + } + } + + if (record instanceof ChartRecord) { + if (record == chartRecord) { + chartRecordIdx = idx; + chartDeep = deep; + } + } else if (record instanceof SeriesRecord) { + if (chartRecordIdx != -1) { + seriesIdx++; + lastSeriesDeep = 
deep; + } + } + + if (lastSeriesDeep != -1 && !seriesTemplateFilled) { + seriesTemplate.add(record) ; + } + } + + /* check if a series was found */ + if (endSeriesRecordIdx == -1) { + return null; + } + + /* next index in the records list where the new series can be inserted */ + idx = endSeriesRecordIdx + 1; + + HSSFSeries newSeries = null; + + /* duplicate record of the template series */ + ArrayList clonedRecords = new ArrayList<>(); + for(final RecordBase record : seriesTemplate) { + + Record newRecord = null; + + if (record instanceof BeginRecord) { + newRecord = new BeginRecord(); + } else if (record instanceof EndRecord) { + newRecord = new EndRecord(); + } else if (record instanceof SeriesRecord) { + SeriesRecord seriesRecord = (SeriesRecord) ((SeriesRecord)record).clone(); + newSeries = new HSSFSeries(seriesRecord); + newRecord = seriesRecord; + } else if (record instanceof LinkedDataRecord) { + LinkedDataRecord linkedDataRecord = ((LinkedDataRecord)record).clone(); + if (newSeries != null) { + newSeries.insertData(linkedDataRecord); + } + newRecord = linkedDataRecord; + } else if (record instanceof DataFormatRecord) { + DataFormatRecord dataFormatRecord = ((DataFormatRecord)record).clone(); + + dataFormatRecord.setSeriesIndex((short)seriesIdx) ; + dataFormatRecord.setSeriesNumber((short)seriesIdx) ; + + newRecord = dataFormatRecord; + } else if (record instanceof SeriesTextRecord) { + SeriesTextRecord seriesTextRecord = (SeriesTextRecord) ((SeriesTextRecord)record).clone(); + if (newSeries != null) { + newSeries.setSeriesTitleText(seriesTextRecord); + } + newRecord = seriesTextRecord; + } else if (record instanceof Record) { + newRecord = (Record) ((Record)record).clone(); + } + + if (newRecord != null) + { + clonedRecords.add(newRecord); + } + } + + /* check if a user model series object was created */ + if (newSeries == null) + { + return null; + } + + /* transfer series to record list */ + for(final RecordBase record : clonedRecords) { + 
records.add(idx++, record); + } + + return newSeries; + } + + public boolean removeSeries(HSSFSeries remSeries) { + int deep = 0; + int chartDeep = -1; + int lastSeriesDeep = -1; + int seriesIdx = -1; + boolean removeSeries = false; + boolean chartEntered = false; + boolean result = false; + final List records = sheet.getSheet().getRecords(); + + /* store first series as template and find last series index */ + Iterator iter = records.iterator(); + while (iter.hasNext()) { + RecordBase record = iter.next(); + + if (record instanceof BeginRecord) { + deep++; + } else if (record instanceof EndRecord) { + deep--; + + if (lastSeriesDeep == deep) { + lastSeriesDeep = -1; + + if (removeSeries) { + removeSeries = false; + result = true; + iter.remove(); + } + } + + if (chartDeep == deep) { + break; + } + } + + if (record instanceof ChartRecord) { + if (record == chartRecord) { + chartDeep = deep; + chartEntered = true; + } + } else if (record instanceof SeriesRecord) { + if (chartEntered) { + if (remSeries.series == record) { + lastSeriesDeep = deep; + removeSeries = true; + } else { + seriesIdx++; + } + } + } else if (record instanceof DataFormatRecord) { + if (chartEntered && !removeSeries) { + DataFormatRecord dataFormatRecord = (DataFormatRecord) record; + dataFormatRecord.setSeriesIndex((short) seriesIdx); + dataFormatRecord.setSeriesNumber((short) seriesIdx); + } + } + + if (removeSeries) { + iter.remove(); + } + } + + return result; + } + + public HSSFChartType getType() { + return type; + } +} diff --git a/src/java/org/apache/poi/ss/extractor/EmbeddedData.java b/src/java/org/apache/poi/ss/extractor/EmbeddedData.java new file mode 100644 index 0000000000..0e598b3175 --- /dev/null +++ b/src/java/org/apache/poi/ss/extractor/EmbeddedData.java @@ -0,0 +1,104 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ss.extractor; + +import org.apache.poi.ss.usermodel.Shape; + +/** + * A collection of embedded object informations and content + */ +public class EmbeddedData { + private String filename; + private byte[] embeddedData; + private Shape shape; + private String contentType = "binary/octet-stream"; + + public EmbeddedData(String filename, byte[] embeddedData, String contentType) { + setFilename(filename); + setEmbeddedData(embeddedData); + setContentType(contentType); + } + + /** + * @return the filename + */ + public String getFilename() { + return filename; + } + + /** + * Sets the filename + * + * @param filename the filename + */ + public void setFilename(String filename) { + if (filename == null) { + this.filename = "unknown.bin"; + } else { + this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim(); + } + } + + /** + * @return the embedded object byte array + */ + public byte[] getEmbeddedData() { + return embeddedData; + } + + /** + * Sets the embedded object as byte array + * + * @param embeddedData the embedded object byte array + */ + public void setEmbeddedData(byte[] embeddedData) { + this.embeddedData = (embeddedData == null) ? 
null : embeddedData.clone(); + } + + /** + * @return the shape which links to the embedded object + */ + public Shape getShape() { + return shape; + } + + /** + * Sets the shape which links to the embedded object + * + * @param shape the shape + */ + public void setShape(Shape shape) { + this.shape = shape; + } + + /** + * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} + */ + public String getContentType() { + return contentType; + } + + /** + * Sets the content-/mime-type + * + * @param contentType the content-type + */ + public void setContentType(String contentType) { + this.contentType = contentType; + } +} \ No newline at end of file diff --git a/src/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java b/src/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java new file mode 100644 index 0000000000..965a4d1a9e --- /dev/null +++ b/src/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java @@ -0,0 +1,405 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.ss.extractor; + +import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.apache.poi.hpsf.ClassID; +import org.apache.poi.hpsf.ClassIDPredefined; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.Entry; +import org.apache.poi.poifs.filesystem.Ole10Native; +import org.apache.poi.poifs.filesystem.Ole10NativeException; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.usermodel.Drawing; +import org.apache.poi.ss.usermodel.ObjectData; +import org.apache.poi.ss.usermodel.Picture; +import org.apache.poi.ss.usermodel.PictureData; +import org.apache.poi.ss.usermodel.Shape; +import org.apache.poi.ss.usermodel.ShapeContainer; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.util.Beta; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LocaleUtil; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * This extractor class tries to identify various embedded documents within Excel files + * and provide them via a common interface, i.e. 
the EmbeddedData instances + */ +@Beta +public class EmbeddedExtractor implements Iterable { + private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class); + //arbitrarily selected; may need to increase + private static final int MAX_RECORD_LENGTH = 1_000_000; + + // contentType + private static final String CONTENT_TYPE_BYTES = "binary/octet-stream"; + private static final String CONTENT_TYPE_PDF = "application/pdf"; + private static final String CONTENT_TYPE_DOC = "application/msword"; + private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel"; + + /** + * @return the list of known extractors, if you provide custom extractors, override this method + */ + @Override + public Iterator iterator() { + EmbeddedExtractor[] ee = { + new Ole10Extractor(), new PdfExtractor(), new BiffExtractor(), new OOXMLExtractor(), new FsExtractor() + }; + return Arrays.asList(ee).iterator(); + } + + public EmbeddedData extractOne(DirectoryNode src) throws IOException { + for (EmbeddedExtractor ee : this) { + if (ee.canExtract(src)) { + return ee.extract(src); + } + } + return null; + } + + public EmbeddedData extractOne(Picture src) throws IOException { + for (EmbeddedExtractor ee : this) { + if (ee.canExtract(src)) { + return ee.extract(src); + } + } + return null; + } + + public List extractAll(Sheet sheet) throws IOException { + Drawing patriarch = sheet.getDrawingPatriarch(); + if (null == patriarch){ + return Collections.emptyList(); + } + List embeddings = new ArrayList<>(); + extractAll(patriarch, embeddings); + return embeddings; + } + + protected void extractAll(ShapeContainer parent, List embeddings) throws IOException { + for (Shape shape : parent) { + EmbeddedData data = null; + if (shape instanceof ObjectData) { + ObjectData od = (ObjectData)shape; + try { + if (od.hasDirectoryEntry()) { + data = extractOne((DirectoryNode)od.getDirectory()); + } else { + data = new EmbeddedData(od.getFileName(), od.getObjectData(), 
od.getContentType()); + } + } catch (Exception e) { + LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e); + } + } else if (shape instanceof Picture) { + data = extractOne((Picture)shape); + } else if (shape instanceof ShapeContainer) { + extractAll((ShapeContainer)shape, embeddings); + } + + if (data == null) { + continue; + } + + data.setShape(shape); + String filename = data.getFilename(); + String extension = (filename == null || filename.lastIndexOf('.') == -1) ? ".bin" : filename.substring(filename.lastIndexOf('.')); + + // try to find an alternative name + if (filename == null || filename.isEmpty() || filename.startsWith("MBD") || filename.startsWith("Root Entry")) { + filename = shape.getShapeName(); + if (filename != null) { + filename += extension; + } + } + // default to dummy name + if (filename == null || filename.isEmpty()) { + filename = "picture_" + embeddings.size() + extension; + } + filename = filename.trim(); + data.setFilename(filename); + + embeddings.add(data); + } + } + + + public boolean canExtract(DirectoryNode source) { + return false; + } + + public boolean canExtract(Picture source) { + return false; + } + + protected EmbeddedData extract(DirectoryNode dn) throws IOException { + assert(canExtract(dn)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(20000); + try (POIFSFileSystem dest = new POIFSFileSystem()) { + copyNodes(dn, dest.getRoot()); + // start with a reasonable big size + dest.writeFilesystem(bos); + } + + return new EmbeddedData(dn.getName(), bos.toByteArray(), CONTENT_TYPE_BYTES); + } + + protected EmbeddedData extract(Picture source) throws IOException { + return null; + } + + public static class Ole10Extractor extends EmbeddedExtractor { + @Override + public boolean canExtract(DirectoryNode dn) { + ClassID clsId = dn.getStorageClsid(); + return ClassIDPredefined.lookup(clsId) == ClassIDPredefined.OLE_V1_PACKAGE; + } + + @Override + public EmbeddedData extract(DirectoryNode dn) throws 
IOException { + try { + // TODO: inspect the CompObj record for more details, i.e. the content type + Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn); + return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), CONTENT_TYPE_BYTES); + } catch (Ole10NativeException e) { + throw new IOException(e); + } + } + } + + static class PdfExtractor extends EmbeddedExtractor { + static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}"); + @Override + public boolean canExtract(DirectoryNode dn) { + ClassID clsId = dn.getStorageClsid(); + return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS")); + } + + @Override + public EmbeddedData extract(DirectoryNode dn) throws IOException { + try(ByteArrayOutputStream bos = new ByteArrayOutputStream(); + InputStream is = dn.createDocumentInputStream("CONTENTS")) { + IOUtils.copy(is, bos); + return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF); + } + } + + @Override + public boolean canExtract(Picture source) { + PictureData pd = source.getPictureData(); + return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF); + } + + /** + * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF. + * If an embedded stream is inside an EMF picture, this method extracts the payload. + * + * @return the embedded data in an EMF picture or null if none is found + */ + @Override + protected EmbeddedData extract(Picture source) throws IOException { + // check for emf+ embedded pdf (poor mans style :( ) + // Mac Excel 2011 embeds pdf files with this method. 
+ PictureData pd = source.getPictureData(); + if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) { + return null; + } + + // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF + byte pictureBytes[] = pd.getData(); + int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252)); + if (idxStart == -1) { + return null; + } + + int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252)); + if (idxEnd == -1) { + return null; + } + + int pictureBytesLen = idxEnd-idxStart+6; + byte[] pdfBytes = IOUtils.safelyAllocate(pictureBytesLen, MAX_RECORD_LENGTH); + System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen); + String filename = source.getShapeName().trim(); + if (!endsWithIgnoreCase(filename, ".pdf")) { + filename += ".pdf"; + } + return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF); + } + + + } + + static class OOXMLExtractor extends EmbeddedExtractor { + @Override + public boolean canExtract(DirectoryNode dn) { + return dn.hasEntry("package"); + } + + @Override + public EmbeddedData extract(DirectoryNode dn) throws IOException { + + ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid()); + + String contentType = null; + String ext = null; + + if (clsId != null) { + contentType = clsId.getContentType(); + ext = clsId.getFileExtension(); + } + + if (contentType == null || ext == null) { + contentType = "application/zip"; + ext = ".zip"; + } + + DocumentInputStream dis = dn.createDocumentInputStream("package"); + byte data[] = IOUtils.toByteArray(dis); + dis.close(); + + return new EmbeddedData(dn.getName()+ext, data, contentType); + } + } + + static class BiffExtractor extends EmbeddedExtractor { + @Override + public boolean canExtract(DirectoryNode dn) { + return canExtractExcel(dn) || canExtractWord(dn); + } + + protected boolean canExtractExcel(DirectoryNode dn) { + ClassIDPredefined clsId = 
ClassIDPredefined.lookup(dn.getStorageClsid()); + return (ClassIDPredefined.EXCEL_V7 == clsId + || ClassIDPredefined.EXCEL_V8 == clsId + || dn.hasEntry("Workbook") /*...*/); + } + + protected boolean canExtractWord(DirectoryNode dn) { + ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid()); + return (ClassIDPredefined.WORD_V7 == clsId + || ClassIDPredefined.WORD_V8 == clsId + || dn.hasEntry("WordDocument")); + } + + @Override + public EmbeddedData extract(DirectoryNode dn) throws IOException { + EmbeddedData ed = super.extract(dn); + if (canExtractExcel(dn)) { + ed.setFilename(dn.getName() + ".xls"); + ed.setContentType(CONTENT_TYPE_XLS); + } else if (canExtractWord(dn)) { + ed.setFilename(dn.getName() + ".doc"); + ed.setContentType(CONTENT_TYPE_DOC); + } + + return ed; + } + } + + static class FsExtractor extends EmbeddedExtractor { + @Override + public boolean canExtract(DirectoryNode dn) { + return true; + } + @Override + public EmbeddedData extract(DirectoryNode dn) throws IOException { + EmbeddedData ed = super.extract(dn); + ed.setFilename(dn.getName() + ".ole"); + // TODO: read the content type from the CompObj stream + return ed; + } + } + + protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException { + for (Entry e : src) { + if (e instanceof DirectoryNode) { + DirectoryNode srcDir = (DirectoryNode)e; + DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName()); + destDir.setStorageClsid(srcDir.getStorageClsid()); + copyNodes(srcDir, destDir); + } else { + try (InputStream is = src.createDocumentInputStream(e)) { + dest.createDocument(e.getName(), is); + } + } + } + } + + + + /** + * Knuth-Morris-Pratt Algorithm for Pattern Matching + * Finds the first occurrence of the pattern in the text.
+ */ + private static int indexOf(byte[] data, int offset, byte[] pattern) { + int[] failure = computeFailure(pattern); + + int j = 0; + if (data.length == 0) { + return -1; + } + + for (int i = offset; i < data.length; i++) { + while (j > 0 && pattern[j] != data[i]) { + j = failure[j - 1]; + } + if (pattern[j] == data[i]) { j++; } + if (j == pattern.length) { + return i - pattern.length + 1; + } + } + return -1; + } + + /** + * Computes the failure function using a boot-strapping process, + * where the pattern is matched against itself. + */ + private static int[] computeFailure(byte[] pattern) { + int[] failure = new int[pattern.length]; + + int j = 0; + for (int i = 1; i < pattern.length; i++) { + while (j > 0 && pattern[j] != pattern[i]) { + j = failure[j - 1]; + } + if (pattern[j] == pattern[i]) { + j++; + } + failure[i] = j; + } + + return failure; + } + + +} diff --git a/src/java/org/apache/poi/ss/usermodel/WorkbookFactory.java b/src/java/org/apache/poi/ss/usermodel/WorkbookFactory.java new file mode 100644 index 0000000000..d62fdfc85f --- /dev/null +++ b/src/java/org/apache/poi/ss/usermodel/WorkbookFactory.java @@ -0,0 +1,329 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ss.usermodel; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + +import org.apache.poi.EncryptedDocumentException; +import org.apache.poi.OldFileFormatException; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.OfficeXmlFileException; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Removal; + +/** + * Factory for creating the appropriate kind of Workbook + * (be it {@link HSSFWorkbook} or XSSFWorkbook), + * by auto-detecting from the supplied input. + */ +public class WorkbookFactory { + /** + * Creates a HSSFWorkbook from the given NPOIFSFileSystem

+ * + * Note that in order to properly release resources the + * Workbook should be closed after use. + * + * @param fs The {@link NPOIFSFileSystem} to read the document from + * + * @return The created workbook + * + * @throws IOException if an error occurs while reading the data + */ + public static Workbook create(NPOIFSFileSystem fs) throws IOException { + return create(fs, null); + } + + /** + * Creates a Workbook from the given NPOIFSFileSystem, which may + * be password protected + * + * @param fs The {@link NPOIFSFileSystem} to read the document from + * @param password The password that should be used or null if no password is necessary. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + */ + private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException { + return create(fs.getRoot(), password); + } + + + /** + * Creates a Workbook from the given DirectoryNode. + * + * @param root The {@link DirectoryNode} to start reading the document from + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + */ + public static Workbook create(final DirectoryNode root) throws IOException { + return create(root, null); + } + + + /** + * Creates a Workbook from the given DirectoryNode, which may + * be password protected + * + * @param root The {@link DirectoryNode} to start reading the document from + * @param password The password that should be used or null if no password is necessary. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + */ + public static Workbook create(final DirectoryNode root, String password) throws IOException { + // Encrypted OOXML files go inside OLE2 containers, is this one?
+ if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { + InputStream stream = null; + try { + stream = DocumentFactoryHelper.getDecryptedStream(root, password); + + return createXSSFWorkbook(stream); + } finally { + IOUtils.closeQuietly(stream); + } + } + + // If we get here, it isn't an encrypted XLSX file + // So, treat it as a regular HSSF XLS one + boolean passwordSet = false; + if (password != null) { + Biff8EncryptionKey.setCurrentUserPassword(password); + passwordSet = true; + } + try { + return createHSSFWorkbook(root); + } finally { + if (passwordSet) { + Biff8EncryptionKey.setCurrentUserPassword(null); + } + } + } + + /** + * Creates a XSSFWorkbook from the given OOXML Package. + * As the WorkbookFactory is located in the POI module, which doesn't know about the OOXML formats, + * this can only be achieved by using an Object reference to the OPCPackage. + * + *

Note that in order to properly release resources the + * Workbook should be closed after use.

+ * + * @param pkg The {@link OPCPackage} opened for reading data. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + * + * @deprecated use XSSFWorkbookFactory.create + */ + @Deprecated + @Removal(version = "4.2.0") + public static Workbook create(Object pkg) throws IOException { + return createXSSFWorkbook(pkg); + } + + /** + * Creates the appropriate HSSFWorkbook / XSSFWorkbook from + * the given InputStream. + * + *

Your input stream MUST either support mark/reset, or + * be wrapped as a {@link BufferedInputStream}! + * Note that using an {@link InputStream} has a higher memory footprint + * than using a {@link File}.

+ * + *

Note that in order to properly release resources the + * Workbook should be closed after use. Note also that loading + * from an InputStream requires more memory than loading + * from a File, so prefer {@link #create(File)} where possible. + * + * @param inp The {@link InputStream} to read data from. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + * @throws EncryptedDocumentException If the Workbook given is password protected + */ + public static Workbook create(InputStream inp) throws IOException, EncryptedDocumentException { + return create(inp, null); + } + + /** + * Creates the appropriate HSSFWorkbook / XSSFWorkbook from + * the given InputStream, which may be password protected. + * + *

Your input stream MUST either support mark/reset, or + * be wrapped as a {@link BufferedInputStream}! + * Note that using an {@link InputStream} has a higher memory footprint + * than using a {@link File}.

+ * + *

Note that in order to properly release resources the + * Workbook should be closed after use. Note also that loading + * from an InputStream requires more memory than loading + * from a File, so prefer {@link #create(File)} where possible.

+ * + * @param inp The {@link InputStream} to read data from. + * @param password The password that should be used or null if no password is necessary. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + * @throws EncryptedDocumentException If the wrong password is given for a protected file + */ + public static Workbook create(InputStream inp, String password) throws IOException, EncryptedDocumentException { + InputStream is = FileMagic.prepareToCheckMagic(inp); + FileMagic fm = FileMagic.valueOf(is); + + switch (fm) { + case OLE2: + NPOIFSFileSystem fs = new NPOIFSFileSystem(is); + return create(fs, password); + case OOXML: + return createXSSFWorkbook(is); + default: + throw new IOException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); + } + } + + /** + * Creates the appropriate HSSFWorkbook / XSSFWorkbook from + * the given File, which must exist and be readable. + *

Note that in order to properly release resources the + * Workbook should be closed after use. + * + * @param file The file to read data from. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + * @throws EncryptedDocumentException If the Workbook given is password protected + */ + public static Workbook create(File file) throws IOException, EncryptedDocumentException { + return create(file, null); + } + + /** + * Creates the appropriate HSSFWorkbook / XSSFWorkbook from + * the given File, which must exist and be readable, and + * may be password protected + *

Note that in order to properly release resources the + * Workbook should be closed after use. + * + * @param file The file to read data from. + * @param password The password that should be used or null if no password is necessary. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + * @throws EncryptedDocumentException If the wrong password is given for a protected file + */ + public static Workbook create(File file, String password) throws IOException, EncryptedDocumentException { + return create(file, password, false); + } + + /** + * Creates the appropriate HSSFWorkbook / XSSFWorkbook from + * the given File, which must exist and be readable, and + * may be password protected + *

Note that in order to properly release resources the + * Workbook should be closed after use. + * + * @param file The file to read data from. + * @param password The password that should be used or null if no password is necessary. + * @param readOnly If the Workbook should be opened in read-only mode to avoid writing back + * changes when the document is closed. + * + * @return The created Workbook + * + * @throws IOException if an error occurs while reading the data + * @throws EncryptedDocumentException If the wrong password is given for a protected file + */ + public static Workbook create(File file, String password, boolean readOnly) throws IOException, EncryptedDocumentException { + if (!file.exists()) { + throw new FileNotFoundException(file.toString()); + } + + NPOIFSFileSystem fs = null; + try { + fs = new NPOIFSFileSystem(file, readOnly); + return create(fs, password); + } catch(OfficeXmlFileException e) { + IOUtils.closeQuietly(fs); + return createXSSFWorkbook(file, readOnly); + } catch(RuntimeException e) { + IOUtils.closeQuietly(fs); + throw e; + } + } + + private static Workbook createHSSFWorkbook(Object... args) throws IOException, EncryptedDocumentException { + return createWorkbook("org.apache.poi.hssf.usermodel.HSSFWorkbookFactory", args); + } + + private static Workbook createXSSFWorkbook(Object... 
args) throws IOException, EncryptedDocumentException { + return createWorkbook("org.apache.poi.xssf.usermodel.XSSFWorkbookFactory", args); + } + + private static Workbook createWorkbook(String factoryClass, Object args[]) throws IOException, EncryptedDocumentException { + try { + Class clazz = Thread.currentThread().getContextClassLoader().loadClass(factoryClass); + Class argsClz[] = new Class[args.length]; + int i=0; + for (Object o : args) { + Class c = o.getClass(); + if (Boolean.class.isAssignableFrom(c)) { + c = boolean.class; + } else if (InputStream.class.isAssignableFrom(c)) { + c = InputStream.class; + } + argsClz[i++] = c; + } + Method m = clazz.getMethod("createWorkbook", argsClz); + return (Workbook)m.invoke(null, args); + } catch (InvocationTargetException e) { + Throwable t = e.getCause(); + if (t instanceof IOException) { + throw (IOException)t; + } else if (t instanceof EncryptedDocumentException) { + throw (EncryptedDocumentException)t; + } else if (t instanceof OldFileFormatException) { + throw (OldFileFormatException)t; + } else if (t instanceof RuntimeException) { + throw (RuntimeException)t; + } else { + throw new IOException(t.getMessage(), t); + } + } catch (Exception e) { + throw new IOException(e); + } + } + +} diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java deleted file mode 100644 index a7eaaf2c87..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ /dev/null @@ -1,228 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.xmlbeans.impl.common.SystemCache; - -/** - * This holds the common functionality for all POI OOXML Document classes. 
- */ -public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable { - public static final String DOCUMENT_CREATOR = "Apache POI"; - - // OLE embeddings relation name - public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; - - // Embedded OPC documents relation name - public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"; - - /** The OPC Package */ - private OPCPackage pkg; - - /** - * The properties of the OPC package, opened as needed - */ - private POIXMLProperties properties; - - protected POIXMLDocument(OPCPackage pkg) { - super(pkg); - init(pkg); - } - - protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) { - super(pkg, coreDocumentRel); - init(pkg); - } - - private void init(OPCPackage p) { - this.pkg = p; - - // Workaround for XMLBEANS-512 - ensure that when we parse - // the file, we start with a fresh XML Parser each time, - // and avoid the risk of getting a SaxHandler that's in error - SystemCache.get().setSaxLoader(null); - } - - /** - * Wrapper to open a package, which works around shortcomings in java's this() constructor calls - * - * @param path the path to the document - * @return the new OPCPackage - * - * @exception IOException if there was a problem opening the document - */ - public static OPCPackage openPackage(String path) throws IOException { - try { - return OPCPackage.open(path); - } catch (InvalidFormatException e) { - throw new IOException(e.toString(), e); - } - } - - /** - * Get the assigned OPCPackage - * - * @return the assigned OPCPackage - */ - public OPCPackage getPackage() { - return this.pkg; - } - - protected PackagePart getCorePart() { - return getPackagePart(); - } - - /** - * Retrieves all the PackageParts which are defined as relationships of the base document with the - * specified content type. 
- * - * @param contentType the content type - * - * @return all the base document PackageParts which match the content type - * - * @throws InvalidFormatException when the relationships or the parts contain errors - * - * @see org.apache.poi.xssf.usermodel.XSSFRelation - * @see org.apache.poi.xslf.usermodel.XSLFRelation - * @see org.apache.poi.xwpf.usermodel.XWPFRelation - * @see org.apache.poi.xdgf.usermodel.XDGFRelation - */ - protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { - PackageRelationshipCollection partsC = - getPackagePart().getRelationshipsByType(contentType); - - PackagePart[] parts = new PackagePart[partsC.size()]; - int count = 0; - for (PackageRelationship rel : partsC) { - parts[count] = getPackagePart().getRelatedPart(rel); - count++; - } - return parts; - } - - /** - * Get the document properties. This gives you access to the - * core ooxml properties, and the extended ooxml properties. - * - * @return the document properties - */ - public POIXMLProperties getProperties() { - if(properties == null) { - try { - properties = new POIXMLProperties(pkg); - } catch (Exception e){ - throw new POIXMLException(e); - } - } - return properties; - } - - /** - * Get the document's embedded files. - * - * @return the document's embedded files - * - * @throws OpenXML4JException if the embedded parts can't be determined - */ - public abstract List getAllEmbedds() throws OpenXML4JException; - - protected final void load(POIXMLFactory factory) throws IOException { - Map context = new HashMap<>(); - try { - read(factory, context); - } catch (OpenXML4JException e){ - throw new POIXMLException(e); - } - onDocumentRead(); - context.clear(); - } - - /** - * Closes the underlying {@link OPCPackage} from which this - * document was read, if there is one - * - *

Once this has been called, no further - * operations, updates or reads should be performed on the - * document. - * - * @throws IOException for writable packages, if an IO exception occur during the saving process. - */ - @Override - public void close() throws IOException { - if (pkg != null) { - if (pkg.getPackageAccess() == PackageAccess.READ) { - pkg.revert(); - } else { - pkg.close(); - } - pkg = null; - } - } - - /** - * Write out this document to an Outputstream. - * - * Note - if the Document was opened from a {@link File} rather - * than an {@link InputStream}, you must write out to - * a different file, overwriting via an OutputStream isn't possible. - * - * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive - * or has a high cost/latency associated with each written byte, - * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream} - * to improve write performance. - * - * @param stream - the java OutputStream you wish to write the file to - * - * @exception IOException if anything can't be written. - */ - @SuppressWarnings("resource") - public final void write(OutputStream stream) throws IOException { - OPCPackage p = getPackage(); - if(p == null) { - throw new IOException("Cannot write data, document seems to have been closed already"); - } - - //force all children to commit their changes into the underlying OOXML Package - // TODO Shouldn't they be committing to the new one instead? 
- Set context = new HashSet<>(); - onSave(context); - context.clear(); - - //save extended and custom properties - getProperties().commit(); - - p.save(stream); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java deleted file mode 100644 index e977e6ea39..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java +++ /dev/null @@ -1,746 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagePartName; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.util.Internal; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.xddf.usermodel.chart.XDDFChart; -import org.apache.poi.xssf.usermodel.XSSFRelation; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; - -/** - * Represents an entry of a OOXML package. - *

- * Each POIXMLDocumentPart keeps a reference to the underlying a {@link org.apache.poi.openxml4j.opc.PackagePart}. - *

- */ -public class POIXMLDocumentPart { - private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class); - - private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT; - private PackagePart packagePart; - private POIXMLDocumentPart parent; - private Map relations = new LinkedHashMap<>(); - private boolean isCommited = false; - - /** - * to check whether embedded part is already committed - * - * @return return true if embedded part is committed - */ - public boolean isCommited() { - return isCommited; - } - - /** - * setter method to set embedded part is committed - * - * @param isCommited boolean value - */ - public void setCommited(boolean isCommited) { - this.isCommited = isCommited; - } - - /** - * The RelationPart is a cached relationship between the document, which contains the RelationPart, - * and one of its referenced child document parts. - * The child document parts may only belong to one parent, but it's often referenced by other - * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it. - */ - public static class RelationPart { - private final PackageRelationship relationship; - private final POIXMLDocumentPart documentPart; - - RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) { - this.relationship = relationship; - this.documentPart = documentPart; - } - - /** - * @return the cached relationship, which uniquely identifies this child document part within the parent - */ - public PackageRelationship getRelationship() { - return relationship; - } - - /** - * @param the cast of the caller to a document sub class - * @return the child document part - */ - @SuppressWarnings("unchecked") - public T getDocumentPart() { - return (T) documentPart; - } - } - - /** - * Counter that provides the amount of incoming relations from other parts - * to this part. 
- */ - private int relationCounter; - - int incrementRelationCounter() { - relationCounter++; - return relationCounter; - } - - int decrementRelationCounter() { - relationCounter--; - return relationCounter; - } - - int getRelationCounter() { - return relationCounter; - } - - /** - * Construct POIXMLDocumentPart representing a "core document" package part. - * - * @param pkg the OPCPackage containing this document - */ - public POIXMLDocumentPart(OPCPackage pkg) { - this(pkg, PackageRelationshipTypes.CORE_DOCUMENT); - } - - /** - * Construct POIXMLDocumentPart representing a custom "core document" package part. - * - * @param pkg the OPCPackage containing this document - * @param coreDocumentRel the relation type of this document - */ - public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) { - this(getPartFromOPCPackage(pkg, coreDocumentRel)); - this.coreDocumentRel = coreDocumentRel; - } - - /** - * Creates new POIXMLDocumentPart - called by client code to create new parts from scratch. - * - * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean) - */ - public POIXMLDocumentPart() { - } - - /** - * Creates an POIXMLDocumentPart representing the given package part and relationship. - * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. - * - * @param part - The package part that holds xml data representing this sheet. - * @see #read(POIXMLFactory, java.util.Map) - * @since POI 3.14-Beta1 - */ - public POIXMLDocumentPart(PackagePart part) { - this(null, part); - } - - /** - * Creates an POIXMLDocumentPart representing the given package part, relationship and parent - * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. - * - * @param parent - Parent part - * @param part - The package part that holds xml data representing this sheet. 
- * @see #read(POIXMLFactory, java.util.Map) - * @since POI 3.14-Beta1 - */ - public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) { - this.packagePart = part; - this.parent = parent; - } - - /** - * When you open something like a theme, call this to - * re-base the XML Document onto the core child of the - * current core document - * - * @param pkg the package to be rebased - * @throws InvalidFormatException if there was an error in the core document relation - * @throws IllegalStateException if there are more than one core document relations - */ - protected final void rebase(OPCPackage pkg) throws InvalidFormatException { - PackageRelationshipCollection cores = - packagePart.getRelationshipsByType(coreDocumentRel); - if (cores.size() != 1) { - throw new IllegalStateException( - "Tried to rebase using " + coreDocumentRel + - " but found " + cores.size() + " parts of the right type" - ); - } - packagePart = packagePart.getRelatedPart(cores.getRelationship(0)); - } - - /** - * Provides access to the underlying PackagePart - * - * @return the underlying PackagePart - */ - public final PackagePart getPackagePart() { - return packagePart; - } - - /** - * Returns the list of child relations for this POIXMLDocumentPart - * - * @return child relations - */ - public final List getRelations() { - List l = new ArrayList<>(); - for (RelationPart rp : relations.values()) { - l.add(rp.getDocumentPart()); - } - return Collections.unmodifiableList(l); - } - - /** - * Returns the list of child relations for this POIXMLDocumentPart - * - * @return child relations - */ - public final List getRelationParts() { - List l = new ArrayList<>(relations.values()); - return Collections.unmodifiableList(l); - } - - /** - * Returns the target {@link POIXMLDocumentPart}, where a - * {@link PackageRelationship} is set from the {@link PackagePart} of this - * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target - * {@link POIXMLDocumentPart} with a {@link 
PackageRelationship#getId()} - * matching the given parameter value. - * - * @param id The relation id to look for - * @return the target part of the relation, or null, if none exists - */ - public final POIXMLDocumentPart getRelationById(String id) { - RelationPart rp = getRelationPartById(id); - return (rp == null) ? null : rp.getDocumentPart(); - } - - /** - * Returns the target {@link RelationPart}, where a - * {@link PackageRelationship} is set from the {@link PackagePart} of this - * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target - * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()} - * matching the given parameter value. - * - * @param id The relation id to look for - * @return the target relation part, or null, if none exists - * @since 4.0.0 - */ - public final RelationPart getRelationPartById(String id) { - return relations.get(id); - } - - /** - * Returns the first {@link PackageRelationship#getId()} of the - * {@link PackageRelationship}, that sources from the {@link PackagePart} of - * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given - * parameter value.

- *

- * There can be multiple references to the given {@link POIXMLDocumentPart} - * and only the first in the order of creation is returned. - * - * @param part The {@link POIXMLDocumentPart} for which the according - * relation-id shall be found. - * @return The value of the {@link PackageRelationship#getId()} or null, if - * parts are not related. - */ - public final String getRelationId(POIXMLDocumentPart part) { - for (RelationPart rp : relations.values()) { - if (rp.getDocumentPart() == part) { - return rp.getRelationship().getId(); - } - } - return null; - } - - /** - * Add a new child POIXMLDocumentPart - * - * @param relId the preferred relation id, when null the next free relation id will be used - * @param relationshipType the package relationship type - * @param part the child to add - * @return the new RelationPart - * @since 3.14-Beta1 - */ - public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) { - PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart()); - if (pr == null) { - PackagePartName ppn = part.getPackagePart().getPartName(); - String relType = relationshipType.getRelation(); - pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId); - } - addRelation(pr, part); - return new RelationPart(pr, part); - } - - /** - * Add a new child POIXMLDocumentPart - * - * @param pr the relationship of the child - * @param part the child to add - */ - private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) { - relations.put(pr.getId(), new RelationPart(pr, part)); - part.incrementRelationCounter(); - - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed.

- *

- * If there are multiple relationships to the same part, this will only - * remove the first relationship in the order of creation. The removal - * via the part id ({@link #removeRelation(String)} is preferred. - * - * @param part the part which relation is to be removed from this document - */ - protected final void removeRelation(POIXMLDocumentPart part) { - removeRelation(part, true); - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed and flag is set to true.

- *

- * If there are multiple relationships to the same part, this will only - * remove the first relationship in the order of creation. The removal - * via the part id ({@link #removeRelation(String, boolean)} is preferred. - * - * @param part The related part, to which the relation shall be removed. - * @param removeUnusedParts true, if the part shall be removed from the package if not - * needed any longer. - * @return true, if the relation was removed - */ - protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) { - String id = getRelationId(part); - return removeRelation(id, removeUnusedParts); - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed.

- *

- * If there are multiple relationships to the same part, this will only - * remove the first relationship in the order of creation. The removal - * via the part id ({@link #removeRelation(String)} is preferred. - * - * @param partId the part id which relation is to be removed from this document - * @since 4.0.0 - */ - protected final void removeRelation(String partId) { - removeRelation(partId, true); - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed and flag is set to true.

- * - * @param partId The related part id, to which the relation shall be removed. - * @param removeUnusedParts true, if the part shall be removed from the package if not - * needed any longer. - * @return true, if the relation was removed - * @since 4.0.0 - */ - private final boolean removeRelation(String partId, boolean removeUnusedParts) { - RelationPart rp = relations.get(partId); - if (rp == null) { - // part is not related with this POIXMLDocumentPart - return false; - } - POIXMLDocumentPart part = rp.getDocumentPart(); - /* decrement usage counter */ - part.decrementRelationCounter(); - /* remove packagepart relationship */ - getPackagePart().removeRelationship(partId); - /* remove POIXMLDocument from relations */ - relations.remove(partId); - - if (removeUnusedParts) { - /* if last relation to target part was removed, delete according target part */ - if (part.getRelationCounter() == 0) { - try { - part.onDocumentRemove(); - } catch (IOException e) { - throw new POIXMLException(e); - } - getPackagePart().getPackage().removePart(part.getPackagePart()); - } - } - return true; - } - - - /** - * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent. - * - * @return the parent POIXMLDocumentPart or null for the root element. - */ - public final POIXMLDocumentPart getParent() { - return parent; - } - - @Override - public String toString() { - return packagePart == null ? "" : packagePart.toString(); - } - - /** - * Save the content in the underlying package part. - * Default implementation is empty meaning that the package part is left unmodified. - *

- * Sub-classes should override and add logic to marshal the "model" into Ooxml4J. - *

- * For example, the code saving a generic XML entry may look as follows: - *

-     * protected void commit() throws IOException {
-     *   PackagePart part = getPackagePart();
-     *   OutputStream out = part.getOutputStream();
-     *   XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
-     *   bean.save(out, DEFAULT_XML_OPTIONS);
-     *   out.close();
-     * }
-     * 
- * - * @throws IOException a subclass may throw an IOException if the changes can't be committed - */ - protected void commit() throws IOException { - - } - - /** - * Save changes in the underlying OOXML package. - * Recursively fires {@link #commit()} for each package part - * - * @param alreadySaved context set containing already visited nodes - * @throws IOException a related part may throw an IOException if the changes can't be saved - */ - protected final void onSave(Set alreadySaved) throws IOException { - //if part is already committed then return - if (this.isCommited) { - return; - } - - // this usually clears out previous content in the part... - prepareForCommit(); - - commit(); - alreadySaved.add(this.getPackagePart()); - for (RelationPart rp : relations.values()) { - POIXMLDocumentPart p = rp.getDocumentPart(); - if (!alreadySaved.contains(p.getPackagePart())) { - p.onSave(alreadySaved); - } - } - } - - /** - * Ensure that a memory based package part does not have lingering data from previous - * commit() calls. - *

- * Note: This is overwritten for some objects, as *PictureData seem to store the actual content - * in the part directly without keeping a copy like all others therefore we need to handle them differently. - */ - protected void prepareForCommit() { - PackagePart part = this.getPackagePart(); - if (part != null) { - part.clear(); - } - } - - /** - * Create a new child POIXMLDocumentPart - * - * @param descriptor the part descriptor - * @param factory the factory that will create an instance of the requested relation - * @return the created child POIXMLDocumentPart - * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain - * equivalent part names and package implementers shall neither - * create nor recognize packages with equivalent part names. - */ - public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) { - return createRelationship(descriptor, factory, -1, false).getDocumentPart(); - } - - /** - * Create a new child POIXMLDocumentPart - * - * @param descriptor the part descriptor - * @param factory the factory that will create an instance of the requested relation - * @param idx part number - * @return the created child POIXMLDocumentPart - * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain - * equivalent part names and package implementers shall neither - * create nor recognize packages with equivalent part names. - */ - public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) { - return createRelationship(descriptor, factory, idx, false).getDocumentPart(); - } - - /** - * Identifies the next available part number for a part of the given type, - * if possible, otherwise -1 if none are available. 
- * The found (valid) index can then be safely given to - * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or - * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)} - * without naming clashes. - * If parts with other types are already claiming a name for this relationship - * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace - * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered - * when finding the next spare number. - * - * @param descriptor The relationship type to find the part number for - * @param minIdx The minimum free index to assign, use -1 for any - * @return The next free part number, or -1 if none available - */ - protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) { - OPCPackage pkg = packagePart.getPackage(); - - try { - String name = descriptor.getDefaultFileName(); - if (name.equals(descriptor.getFileName(9999))) { - // Non-index based, check if default is free - PackagePartName ppName = PackagingURIHelper.createPartName(name); - if (pkg.containPart(ppName)) { - // Default name already taken, not index based, nothing free - return -1; - } else { - // Default name free - return 0; - } - } - - // Default to searching from 1, unless they asked for 0+ - int idx = (minIdx < 0) ? 1 : minIdx; - int maxIdx = minIdx + pkg.getParts().size(); - while (idx <= maxIdx) { - name = descriptor.getFileName(idx); - PackagePartName ppName = PackagingURIHelper.createPartName(name); - if (!pkg.containPart(ppName)) { - return idx; - } - idx++; - } - } catch (InvalidFormatException e) { - // Give a general wrapped exception for the problem - throw new POIXMLException(e); - } - return -1; - } - - /** - * Create a new child POIXMLDocumentPart - * - * @param descriptor the part descriptor - * @param factory the factory that will create an instance of the requested relation - * @param idx part number - * @param noRelation if true, then no relationship is added. 
- * @return the created child POIXMLDocumentPart - * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain - * equivalent part names and package implementers shall neither - * create nor recognize packages with equivalent part names. - */ - public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) { - try { - PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx)); - PackageRelationship rel = null; - PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType()); - if (!noRelation) { - /* only add to relations, if according relationship is being created. */ - rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation()); - } - POIXMLDocumentPart doc = factory.newDocumentPart(descriptor); - doc.packagePart = part; - doc.parent = this; - if (!noRelation) { - /* only add to relations, if according relationship is being created. 
*/ - addRelation(rel, doc); - } - - return new RelationPart(rel, doc); - } catch (PartAlreadyExistsException pae) { - // Return the specific exception so the user knows - // that the name is already taken - throw pae; - } catch (Exception e) { - // Give a general wrapped exception for the problem - throw new POIXMLException(e); - } - } - - /** - * Iterate through the underlying PackagePart and create child POIXMLFactory instances - * using the specified factory - * - * @param factory the factory object that creates POIXMLFactory instances - * @param context context map containing already visited noted keyed by targetURI - * @throws OpenXML4JException thrown when a related part can't be read - */ - protected void read(POIXMLFactory factory, Map context) throws OpenXML4JException { - PackagePart pp = getPackagePart(); - // add mapping a second time, in case of initial caller hasn't done so - POIXMLDocumentPart otherChild = context.put(pp, this); - if (otherChild != null && otherChild != this) { - throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!"); - } - - if (!pp.hasRelationships()) return; - - PackageRelationshipCollection rels = packagePart.getRelationships(); - List readLater = new ArrayList<>(); - - // scan breadth-first, so parent-relations are hopefully the shallowest element - for (PackageRelationship rel : rels) { - if (rel.getTargetMode() == TargetMode.INTERNAL) { - URI uri = rel.getTargetURI(); - - // check for internal references (e.g. 
'#Sheet1!A1') - PackagePartName relName; - if (uri.getRawFragment() != null) { - relName = PackagingURIHelper.createPartName(uri.getPath()); - } else { - relName = PackagingURIHelper.createPartName(uri); - } - - final PackagePart p = packagePart.getPackage().getPart(relName); - if (p == null) { - logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI()); - continue; - } - - POIXMLDocumentPart childPart = context.get(p); - if (childPart == null) { - childPart = factory.createDocumentPart(this, p); - //here we are checking if part if embedded and excel then set it to chart class - //so that at the time to writing we can also write updated embedded part - if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) { - ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart); - } - childPart.parent = this; - // already add child to context, so other children can reference it - context.put(p, childPart); - readLater.add(childPart); - } - - addRelation(rel, childPart); - } - } - - for (POIXMLDocumentPart childPart : readLater) { - childPart.read(factory, context); - } - } - - /** - * Get the PackagePart that is the target of a relationship from this Part. 
- * - * @param rel The relationship - * @return The target part - * @throws InvalidFormatException thrown if the related part has is erroneous - */ - protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException { - return getPackagePart().getRelatedPart(rel); - } - - - /** - * Fired when a new package part is created - * - * @throws IOException a subclass may throw an IOException on document creation - */ - protected void onDocumentCreate() throws IOException { - - } - - /** - * Fired when a package part is read - * - * @throws IOException a subclass may throw an IOException when a document is read - */ - protected void onDocumentRead() throws IOException { - - } - - /** - * Fired when a package part is about to be removed from the package - * - * @throws IOException a subclass may throw an IOException when a document is removed - */ - protected void onDocumentRemove() throws IOException { - - } - - /** - * Internal method, do not use! - *

- * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()} - * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed. - * - * @param part the part which is to be read - * @throws IOException if the part can't be read - */ - @Internal - @Deprecated - public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException { - part.onDocumentRead(); - } - - /** - * Retrieves the core document part - * - * @since POI 3.14-Beta1 - */ - private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) { - PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0); - - if (coreRel != null) { - PackagePart pp = pkg.getPart(coreRel); - if (pp == null) { - throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found."); - } - return pp; - } - - coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0); - if (coreRel != null) { - throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699"); - } - - throw new POIXMLException("OOXML file structure broken/invalid - no core document found!"); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLException.java b/src/ooxml/java/org/apache/poi/POIXMLException.java deleted file mode 100644 index 82832ecff8..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLException.java +++ /dev/null @@ -1,70 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -/** - * Indicates a generic OOXML error. - * - * @author Yegor Kozlov - */ -@SuppressWarnings("serial") -public final class POIXMLException extends RuntimeException{ - /** - * Create a new POIXMLException with no - * detail mesage. - */ - public POIXMLException() { - super(); - } - - /** - * Create a new POIXMLException with - * the String specified as an error message. - * - * @param msg The error message for the exception. - */ - public POIXMLException(String msg) { - super(msg); - } - - /** - * Create a new POIXMLException with - * the String specified as an error message and the cause. - * - * @param msg The error message for the exception. - * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is - * permitted, and indicates that the cause is nonexistent or - * unknown.) - */ - public POIXMLException(String msg, Throwable cause) { - super(msg, cause); - } - - /** - * Create a new POIXMLException with - * the specified cause. - * - * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is - * permitted, and indicates that the cause is nonexistent or - * unknown.) 
- */ - public POIXMLException(Throwable cause) { - super(cause); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLFactory.java b/src/ooxml/java/org/apache/poi/POIXMLFactory.java deleted file mode 100644 index 651f40cbc1..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLFactory.java +++ /dev/null @@ -1,139 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi; - -import java.lang.reflect.InvocationTargetException; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - -/** - * Defines a factory API that enables sub-classes to create instances of POIXMLDocumentPart - */ -public abstract class POIXMLFactory { - private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class); - - private static final Class[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class}; - private static final Class[] ORPHAN_PART = {PackagePart.class}; - - /** - * Create a POIXMLDocumentPart from existing package part and relation. This method is called - * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document - * - * @param parent parent part - * @param part the PackagePart representing the created instance - * @return A new instance of a POIXMLDocumentPart. - * - * @since by POI 3.14-Beta1 - */ - public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) { - PackageRelationship rel = getPackageRelationship(parent, part); - POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType()); - - if (descriptor == null || descriptor.getRelationClass() == null) { - LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType()); - return new POIXMLDocumentPart(parent, part); - } - - Class cls = descriptor.getRelationClass(); - try { - try { - return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part}); - } catch (NoSuchMethodException e) { - return createDocumentPart(cls, ORPHAN_PART, new Object[]{part}); - } - } catch (Exception e) { - throw new POIXMLException((e.getCause() != null ? 
e.getCause() : e).getMessage(), e); - } - } - - /** - * Need to delegate instantiation to sub class because of constructor visibility - * - * @param cls the document class to be instantiated - * @param classes the classes of the constructor arguments - * @param values the values of the constructor arguments - * @return the new document / part - * @throws SecurityException thrown if the object can't be instantiated - * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments - * @throws InstantiationException thrown if the object can't be instantiated - * @throws IllegalAccessException thrown if the object can't be instantiated - * @throws InvocationTargetException thrown if the object can't be instantiated - * - * @since POI 3.14-Beta1 - */ - protected abstract POIXMLDocumentPart createDocumentPart - (Class cls, Class[] classes, Object[] values) - throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException; - - /** - * returns the descriptor for the given relationship type - * - * @param relationshipType the relationship type of the descriptor - * @return the descriptor or null if type is unknown - * - * @since POI 3.14-Beta1 - */ - protected abstract POIXMLRelation getDescriptor(String relationshipType); - - /** - * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts - * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc. - * - * @param descriptor describes the object to create - * @return A new instance of a POIXMLDocumentPart. 
- */ - public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) { - Class cls = descriptor.getRelationClass(); - try { - return createDocumentPart(cls, null, null); - } catch (Exception e) { - throw new POIXMLException(e); - } - } - - /** - * Retrieves the package relationship of the child part within the parent - * - * @param parent the parent to search for the part - * @param part the part to look for - * - * @return the relationship - * - * @throws POIXMLException if the relations are erroneous or the part is not related - * - * @since POI 3.14-Beta1 - */ - protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) { - try { - String partName = part.getPartName().getName(); - for (PackageRelationship pr : parent.getPackagePart().getRelationships()) { - String packName = pr.getTargetURI().toASCIIString(); - if (packName.equalsIgnoreCase(partName)) { - return pr; - } - } - } catch (InvalidFormatException e) { - throw new POIXMLException("error while determining package relations", e); - } - - throw new POIXMLException("package part isn't a child of the parent document."); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/POIXMLProperties.java deleted file mode 100644 index b956b7ee55..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLProperties.java +++ /dev/null @@ -1,611 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Date; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.ContentTypes; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagePartName; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.openxml4j.opc.StreamHelper; -import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.openxml4j.util.Nullable; -import org.apache.xmlbeans.XmlException; -import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; - -/** - * Wrapper around the three different kinds of OOXML properties - * and metadata a document can have (Core, Extended and Custom), - * as well Thumbnails. 
- */ -public class POIXMLProperties { - private OPCPackage pkg; - private CoreProperties core; - private ExtendedProperties ext; - private CustomProperties cust; - - private PackagePart extPart; - private PackagePart custPart; - - - private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE; - private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE; - static { - NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance(); - NEW_EXT_INSTANCE.addNewProperties(); - - NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance(); - NEW_CUST_INSTANCE.addNewProperties(); - } - - public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException { - this.pkg = docPackage; - - // Core properties - core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() ); - - // Extended properties - PackageRelationshipCollection extRel = - pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES); - if(extRel.size() == 1) { - extPart = pkg.getPart( extRel.getRelationship(0)); - org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse( - extPart.getInputStream(), DEFAULT_XML_OPTIONS - ); - ext = new ExtendedProperties(props); - } else { - extPart = null; - ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy()); - } - - // Custom properties - PackageRelationshipCollection custRel = - pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES); - if(custRel.size() == 1) { - custPart = pkg.getPart( custRel.getRelationship(0)); - 
org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse( - custPart.getInputStream(), DEFAULT_XML_OPTIONS - ); - cust = new CustomProperties(props); - } else { - custPart = null; - cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy()); - } - } - - /** - * Returns the core document properties - * - * @return the core document properties - */ - public CoreProperties getCoreProperties() { - return core; - } - - /** - * Returns the extended document properties - * - * @return the extended document properties - */ - public ExtendedProperties getExtendedProperties() { - return ext; - } - - /** - * Returns the custom document properties - * - * @return the custom document properties - */ - public CustomProperties getCustomProperties() { - return cust; - } - - /** - * Returns the {@link PackagePart} for the Document - * Thumbnail, or null if there isn't one - * - * @return The Document Thumbnail part or null - */ - protected PackagePart getThumbnailPart() { - PackageRelationshipCollection rels = - pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL); - if(rels.size() == 1) { - return pkg.getPart(rels.getRelationship(0)); - } - return null; - } - /** - * Returns the name of the Document thumbnail, eg - * thumbnail.jpeg, or null if there - * isn't one. - * - * @return The thumbnail filename, or null - */ - public String getThumbnailFilename() { - PackagePart tPart = getThumbnailPart(); - if (tPart == null) return null; - String name = tPart.getPartName().getName(); - return name.substring(name.lastIndexOf('/')); - } - /** - * Returns the Document thumbnail image data, or {@code null} if there isn't one. 
- * - * @return The thumbnail data, or null - * - * @throws IOException if the thumbnail can't be read - */ - public InputStream getThumbnailImage() throws IOException { - PackagePart tPart = getThumbnailPart(); - if (tPart == null) return null; - return tPart.getInputStream(); - } - - /** - * Sets the Thumbnail for the document, replacing any existing one. - * - * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg} - * @param imageData The inputstream to read the thumbnail image from - * - * @throws IOException if the thumbnail can't be written - */ - public void setThumbnail(String filename, InputStream imageData) throws IOException { - PackagePart tPart = getThumbnailPart(); - if (tPart == null) { - // New thumbnail - pkg.addThumbnail(filename, imageData); - } else { - // Change existing - String newType = ContentTypes.getContentTypeFromFileExtension(filename); - if (! newType.equals(tPart.getContentType())) { - throw new IllegalArgumentException("Can't set a Thumbnail of type " + - newType + " when existing one is of a different type " + - tPart.getContentType()); - } - StreamHelper.copyStream(imageData, tPart.getOutputStream()); - } - } - - /** - * Commit changes to the underlying OPC package - * - * @throws IOException if the properties can't be saved - * @throws POIXMLException if the properties are erroneous - */ - public void commit() throws IOException{ - - if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){ - try { - PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml"); - pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"); - extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml"); - } catch (InvalidFormatException e){ - throw new POIXMLException(e); - } - } - if(custPart == null && 
!NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){ - try { - PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml"); - pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"); - custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml"); - } catch (InvalidFormatException e){ - throw new POIXMLException(e); - } - } - if(extPart != null){ - OutputStream out = extPart.getOutputStream(); - if (extPart.getSize() > 0) { - extPart.clear(); - } - ext.props.save(out, DEFAULT_XML_OPTIONS); - out.close(); - } - if(custPart != null){ - OutputStream out = custPart.getOutputStream(); - cust.props.save(out, DEFAULT_XML_OPTIONS); - out.close(); - } - } - - /** - * The core document properties - */ - public static class CoreProperties { - private PackagePropertiesPart part; - private CoreProperties(PackagePropertiesPart part) { - this.part = part; - } - - public String getCategory() { - return part.getCategoryProperty().getValue(); - } - public void setCategory(String category) { - part.setCategoryProperty(category); - } - public String getContentStatus() { - return part.getContentStatusProperty().getValue(); - } - public void setContentStatus(String contentStatus) { - part.setContentStatusProperty(contentStatus); - } - public String getContentType() { - return part.getContentTypeProperty().getValue(); - } - public void setContentType(String contentType) { - part.setContentTypeProperty(contentType); - } - public Date getCreated() { - return part.getCreatedProperty().getValue(); - } - public void setCreated(Nullable date) { - part.setCreatedProperty(date); - } - public void setCreated(String date) { - part.setCreatedProperty(date); - } - public String getCreator() { - return part.getCreatorProperty().getValue(); - } - public void setCreator(String creator) { - part.setCreatorProperty(creator); - } - public String 
getDescription() { - return part.getDescriptionProperty().getValue(); - } - public void setDescription(String description) { - part.setDescriptionProperty(description); - } - public String getIdentifier() { - return part.getIdentifierProperty().getValue(); - } - public void setIdentifier(String identifier) { - part.setIdentifierProperty(identifier); - } - public String getKeywords() { - return part.getKeywordsProperty().getValue(); - } - public void setKeywords(String keywords) { - part.setKeywordsProperty(keywords); - } - public Date getLastPrinted() { - return part.getLastPrintedProperty().getValue(); - } - public void setLastPrinted(Nullable date) { - part.setLastPrintedProperty(date); - } - public void setLastPrinted(String date) { - part.setLastPrintedProperty(date); - } - /** @since POI 3.15 beta 3 */ - public String getLastModifiedByUser() { - return part.getLastModifiedByProperty().getValue(); - } - /** @since POI 3.15 beta 3 */ - public void setLastModifiedByUser(String user) { - part.setLastModifiedByProperty(user); - } - public Date getModified() { - return part.getModifiedProperty().getValue(); - } - public void setModified(Nullable date) { - part.setModifiedProperty(date); - } - public void setModified(String date) { - part.setModifiedProperty(date); - } - public String getSubject() { - return part.getSubjectProperty().getValue(); - } - public void setSubjectProperty(String subject) { - part.setSubjectProperty(subject); - } - public void setTitle(String title) { - part.setTitleProperty(title); - } - public String getTitle() { - return part.getTitleProperty().getValue(); - } - public String getRevision() { - return part.getRevisionProperty().getValue(); - } - public void setRevision(String revision) { - try { - Long.valueOf(revision); - part.setRevisionProperty(revision); - } - catch (NumberFormatException e) {} - } - - public PackagePropertiesPart getUnderlyingProperties() { - return part; - } - } - - /** - * Extended document properties - */ - public 
static class ExtendedProperties { - private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props; - private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) { - this.props = props; - } - - public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() { - return props.getProperties(); - } - - public String getTemplate() { - if (props.getProperties().isSetTemplate()) { - return props.getProperties().getTemplate(); - } - return null; - } - public String getManager() { - if (props.getProperties().isSetManager()) { - return props.getProperties().getManager(); - } - return null; - } - public String getCompany() { - if (props.getProperties().isSetCompany()) { - return props.getProperties().getCompany(); - } - return null; - } - public String getPresentationFormat() { - if (props.getProperties().isSetPresentationFormat()) { - return props.getProperties().getPresentationFormat(); - } - return null; - } - public String getApplication() { - if (props.getProperties().isSetApplication()) { - return props.getProperties().getApplication(); - } - return null; - } - public String getAppVersion() { - if (props.getProperties().isSetAppVersion()) { - return props.getProperties().getAppVersion(); - } - return null; - } - - public int getPages() { - if (props.getProperties().isSetPages()) { - return props.getProperties().getPages(); - } - return -1; - } - public int getWords() { - if (props.getProperties().isSetWords()) { - return props.getProperties().getWords(); - } - return -1; - } - public int getCharacters() { - if (props.getProperties().isSetCharacters()) { - return props.getProperties().getCharacters(); - } - return -1; - } - public int getCharactersWithSpaces() { - if (props.getProperties().isSetCharactersWithSpaces()) { - return props.getProperties().getCharactersWithSpaces(); - } - return -1; - } - public int getLines() { - if 
(props.getProperties().isSetLines()) { - return props.getProperties().getLines(); - } - return -1; - } - public int getParagraphs() { - if (props.getProperties().isSetParagraphs()) { - return props.getProperties().getParagraphs(); - } - return -1; - } - public int getSlides() { - if (props.getProperties().isSetSlides()) { - return props.getProperties().getSlides(); - } - return -1; - } - public int getNotes() { - if (props.getProperties().isSetNotes()) { - return props.getProperties().getNotes(); - } - return -1; - } - public int getTotalTime() { - if (props.getProperties().isSetTotalTime()) { - return props.getProperties().getTotalTime(); - } - return -1; - } - public int getHiddenSlides() { - if (props.getProperties().isSetHiddenSlides()) { - return props.getProperties().getHiddenSlides(); - } - return -1; - } - public int getMMClips() { - if (props.getProperties().isSetMMClips()) { - return props.getProperties().getMMClips(); - } - return -1; - } - - public String getHyperlinkBase() { - if (props.getProperties().isSetHyperlinkBase()) { - return props.getProperties().getHyperlinkBase(); - } - return null; - } - } - - /** - * Custom document properties - */ - public static class CustomProperties { - /** - * Each custom property element contains an fmtid attribute - * with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}). 
- */ - public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"; - - private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props; - private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) { - this.props = props; - } - - public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() { - return props.getProperties(); - } - - /** - * Add a new property - * - * @param name the property name - * @throws IllegalArgumentException if a property with this name already exists - */ - private CTProperty add(String name) { - if(contains(name)) { - throw new IllegalArgumentException("A property with this name " + - "already exists in the custom properties"); - } - - CTProperty p = props.getProperties().addNewProperty(); - int pid = nextPid(); - p.setPid(pid); - p.setFmtid(FORMAT_ID); - p.setName(name); - return p; - } - - /** - * Add a new string property - * - * @param name the property name - * @param value the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, String value){ - CTProperty p = add(name); - p.setLpwstr(value); - } - - /** - * Add a new double property - * - * @param name the property name - * @param value the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, double value){ - CTProperty p = add(name); - p.setR8(value); - } - - /** - * Add a new integer property - * - * @param name the property name - * @param value the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, int value){ - CTProperty p = add(name); - p.setI4(value); - } - - /** - * Add a new boolean property - * - * @param name the property name - * @param value 
the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, boolean value){ - CTProperty p = add(name); - p.setBool(value); - } - - /** - * Generate next id that uniquely relates a custom property - * - * @return next property id starting with 2 - */ - protected int nextPid() { - int propid = 1; - for(CTProperty p : props.getProperties().getPropertyArray()){ - if(p.getPid() > propid) propid = p.getPid(); - } - return propid + 1; - } - - /** - * Check if a property with this name already exists in the collection of custom properties - * - * @param name the name to check - * @return whether a property with the given name exists in the custom properties - */ - public boolean contains(String name) { - for(CTProperty p : props.getProperties().getPropertyArray()){ - if(p.getName().equals(name)) return true; - } - return false; - } - - /** - * Retrieve the custom property with this name, or null if none exists. - * - * You will need to test the various isSetX methods to work out - * what the type of the property is, before fetching the - * appropriate value for it. - * - * @param name the name of the property to fetch - * - * @return the custom property with this name, or null if none exists - */ - public CTProperty getProperty(String name) { - for(CTProperty p : props.getProperties().getPropertyArray()){ - if(p.getName().equals(name)) { - return p; - } - } - return null; - } - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java deleted file mode 100644 index f0fe9c30f7..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java +++ /dev/null @@ -1,274 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi; - -import java.math.BigDecimal; -import java.text.DateFormat; -import java.text.DateFormatSymbols; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Locale; - -import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.util.LocaleUtil; -import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; - -/** - * A {@link POITextExtractor} for returning the textual - * content of the OOXML file properties, eg author - * and title. - */ -public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { - - private final DateFormat dateFormat; - - /** - * Creates a new POIXMLPropertiesTextExtractor for the given open document. - * - * @param doc the given open document - */ - public POIXMLPropertiesTextExtractor(POIXMLDocument doc) { - super(doc); - DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT); - dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs); - dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC); - } - - /** - * Creates a new POIXMLPropertiesTextExtractor, for the - * same file that another TextExtractor is already - * working on. 
- * - * @param otherExtractor the extractor referencing the given file - */ - public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) { - this(otherExtractor.getDocument()); - } - - private void appendIfPresent(StringBuilder text, String thing, boolean value) { - appendIfPresent(text, thing, Boolean.toString(value)); - } - - private void appendIfPresent(StringBuilder text, String thing, int value) { - appendIfPresent(text, thing, Integer.toString(value)); - } - - private void appendIfPresent(StringBuilder text, String thing, Date value) { - if (value == null) { - return; - } - appendIfPresent(text, thing, dateFormat.format(value)); - } - - private void appendIfPresent(StringBuilder text, String thing, String value) { - if (value == null) { - return; - } - text.append(thing); - text.append(" = "); - text.append(value); - text.append("\n"); - } - - /** - * Returns the core document properties, eg author - * - * @return the core document properties - */ - @SuppressWarnings("resource") - public String getCorePropertiesText() { - POIXMLDocument document = getDocument(); - if (document == null) { // event based extractor does not have a document - return ""; - } - - StringBuilder text = new StringBuilder(64); - PackagePropertiesPart props = - document.getProperties().getCoreProperties().getUnderlyingProperties(); - - appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); - appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); - appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue()); - appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue()); - appendIfPresent(text, "Created", props.getCreatedProperty().getValue()); - appendIfPresent(text, "CreatedString", props.getCreatedPropertyString()); - appendIfPresent(text, "Creator", props.getCreatorProperty().getValue()); - appendIfPresent(text, "Description", props.getDescriptionProperty().getValue()); - 
appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue()); - appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue()); - appendIfPresent(text, "Language", props.getLanguageProperty().getValue()); - appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue()); - appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue()); - appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString()); - appendIfPresent(text, "Modified", props.getModifiedProperty().getValue()); - appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString()); - appendIfPresent(text, "Revision", props.getRevisionProperty().getValue()); - appendIfPresent(text, "Subject", props.getSubjectProperty().getValue()); - appendIfPresent(text, "Title", props.getTitleProperty().getValue()); - appendIfPresent(text, "Version", props.getVersionProperty().getValue()); - - return text.toString(); - } - - /** - * Returns the extended document properties, eg application - * - * @return the extended document properties - */ - @SuppressWarnings("resource") - public String getExtendedPropertiesText() { - POIXMLDocument document = getDocument(); - if (document == null) { // event based extractor does not have a document - return ""; - } - - StringBuilder text = new StringBuilder(64); - org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties - props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); - - appendIfPresent(text, "Application", props.getApplication()); - appendIfPresent(text, "AppVersion", props.getAppVersion()); - appendIfPresent(text, "Characters", props.getCharacters()); - appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces()); - appendIfPresent(text, "Company", props.getCompany()); - appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase()); - appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged()); 
- appendIfPresent(text, "Lines", props.getLines()); - appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate()); - appendIfPresent(text, "Manager", props.getManager()); - appendIfPresent(text, "Pages", props.getPages()); - appendIfPresent(text, "Paragraphs", props.getParagraphs()); - appendIfPresent(text, "PresentationFormat", props.getPresentationFormat()); - appendIfPresent(text, "Template", props.getTemplate()); - appendIfPresent(text, "TotalTime", props.getTotalTime()); - - return text.toString(); - } - - /** - * Returns the custom document properties, if there are any - * - * @return the custom document properties - */ - @SuppressWarnings({"resource"}) - public String getCustomPropertiesText() { - POIXMLDocument document = getDocument(); - if (document == null) { // event based extractor does not have a document - return ""; - } - - StringBuilder text = new StringBuilder(); - org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties - props = document.getProperties().getCustomProperties().getUnderlyingProperties(); - - for (CTProperty property : props.getPropertyArray()) { - String val = "(not implemented!)"; - - if (property.isSetLpwstr()) { - val = property.getLpwstr(); - } else if (property.isSetLpstr()) { - val = property.getLpstr(); - } else if (property.isSetDate()) { - val = property.getDate().toString(); - } else if (property.isSetFiletime()) { - val = property.getFiletime().toString(); - } else if (property.isSetBool()) { - val = Boolean.toString(property.getBool()); - } - - // Integers - else if (property.isSetI1()) { - val = Integer.toString(property.getI1()); - } else if (property.isSetI2()) { - val = Integer.toString(property.getI2()); - } else if (property.isSetI4()) { - val = Integer.toString(property.getI4()); - } else if (property.isSetI8()) { - val = Long.toString(property.getI8()); - } else if (property.isSetInt()) { - val = Integer.toString(property.getInt()); - } - - // Unsigned Integers - else if 
(property.isSetUi1()) { - val = Integer.toString(property.getUi1()); - } else if (property.isSetUi2()) { - val = Integer.toString(property.getUi2()); - } else if (property.isSetUi4()) { - val = Long.toString(property.getUi4()); - } else if (property.isSetUi8()) { - val = property.getUi8().toString(); - } else if (property.isSetUint()) { - val = Long.toString(property.getUint()); - } - - // Reals - else if (property.isSetR4()) { - val = Float.toString(property.getR4()); - } else if (property.isSetR8()) { - val = Double.toString(property.getR8()); - } else if (property.isSetDecimal()) { - BigDecimal d = property.getDecimal(); - if (d == null) { - val = null; - } else { - val = d.toPlainString(); - } - } - - /*else if (property.isSetArray()) { - // TODO Fetch the array values and output - } - else if (property.isSetVector()) { - // TODO Fetch the vector values and output - } - - else if (property.isSetBlob() || property.isSetOblob()) { - // TODO Decode, if possible - } - else if (property.isSetStream() || property.isSetOstream() || - property.isSetVstream()) { - // TODO Decode, if possible - } - else if (property.isSetStorage() || property.isSetOstorage()) { - // TODO Decode, if possible - }*/ - - text.append(property.getName()).append(" = ").append(val).append("\n"); - } - - return text.toString(); - } - - @Override - public String getText() { - try { - return - getCorePropertiesText() + - getExtendedPropertiesText() + - getCustomPropertiesText(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { - throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!"); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLRelation.java b/src/ooxml/java/org/apache/poi/POIXMLRelation.java deleted file mode 100644 index 55d162c5f3..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLRelation.java +++ /dev/null @@ -1,170 +0,0 @@ -/* 
==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagePartName; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - -/** - * Represents a descriptor of a OOXML relation. - */ -public abstract class POIXMLRelation { - - private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class); - - /** - * Describes the content stored in a part. - */ - private String _type; - - /** - * The kind of connection between a source part and a target part in a package. - */ - private String _relation; - - /** - * The path component of a pack URI. 
- */ - private String _defaultName; - - /** - * Defines what object is used to construct instances of this relationship - */ - private Class _cls; - - /** - * Instantiates a POIXMLRelation. - * - * @param type content type - * @param rel relationship - * @param defaultName default item name - * @param cls defines what object is used to construct instances of this relationship - */ - public POIXMLRelation(String type, String rel, String defaultName, Class cls) { - _type = type; - _relation = rel; - _defaultName = defaultName; - _cls = cls; - } - - /** - * Instantiates a POIXMLRelation. - * - * @param type content type - * @param rel relationship - * @param defaultName default item name - */ - public POIXMLRelation(String type, String rel, String defaultName) { - this(type, rel, defaultName, null); - } - /** - * Return the content type. Content types define a media type, a subtype, and an - * optional set of parameters, as defined in RFC 2616. - * - * @return the content type - */ - public String getContentType() { - return _type; - } - - /** - * Return the relationship, the kind of connection between a source part and a target part in a package. - * Relationships make the connections between parts directly discoverable without looking at the content - * in the parts, and without altering the parts themselves. - * - * @return the relationship - */ - public String getRelation() { - return _relation; - } - - /** - * Return the default part name. Part names are used to refer to a part in the context of a - * package, typically as part of a URI. - * - * @return the default part name - */ - public String getDefaultFileName() { - return _defaultName; - } - - /** - * Returns the filename for the nth one of these, e.g. /xl/comments4.xml - * - * @param index the suffix for the document type - * @return the filename including the suffix - */ - public String getFileName(int index) { - if(! 
_defaultName.contains("#")) { - // Generic filename in all cases - return getDefaultFileName(); - } - return _defaultName.replace("#", Integer.toString(index)); - } - - /** - * Returns the index of the filename within the package for the given part. - * e.g. 4 for /xl/comments4.xml - * - * @param part the part to read the suffix from - * @return the suffix - */ - public Integer getFileNameIndex(POIXMLDocumentPart part) { - String regex = _defaultName.replace("#", "(\\d+)"); - return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1")); - } - - /** - * Return type of the object used to construct instances of this relationship - * - * @return the class of the object used to construct instances of this relation - */ - public Class getRelationClass(){ - return _cls; - } - - /** - * Fetches the InputStream to read the contents, based - * of the specified core part, for which we are defined - * as a suitable relationship - * - * @since 3.16-beta3 - */ - public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { - PackageRelationshipCollection prc = - corePart.getRelationshipsByType(getRelation()); - Iterator it = prc.iterator(); - if(it.hasNext()) { - PackageRelationship rel = it.next(); - PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); - PackagePart part = corePart.getPackage().getPart(relName); - return part.getInputStream(); - } - log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); - return null; - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java deleted file mode 100644 index 003fe353f1..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ /dev/null @@ -1,121 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi; - -import java.io.IOException; - -import org.apache.poi.POIXMLProperties.CoreProperties; -import org.apache.poi.POIXMLProperties.CustomProperties; -import org.apache.poi.POIXMLProperties.ExtendedProperties; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.util.ZipSecureFile; - -public abstract class POIXMLTextExtractor extends POITextExtractor { - /** The POIXMLDocument that's open */ - private final POIXMLDocument _document; - - /** - * Creates a new text extractor for the given document - * - * @param document the document to extract from - */ - public POIXMLTextExtractor(POIXMLDocument document) { - _document = document; - } - - /** - * Returns the core document properties - * - * @return the core document properties - */ - public CoreProperties getCoreProperties() { - return _document.getProperties().getCoreProperties(); - } - /** - * Returns the extended document properties - * - * @return the extended document properties - */ - public ExtendedProperties getExtendedProperties() { - return _document.getProperties().getExtendedProperties(); - } - /** - * Returns the custom document properties - * - * @return the custom document properties - */ - public CustomProperties 
getCustomProperties() { - return _document.getProperties().getCustomProperties(); - } - - /** - * Returns opened document - * - * @return the opened document - */ - @Override - public final POIXMLDocument getDocument() { - return _document; - } - - /** - * Returns the opened OPCPackage that contains the document - * - * @return the opened OPCPackage - */ - public OPCPackage getPackage() { - return _document.getPackage(); - } - - /** - * Returns an OOXML properties text extractor for the - * document properties metadata, such as title and author. - */ - @Override - public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { - return new POIXMLPropertiesTextExtractor(_document); - } - - @Override - public void close() throws IOException { - // e.g. XSSFEventBaseExcelExtractor passes a null-document - if(_document != null) { - @SuppressWarnings("resource") - OPCPackage pkg = _document.getPackage(); - if(pkg != null) { - // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor! - pkg.revert(); - } - } - super.close(); - } - - protected void checkMaxTextSize(CharSequence text, String string) { - if(string == null) { - return; - } - - int size = text.length() + string.length(); - if(size > ZipSecureFile.getMaxTextSize()) { - throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. " - + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. " - + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. 
" - + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize()); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java deleted file mode 100644 index 8578a8333f..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java +++ /dev/null @@ -1,168 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.io.StringReader; -import java.lang.ref.WeakReference; -import java.net.URL; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import javax.xml.stream.XMLStreamReader; - -import org.apache.poi.openxml4j.opc.PackageNamespaces; -import org.apache.poi.util.DocumentHelper; -import org.apache.poi.util.Removal; -import org.apache.xmlbeans.SchemaType; -import org.apache.xmlbeans.SchemaTypeLoader; -import org.apache.xmlbeans.XmlBeans; -import org.apache.xmlbeans.XmlException; -import org.apache.xmlbeans.XmlObject; -import org.apache.xmlbeans.XmlOptions; -import org.apache.xmlbeans.xml.stream.XMLInputStream; -import org.apache.xmlbeans.xml.stream.XMLStreamException; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -@SuppressWarnings("deprecation") -public class POIXMLTypeLoader { - - private static ThreadLocal typeLoader = new ThreadLocal<>(); - - // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes? 
- // These constants should be common to all of POI and easy to use by other applications such as Tika - private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office"; - private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel"; - private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word"; - private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml"; - - public static final XmlOptions DEFAULT_XML_OPTIONS; - static { - DEFAULT_XML_OPTIONS = new XmlOptions(); - DEFAULT_XML_OPTIONS.setSaveOuter(); - DEFAULT_XML_OPTIONS.setUseDefaultNamespace(); - DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces(); - DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8"); - // Piccolo is disabled for POI builts, i.e. JAXP is used for parsing - // so only user code using XmlObject/XmlToken.Factory.parse - // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :)) - // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096); - - // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350 - // Update: disabled again for now as it caused strange NPEs and other problems - // when reading properties in separate workbooks in multiple threads - // DEFAULT_XML_OPTIONS.setUnsynchronized(); - - Map map = new HashMap<>(); - map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a"); - map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c"); - map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp"); - map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve"); - map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m"); - map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r"); - map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt"); - map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p"); - 
map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w"); - map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne"); - map.put(MS_OFFICE_URN, "o"); - map.put(MS_EXCEL_URN, "x"); - map.put(MS_WORD_URN, "w10"); - map.put(MS_VML_URN, "v"); - DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map)); - } - - private static XmlOptions getXmlOptions(XmlOptions options) { - return options == null ? DEFAULT_XML_OPTIONS : options; - } - - private static SchemaTypeLoader getTypeLoader(SchemaType type) { - SchemaTypeLoader tl = typeLoader.get(); - if (tl == null) { - ClassLoader cl = type.getClass().getClassLoader(); - tl = XmlBeans.typeLoaderForClassLoader(cl); - typeLoader.set(tl); - } - return tl; - } - - public static XmlObject newInstance(SchemaType type, XmlOptions options) { - return getTypeLoader(type).newInstance(type, getXmlOptions(options)); - } - - public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException { - try { - return parse(new StringReader(xmlText), type, options); - } catch (IOException e) { - throw new XmlException("Unable to parse xml bean", e); - } - } - - public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException { - try (InputStream is = new FileInputStream(file)) { - return parse(is, type, options); - } - } - - public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException { - try (InputStream is = file.openStream()) { - return parse(is, type, options); - } - } - - public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException { - try { - Document doc = DocumentHelper.readDocument(jiois); - return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options)); - } catch (SAXException e) { - throw new IOException("Unable to parse xml bean", e); - } - } - - public static XmlObject 
parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException { - return getTypeLoader(type).parse(xsr, type, getXmlOptions(options)); - } - - public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException { - try { - Document doc = DocumentHelper.readDocument(new InputSource(jior)); - return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options)); - } catch (SAXException e) { - throw new XmlException("Unable to parse xml bean", e); - } - } - - public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException { - return getTypeLoader(type).parse(node, type, getXmlOptions(options)); - } - - public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException { - return getTypeLoader(type).parse(xis, type, getXmlOptions(options)); - } - - public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException { - return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options)); - } -} diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java deleted file mode 100644 index cbbca45cf9..0000000000 --- a/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java +++ /dev/null @@ -1,152 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.dev; - -import java.io.*; -import java.util.ArrayList; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; - -/** - * Prints out the contents of a OOXML container. - * Useful for seeing what parts are defined, and how - * they're all related to each other. - */ -public class OOXMLLister implements Closeable { - private final OPCPackage container; - private final PrintStream disp; - - public OOXMLLister(OPCPackage container) { - this(container, System.out); - } - public OOXMLLister(OPCPackage container, PrintStream disp) { - this.container = container; - this.disp = disp; - } - - /** - * Figures out how big a given PackagePart is. - * - * @param part the PackagePart - * @return the size of the PackagePart - * - * @throws IOException if the part can't be read - */ - public static long getSize(PackagePart part) throws IOException { - InputStream in = part.getInputStream(); - try { - byte[] b = new byte[8192]; - long size = 0; - int read = 0; - - while(read > -1) { - read = in.read(b); - if(read > 0) { - size += read; - } - } - - return size; - } finally { - in.close(); - } - } - - /** - * Displays information on all the different - * parts of the OOXML file container. 
- * @throws InvalidFormatException if the package relations are invalid - * @throws IOException if the package can't be read - */ - public void displayParts() throws InvalidFormatException, IOException { - ArrayList parts = container.getParts(); - for (PackagePart part : parts) { - disp.println(part.getPartName()); - disp.println("\t" + part.getContentType()); - - if(! part.getPartName().toString().equals("/docProps/core.xml")) { - disp.println("\t" + getSize(part) + " bytes"); - } - - if(! part.isRelationshipPart()) { - disp.println("\t" + part.getRelationships().size() + " relations"); - for(PackageRelationship rel : part.getRelationships()) { - displayRelation(rel, "\t "); - } - } - } - } - /** - * Displays information on all the different - * relationships between different parts - * of the OOXML file container. - */ - public void displayRelations() { - PackageRelationshipCollection rels = - container.getRelationships(); - for (PackageRelationship rel : rels) { - displayRelation(rel, ""); - } - } - - private void displayRelation(PackageRelationship rel, String indent) { - disp.println(indent+"Relationship:"); - disp.println(indent+"\tFrom: "+ rel.getSourceURI()); - disp.println(indent+"\tTo: " + rel.getTargetURI()); - disp.println(indent+"\tID: " + rel.getId()); - disp.println(indent+"\tMode: " + rel.getTargetMode()); - disp.println(indent+"\tType: " + rel.getRelationshipType()); - } - - @Override - public void close() throws IOException { - container.close(); - } - - public static void main(String[] args) throws IOException, InvalidFormatException { - if(args.length == 0) { - System.err.println("Use:"); - System.err.println("\tjava OOXMLLister "); - System.exit(1); - } - - File f = new File(args[0]); - if(! 
f.exists()) { - System.err.println("Error, file not found!"); - System.err.println("\t" + f); - System.exit(2); - } - - OOXMLLister lister = new OOXMLLister( - OPCPackage.open(f.toString(), PackageAccess.READ) - ); - - try { - lister.disp.println(f + "\n"); - lister.displayParts(); - lister.disp.println(); - lister.displayRelations(); - } finally { - lister.close(); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java deleted file mode 100644 index e8ae9eb103..0000000000 --- a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java +++ /dev/null @@ -1,137 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.dev; - -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Enumeration; -import java.util.zip.ZipEntry; -import java.util.zip.ZipException; -import java.util.zip.ZipFile; -import java.util.zip.ZipOutputStream; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Result; -import javax.xml.transform.Source; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; - -import org.apache.poi.openxml4j.opc.internal.ZipHelper; -import org.apache.poi.openxml4j.util.ZipSecureFile; -import org.apache.poi.util.IOUtils; -import org.w3c.dom.Document; -import org.xml.sax.InputSource; - -/** - * Reads a zipped OOXML file and produces a copy with the included - * pretty-printed XML files. - * - * This is useful for comparing OOXML files produced by different tools as the often - * use different formatting of the XML. 
- */ -public class OOXMLPrettyPrint { - private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - private final DocumentBuilder documentBuilder; - - public OOXMLPrettyPrint() throws ParserConfigurationException { - // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool - ZipSecureFile.setMinInflateRatio(0.00001); - - documentBuilder = documentBuilderFactory.newDocumentBuilder(); - } - - public static void main(String[] args) throws Exception { - if(args.length <= 1 || args.length % 2 != 0) { - System.err.println("Use:"); - System.err.println("\tjava OOXMLPrettyPrint [ ] ..."); - System.exit(1); - } - - for(int i = 0;i < args.length;i+=2) { - File f = new File(args[i]); - if(! f.exists()) { - System.err.println("Error, file not found!"); - System.err.println("\t" + f); - System.exit(2); - } - - handleFile(f, new File(args[i+1])); - } - System.out.println("Done."); - } - - private static void handleFile(File file, File outFile) throws ZipException, - IOException, ParserConfigurationException { - System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile); - - try (ZipFile zipFile = ZipHelper.openZipFile(file)) { - try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) { - new OOXMLPrettyPrint().handle(zipFile, out); - } - } finally { - System.out.println(); - } - } - - private void handle(ZipFile file, ZipOutputStream out) throws IOException { - Enumeration entries = file.entries(); - while(entries.hasMoreElements()) { - ZipEntry entry = entries.nextElement(); - - String name = entry.getName(); - out.putNextEntry(new ZipEntry(name)); - try { - if(name.endsWith(".xml") || name.endsWith(".rels")) { - Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); - document.setXmlStandalone(true); - pretty(document, out, 2); - } else { - 
System.out.println("Not pretty-printing non-XML file " + name); - IOUtils.copy(file.getInputStream(entry), out); - } - } catch (Exception e) { - throw new IOException("While handling entry " + name, e); - } finally { - out.closeEntry(); - } - System.out.print("."); - } - } - - private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException { - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = transformerFactory.newTransformer(); - transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - if (indent > 0) { - // set properties to indent the resulting XML nicely - transformer.setOutputProperty(OutputKeys.INDENT, "yes"); - transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent)); - } - Result result = new StreamResult(outputStream); - Source source = new DOMSource(document); - transformer.transform(source, result); - } -} diff --git a/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java b/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java deleted file mode 100644 index 264daa028f..0000000000 --- a/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java +++ /dev/null @@ -1,62 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.extractor; - -import java.io.File; - -import org.apache.poi.POITextExtractor; - -/** - * A command line wrapper around {@link ExtractorFactory}, useful - * for when debugging. - */ -public class CommandLineTextExtractor { - public static final String DIVIDER = "======================="; - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" CommandLineTextExtractor [filename] [filename]"); - System.exit(1); - } - - for (String arg : args) { - System.out.println(DIVIDER); - - File f = new File(arg); - System.out.println(f); - - POITextExtractor extractor = - ExtractorFactory.createExtractor(f); - try { - POITextExtractor metadataExtractor = - extractor.getMetadataTextExtractor(); - - System.out.println(" " + DIVIDER); - String metaData = metadataExtractor.getText(); - System.out.println(metaData); - System.out.println(" " + DIVIDER); - String text = extractor.getText(); - System.out.println(text); - System.out.println(DIVIDER); - System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text"); - } finally { - extractor.close(); - } - } - } -} diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java deleted file mode 100644 index 9a7765af0d..0000000000 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ /dev/null @@ -1,436 +0,0 @@ -/* 
==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.extractor; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; - -import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.POIOLE2TextExtractor; -import org.apache.poi.POITextExtractor; -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hsmf.MAPIMessage; -import org.apache.poi.hsmf.datatypes.AttachmentChunks; -import org.apache.poi.hsmf.extractor.OutlookTextExtactor; -import org.apache.poi.hssf.extractor.ExcelExtractor; -import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; -import org.apache.poi.hwpf.extractor.WordExtractor; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import 
org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.poifs.crypt.Decryptor; -import org.apache.poi.poifs.crypt.EncryptionInfo; -import org.apache.poi.poifs.filesystem.DirectoryEntry; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.Entry; -import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.NotOLE2FileException; -import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.OfficeXmlFileException; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.sl.extractor.SlideShowExtractor; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.NotImplemented; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.util.Removal; -import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; -import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; -import org.apache.poi.xslf.usermodel.XMLSlideShow; -import org.apache.poi.xslf.usermodel.XSLFRelation; -import org.apache.poi.xslf.usermodel.XSLFSlideShow; -import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; -import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; -import org.apache.poi.xssf.extractor.XSSFExcelExtractor; -import org.apache.poi.xssf.usermodel.XSSFRelation; -import org.apache.poi.xwpf.extractor.XWPFWordExtractor; -import org.apache.poi.xwpf.usermodel.XWPFRelation; -import org.apache.xmlbeans.XmlException; - -/** - * Figures out the correct POITextExtractor for your supplied - * document, and returns it. - * - *

Note 1 - will fail for many file formats if the POI Scratchpad jar is - * not present on the runtime classpath

- *

Note 2 - rather than using this, for most cases you would be better - * off switching to Apache Tika instead!

- */ -@SuppressWarnings("WeakerAccess") -public class ExtractorFactory { - private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class); - - public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT; - protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT; - protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT; - - /** - * Should this thread prefer event based over usermodel based extractors? - * (usermodel extractors tend to be more accurate, but use more memory) - * Default is false. - */ - public static boolean getThreadPrefersEventExtractors() { - return OLE2ExtractorFactory.getThreadPrefersEventExtractors(); - } - - /** - * Should all threads prefer event based over usermodel based extractors? - * (usermodel extractors tend to be more accurate, but use more memory) - * Default is to use the thread level setting, which defaults to false. - */ - public static Boolean getAllThreadsPreferEventExtractors() { - return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors(); - } - - /** - * Should this thread prefer event based over usermodel based extractors? - * Will only be used if the All Threads setting is null. - */ - public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) { - OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors); - } - - /** - * Should all threads prefer event based over usermodel based extractors? - * If set, will take preference over the Thread level setting. - */ - public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) { - OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors); - } - - /** - * Should this thread use event based extractors is available? - * Checks the all-threads one first, then thread specific. 
- */ - protected static boolean getPreferEventExtractor() { - return OLE2ExtractorFactory.getPreferEventExtractor(); - } - - public static T createExtractor(File f) throws IOException, OpenXML4JException, XmlException { - NPOIFSFileSystem fs = null; - try { - fs = new NPOIFSFileSystem(f); - if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { - return (T)createEncryptedOOXMLExtractor(fs); - } - POITextExtractor extractor = createExtractor(fs); - extractor.setFilesystem(fs); - return (T)extractor; - } catch (OfficeXmlFileException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ)); - } catch (NotOLE2FileException ne) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); - } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw e; - } - } - - public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { - InputStream is = FileMagic.prepareToCheckMagic(inp); - - FileMagic fm = FileMagic.valueOf(is); - - switch (fm) { - case OLE2: - NPOIFSFileSystem fs = new NPOIFSFileSystem(is); - boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); - return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs); - case OOXML: - return createExtractor(OPCPackage.open(is)); - default: - throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); - } - } - - /** - * Tries to determine the actual type of file and produces a matching text-extractor for it. - * - * @param pkg An {@link OPCPackage}. - * @return A {@link POIXMLTextExtractor} for the given file. 
- * @throws IOException If an error occurs while reading the file - * @throws OpenXML4JException If an error parsing the OpenXML file format is found. - * @throws XmlException If an XML parsing error occurs. - * @throws IllegalArgumentException If no matching file type could be found. - */ - public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { - try { - // Check for the normal Office core document - PackageRelationshipCollection core; - core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL); - - // If nothing was found, try some of the other OOXML-based core types - if (core.size() == 0) { - // Could it be an OOXML-Strict one? - core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL); - } - if (core.size() == 0) { - // Could it be a visio one? - core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL); - if (core.size() == 1) - return new XDGFVisioExtractor(pkg); - } - - // Should just be a single core document, complain if not - if (core.size() != 1) { - throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); - } - - // Grab the core document part, and try to identify from that - final PackagePart corePart = pkg.getPart(core.getRelationship(0)); - final String contentType = corePart.getContentType(); - - // Is it XSSF? - for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { - if ( rel.getContentType().equals( contentType ) ) { - if (getPreferEventExtractor()) { - return new XSSFEventBasedExcelExtractor(pkg); - } - return new XSSFExcelExtractor(pkg); - } - } - - // Is it XWPF? - for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) { - if ( rel.getContentType().equals( contentType ) ) { - return new XWPFWordExtractor(pkg); - } - } - - // Is it XSLF? 
- for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { - if ( rel.getContentType().equals( contentType ) ) { - return new SlideShowExtractor(new XMLSlideShow(pkg)); - } - } - - // special handling for SlideShow-Theme-files, - if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) { - return new SlideShowExtractor(new XMLSlideShow(pkg)); - } - - // How about xlsb? - for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) { - if (rel.getContentType().equals(contentType)) { - return new XSSFBEventBasedExcelExtractor(pkg); - } - } - - throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); - - } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) { - // ensure that we close the package again if there is an error opening it, however - // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! - pkg.revert(); - throw e; - } - } - - public static T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { - return createExtractor(fs.getRoot()); - } - public static T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { - return createExtractor(fs.getRoot()); - } - public static T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { - return createExtractor(fs.getRoot()); - } - - public static T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException - { - // First, check for OOXML - for (String entryName : poifsDir.getEntryNames()) { - if (entryName.equals("Package")) { - OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package")); - return (T)createExtractor(pkg); - } - } - - // If not, ask the OLE2 code to check, with Scratchpad if possible - return (T)OLE2ExtractorFactory.createExtractor(poifsDir); - } - - /** 
- * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - * - * @deprecated Use the method with correct "embedded" - */ - @Deprecated - @Removal(version="4.2") - public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { - return getEmbeddedDocsTextExtractors(ext); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - */ - public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { - // All the embedded directories we spotted - ArrayList dirs = new ArrayList<>(); - // For anything else not directly held in as a POIFS directory - ArrayList nonPOIFS = new ArrayList<>(); - - // Find all the embedded directories - DirectoryEntry root = ext.getRoot(); - if (root == null) { - throw new IllegalStateException("The extractor didn't know which POIFS it came from!"); - } - - if (ext instanceof ExcelExtractor) { - // These are in MBD... under the root - Iterator it = root.getEntries(); - while (it.hasNext()) { - Entry entry = it.next(); - if (entry.getName().startsWith("MBD")) { - dirs.add(entry); - } - } - } else if (ext instanceof WordExtractor) { - // These are in ObjectPool -> _... 
under the root - try { - DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool"); - Iterator it = op.getEntries(); - while (it.hasNext()) { - Entry entry = it.next(); - if (entry.getName().startsWith("_")) { - dirs.add(entry); - } - } - } catch (FileNotFoundException e) { - logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage()); - // ignored here - } - //} else if(ext instanceof PowerPointExtractor) { - // Tricky, not stored directly in poifs - // TODO - } else if (ext instanceof OutlookTextExtactor) { - // Stored in the Attachment blocks - MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); - for (AttachmentChunks attachment : msg.getAttachmentFiles()) { - if (attachment.getAttachData() != null) { - byte[] data = attachment.getAttachData().getValue(); - nonPOIFS.add( new ByteArrayInputStream(data) ); - } else if (attachment.getAttachmentDirectory() != null) { - dirs.add(attachment.getAttachmentDirectory().getDirectory()); - } - } - } - - // Create the extractors - if (dirs.size() == 0 && nonPOIFS.size() == 0){ - return new POITextExtractor[0]; - } - - ArrayList textExtractors = new ArrayList<>(); - for (Entry dir : dirs) { - textExtractors.add(createExtractor((DirectoryNode) dir)); - } - for (InputStream nonPOIF : nonPOIFS) { - try { - textExtractors.add(createExtractor(nonPOIF)); - } catch (IllegalArgumentException e) { - // Ignore, just means it didn't contain - // a format we support as yet - logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage()); - } catch (XmlException | OpenXML4JException e) { - throw new IOException(e.getMessage(), e); - } - } - return textExtractors.toArray(new POITextExtractor[textExtractors.size()]); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. 
Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - * - * @deprecated Use the method with correct "embedded" - */ - @Deprecated - @Removal(version="4.2") - @NotImplemented - @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) - public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) { - return getEmbeddedDocsTextExtractors(ext); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - */ - @NotImplemented - @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) - public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) { - throw new IllegalStateException("Not yet supported"); - } - - private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs) - throws IOException { - String pass = Biff8EncryptionKey.getCurrentUserPassword(); - if (pass == null) { - pass = Decryptor.DEFAULT_PASSWORD; - } - - EncryptionInfo ei = new EncryptionInfo(fs); - Decryptor dec = ei.getDecryptor(); - InputStream is = null; - try { - if (!dec.verifyPassword(pass)) { - throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor"); - } - is = dec.getDataStream(fs); - return createExtractor(OPCPackage.open(is)); - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw new EncryptedDocumentException(e); - } finally { - IOUtils.closeQuietly(is); - - // also close the NPOIFSFileSystem here as we read all the data - // while decrypting - fs.close(); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java new file mode 100644 index 0000000000..8925776b9e --- /dev/null 
+++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java @@ -0,0 +1,228 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.xmlbeans.impl.common.SystemCache; + +/** + * This holds the common functionality for all POI OOXML Document classes. 
+ */ +public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable { + public static final String DOCUMENT_CREATOR = "Apache POI"; + + // OLE embeddings relation name + public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; + + // Embedded OPC documents relation name + public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"; + + /** The OPC Package */ + private OPCPackage pkg; + + /** + * The properties of the OPC package, opened as needed + */ + private POIXMLProperties properties; + + protected POIXMLDocument(OPCPackage pkg) { + super(pkg); + init(pkg); + } + + protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) { + super(pkg, coreDocumentRel); + init(pkg); + } + + private void init(OPCPackage p) { + this.pkg = p; + + // Workaround for XMLBEANS-512 - ensure that when we parse + // the file, we start with a fresh XML Parser each time, + // and avoid the risk of getting a SaxHandler that's in error + SystemCache.get().setSaxLoader(null); + } + + /** + * Wrapper to open a package, which works around shortcomings in java's this() constructor calls + * + * @param path the path to the document + * @return the new OPCPackage + * + * @exception IOException if there was a problem opening the document + */ + public static OPCPackage openPackage(String path) throws IOException { + try { + return OPCPackage.open(path); + } catch (InvalidFormatException e) { + throw new IOException(e.toString(), e); + } + } + + /** + * Get the assigned OPCPackage + * + * @return the assigned OPCPackage + */ + public OPCPackage getPackage() { + return this.pkg; + } + + protected PackagePart getCorePart() { + return getPackagePart(); + } + + /** + * Retrieves all the PackageParts which are defined as relationships of the base document with the + * specified content type. 
+ * + * @param contentType the content type + * + * @return all the base document PackageParts which match the content type + * + * @throws InvalidFormatException when the relationships or the parts contain errors + * + * @see org.apache.poi.xssf.usermodel.XSSFRelation + * @see org.apache.poi.xslf.usermodel.XSLFRelation + * @see org.apache.poi.xwpf.usermodel.XWPFRelation + * @see org.apache.poi.xdgf.usermodel.XDGFRelation + */ + protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { + PackageRelationshipCollection partsC = + getPackagePart().getRelationshipsByType(contentType); + + PackagePart[] parts = new PackagePart[partsC.size()]; + int count = 0; + for (PackageRelationship rel : partsC) { + parts[count] = getPackagePart().getRelatedPart(rel); + count++; + } + return parts; + } + + /** + * Get the document properties. This gives you access to the + * core ooxml properties, and the extended ooxml properties. + * + * @return the document properties + */ + public POIXMLProperties getProperties() { + if(properties == null) { + try { + properties = new POIXMLProperties(pkg); + } catch (Exception e){ + throw new POIXMLException(e); + } + } + return properties; + } + + /** + * Get the document's embedded files. + * + * @return the document's embedded files + * + * @throws OpenXML4JException if the embedded parts can't be determined + */ + public abstract List getAllEmbedds() throws OpenXML4JException; + + protected final void load(POIXMLFactory factory) throws IOException { + Map context = new HashMap<>(); + try { + read(factory, context); + } catch (OpenXML4JException e){ + throw new POIXMLException(e); + } + onDocumentRead(); + context.clear(); + } + + /** + * Closes the underlying {@link OPCPackage} from which this + * document was read, if there is one + * + *

Once this has been called, no further + * operations, updates or reads should be performed on the + * document. + * + * @throws IOException for writable packages, if an IO exception occur during the saving process. + */ + @Override + public void close() throws IOException { + if (pkg != null) { + if (pkg.getPackageAccess() == PackageAccess.READ) { + pkg.revert(); + } else { + pkg.close(); + } + pkg = null; + } + } + + /** + * Write out this document to an Outputstream. + * + * Note - if the Document was opened from a {@link File} rather + * than an {@link InputStream}, you must write out to + * a different file, overwriting via an OutputStream isn't possible. + * + * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive + * or has a high cost/latency associated with each written byte, + * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream} + * to improve write performance. + * + * @param stream - the java OutputStream you wish to write the file to + * + * @exception IOException if anything can't be written. + */ + @SuppressWarnings("resource") + public final void write(OutputStream stream) throws IOException { + OPCPackage p = getPackage(); + if(p == null) { + throw new IOException("Cannot write data, document seems to have been closed already"); + } + + //force all children to commit their changes into the underlying OOXML Package + // TODO Shouldn't they be committing to the new one instead? 
+ Set context = new HashSet<>(); + onSave(context); + context.clear(); + + //save extended and custom properties + getProperties().commit(); + + p.save(stream); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java new file mode 100644 index 0000000000..5a368c576a --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java @@ -0,0 +1,746 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.ooxml; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.openxml4j.opc.TargetMode; +import org.apache.poi.util.Internal; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.apache.poi.xddf.usermodel.chart.XDDFChart; +import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +/** + * Represents an entry of a OOXML package. + *

+ * Each POIXMLDocumentPart keeps a reference to the underlying {@link org.apache.poi.openxml4j.opc.PackagePart}. + *

+ */ +public class POIXMLDocumentPart { + private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class); + + private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT; + private PackagePart packagePart; + private POIXMLDocumentPart parent; + private Map relations = new LinkedHashMap<>(); + private boolean isCommited = false; + + /** + * to check whether embedded part is already committed + * + * @return return true if embedded part is committed + */ + public boolean isCommited() { + return isCommited; + } + + /** + * setter method to set embedded part is committed + * + * @param isCommited boolean value + */ + public void setCommited(boolean isCommited) { + this.isCommited = isCommited; + } + + /** + * The RelationPart is a cached relationship between the document, which contains the RelationPart, + * and one of its referenced child document parts. + * The child document parts may only belong to one parent, but it's often referenced by other + * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it. + */ + public static class RelationPart { + private final PackageRelationship relationship; + private final POIXMLDocumentPart documentPart; + + RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) { + this.relationship = relationship; + this.documentPart = documentPart; + } + + /** + * @return the cached relationship, which uniquely identifies this child document part within the parent + */ + public PackageRelationship getRelationship() { + return relationship; + } + + /** + * @param the cast of the caller to a document sub class + * @return the child document part + */ + @SuppressWarnings("unchecked") + public T getDocumentPart() { + return (T) documentPart; + } + } + + /** + * Counter that provides the amount of incoming relations from other parts + * to this part. 
+ */ + private int relationCounter; + + int incrementRelationCounter() { + relationCounter++; + return relationCounter; + } + + int decrementRelationCounter() { + relationCounter--; + return relationCounter; + } + + int getRelationCounter() { + return relationCounter; + } + + /** + * Construct POIXMLDocumentPart representing a "core document" package part. + * + * @param pkg the OPCPackage containing this document + */ + public POIXMLDocumentPart(OPCPackage pkg) { + this(pkg, PackageRelationshipTypes.CORE_DOCUMENT); + } + + /** + * Construct POIXMLDocumentPart representing a custom "core document" package part. + * + * @param pkg the OPCPackage containing this document + * @param coreDocumentRel the relation type of this document + */ + public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) { + this(getPartFromOPCPackage(pkg, coreDocumentRel)); + this.coreDocumentRel = coreDocumentRel; + } + + /** + * Creates new POIXMLDocumentPart - called by client code to create new parts from scratch. + * + * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean) + */ + public POIXMLDocumentPart() { + } + + /** + * Creates an POIXMLDocumentPart representing the given package part and relationship. + * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. + * + * @param part - The package part that holds xml data representing this sheet. + * @see #read(POIXMLFactory, java.util.Map) + * @since POI 3.14-Beta1 + */ + public POIXMLDocumentPart(PackagePart part) { + this(null, part); + } + + /** + * Creates an POIXMLDocumentPart representing the given package part, relationship and parent + * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. + * + * @param parent - Parent part + * @param part - The package part that holds xml data representing this sheet. 
+ * @see #read(POIXMLFactory, java.util.Map) + * @since POI 3.14-Beta1 + */ + public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) { + this.packagePart = part; + this.parent = parent; + } + + /** + * When you open something like a theme, call this to + * re-base the XML Document onto the core child of the + * current core document + * + * @param pkg the package to be rebased + * @throws InvalidFormatException if there was an error in the core document relation + * @throws IllegalStateException if there are more than one core document relations + */ + protected final void rebase(OPCPackage pkg) throws InvalidFormatException { + PackageRelationshipCollection cores = + packagePart.getRelationshipsByType(coreDocumentRel); + if (cores.size() != 1) { + throw new IllegalStateException( + "Tried to rebase using " + coreDocumentRel + + " but found " + cores.size() + " parts of the right type" + ); + } + packagePart = packagePart.getRelatedPart(cores.getRelationship(0)); + } + + /** + * Provides access to the underlying PackagePart + * + * @return the underlying PackagePart + */ + public final PackagePart getPackagePart() { + return packagePart; + } + + /** + * Returns the list of child relations for this POIXMLDocumentPart + * + * @return child relations + */ + public final List getRelations() { + List l = new ArrayList<>(); + for (RelationPart rp : relations.values()) { + l.add(rp.getDocumentPart()); + } + return Collections.unmodifiableList(l); + } + + /** + * Returns the list of child relations for this POIXMLDocumentPart + * + * @return child relations + */ + public final List getRelationParts() { + List l = new ArrayList<>(relations.values()); + return Collections.unmodifiableList(l); + } + + /** + * Returns the target {@link POIXMLDocumentPart}, where a + * {@link PackageRelationship} is set from the {@link PackagePart} of this + * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target + * {@link POIXMLDocumentPart} with a {@link 
PackageRelationship#getId()} + * matching the given parameter value. + * + * @param id The relation id to look for + * @return the target part of the relation, or null, if none exists + */ + public final POIXMLDocumentPart getRelationById(String id) { + RelationPart rp = getRelationPartById(id); + return (rp == null) ? null : rp.getDocumentPart(); + } + + /** + * Returns the target {@link RelationPart}, where a + * {@link PackageRelationship} is set from the {@link PackagePart} of this + * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target + * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()} + * matching the given parameter value. + * + * @param id The relation id to look for + * @return the target relation part, or null, if none exists + * @since 4.0.0 + */ + public final RelationPart getRelationPartById(String id) { + return relations.get(id); + } + + /** + * Returns the first {@link PackageRelationship#getId()} of the + * {@link PackageRelationship}, that sources from the {@link PackagePart} of + * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given + * parameter value.

+ *

+ * There can be multiple references to the given {@link POIXMLDocumentPart} + * and only the first in the order of creation is returned. + * + * @param part The {@link POIXMLDocumentPart} for which the according + * relation-id shall be found. + * @return The value of the {@link PackageRelationship#getId()} or null, if + * parts are not related. + */ + public final String getRelationId(POIXMLDocumentPart part) { + for (RelationPart rp : relations.values()) { + if (rp.getDocumentPart() == part) { + return rp.getRelationship().getId(); + } + } + return null; + } + + /** + * Add a new child POIXMLDocumentPart + * + * @param relId the preferred relation id, when null the next free relation id will be used + * @param relationshipType the package relationship type + * @param part the child to add + * @return the new RelationPart + * @since 3.14-Beta1 + */ + public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) { + PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart()); + if (pr == null) { + PackagePartName ppn = part.getPackagePart().getPartName(); + String relType = relationshipType.getRelation(); + pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId); + } + addRelation(pr, part); + return new RelationPart(pr, part); + } + + /** + * Add a new child POIXMLDocumentPart + * + * @param pr the relationship of the child + * @param part the child to add + */ + private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) { + relations.put(pr.getId(), new RelationPart(pr, part)); + part.incrementRelationCounter(); + + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed.

+ *

+ * If there are multiple relationships to the same part, this will only + * remove the first relationship in the order of creation. The removal + * via the part id ({@link #removeRelation(String)}) is preferred. + * + * @param part the part which relation is to be removed from this document + */ + protected final void removeRelation(POIXMLDocumentPart part) { + removeRelation(part, true); + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed and flag is set to true.

+ *

+ * If there are multiple relationships to the same part, this will only + * remove the first relationship in the order of creation. The removal + * via the part id ({@link #removeRelation(String, boolean)}) is preferred. + * + * @param part The related part, to which the relation shall be removed. + * @param removeUnusedParts true, if the part shall be removed from the package if not + * needed any longer. + * @return true, if the relation was removed + */ + protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) { + String id = getRelationId(part); + return removeRelation(id, removeUnusedParts); + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed.

+ *

+ * If there are multiple relationships to the same part, this will only + * remove the first relationship in the order of creation. The removal + * via the part id ({@link #removeRelation(String)}) is preferred. + * + * @param partId the part id which relation is to be removed from this document + * @since 4.0.0 + */ + protected final void removeRelation(String partId) { + removeRelation(partId, true); + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed and flag is set to true.

+ * + * @param partId The related part id, to which the relation shall be removed. + * @param removeUnusedParts true, if the part shall be removed from the package if not + * needed any longer. + * @return true, if the relation was removed + * @since 4.0.0 + */ + private final boolean removeRelation(String partId, boolean removeUnusedParts) { + RelationPart rp = relations.get(partId); + if (rp == null) { + // part is not related with this POIXMLDocumentPart + return false; + } + POIXMLDocumentPart part = rp.getDocumentPart(); + /* decrement usage counter */ + part.decrementRelationCounter(); + /* remove packagepart relationship */ + getPackagePart().removeRelationship(partId); + /* remove POIXMLDocument from relations */ + relations.remove(partId); + + if (removeUnusedParts) { + /* if last relation to target part was removed, delete according target part */ + if (part.getRelationCounter() == 0) { + try { + part.onDocumentRemove(); + } catch (IOException e) { + throw new POIXMLException(e); + } + getPackagePart().getPackage().removePart(part.getPackagePart()); + } + } + return true; + } + + + /** + * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent. + * + * @return the parent POIXMLDocumentPart or null for the root element. + */ + public final POIXMLDocumentPart getParent() { + return parent; + } + + @Override + public String toString() { + return packagePart == null ? "" : packagePart.toString(); + } + + /** + * Save the content in the underlying package part. + * Default implementation is empty meaning that the package part is left unmodified. + *

+ * Sub-classes should override and add logic to marshal the "model" into Ooxml4J. + *

+ * For example, the code saving a generic XML entry may look as follows: + *

+     * protected void commit() throws IOException {
+     *   PackagePart part = getPackagePart();
+     *   OutputStream out = part.getOutputStream();
+     *   XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
+     *   bean.save(out, DEFAULT_XML_OPTIONS);
+     *   out.close();
+     * }
+     * 
+ * + * @throws IOException a subclass may throw an IOException if the changes can't be committed + */ + protected void commit() throws IOException { + + } + + /** + * Save changes in the underlying OOXML package. + * Recursively fires {@link #commit()} for each package part + * + * @param alreadySaved context set containing already visited nodes + * @throws IOException a related part may throw an IOException if the changes can't be saved + */ + protected final void onSave(Set alreadySaved) throws IOException { + //if part is already committed then return + if (this.isCommited) { + return; + } + + // this usually clears out previous content in the part... + prepareForCommit(); + + commit(); + alreadySaved.add(this.getPackagePart()); + for (RelationPart rp : relations.values()) { + POIXMLDocumentPart p = rp.getDocumentPart(); + if (!alreadySaved.contains(p.getPackagePart())) { + p.onSave(alreadySaved); + } + } + } + + /** + * Ensure that a memory based package part does not have lingering data from previous + * commit() calls. + *

+ * Note: This is overwritten for some objects, as *PictureData seem to store the actual content + * in the part directly without keeping a copy like all others therefore we need to handle them differently. + */ + protected void prepareForCommit() { + PackagePart part = this.getPackagePart(); + if (part != null) { + part.clear(); + } + } + + /** + * Create a new child POIXMLDocumentPart + * + * @param descriptor the part descriptor + * @param factory the factory that will create an instance of the requested relation + * @return the created child POIXMLDocumentPart + * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain + * equivalent part names and package implementers shall neither + * create nor recognize packages with equivalent part names. + */ + public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) { + return createRelationship(descriptor, factory, -1, false).getDocumentPart(); + } + + /** + * Create a new child POIXMLDocumentPart + * + * @param descriptor the part descriptor + * @param factory the factory that will create an instance of the requested relation + * @param idx part number + * @return the created child POIXMLDocumentPart + * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain + * equivalent part names and package implementers shall neither + * create nor recognize packages with equivalent part names. + */ + public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) { + return createRelationship(descriptor, factory, idx, false).getDocumentPart(); + } + + /** + * Identifies the next available part number for a part of the given type, + * if possible, otherwise -1 if none are available. 
+ * The found (valid) index can then be safely given to + * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or + * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)} + * without naming clashes. + * If parts with other types are already claiming a name for this relationship + * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace + * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered + * when finding the next spare number. + * + * @param descriptor The relationship type to find the part number for + * @param minIdx The minimum free index to assign, use -1 for any + * @return The next free part number, or -1 if none available + */ + protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) { + OPCPackage pkg = packagePart.getPackage(); + + try { + String name = descriptor.getDefaultFileName(); + if (name.equals(descriptor.getFileName(9999))) { + // Non-index based, check if default is free + PackagePartName ppName = PackagingURIHelper.createPartName(name); + if (pkg.containPart(ppName)) { + // Default name already taken, not index based, nothing free + return -1; + } else { + // Default name free + return 0; + } + } + + // Default to searching from 1, unless they asked for 0+ + int idx = (minIdx < 0) ? 1 : minIdx; + int maxIdx = minIdx + pkg.getParts().size(); + while (idx <= maxIdx) { + name = descriptor.getFileName(idx); + PackagePartName ppName = PackagingURIHelper.createPartName(name); + if (!pkg.containPart(ppName)) { + return idx; + } + idx++; + } + } catch (InvalidFormatException e) { + // Give a general wrapped exception for the problem + throw new POIXMLException(e); + } + return -1; + } + + /** + * Create a new child POIXMLDocumentPart + * + * @param descriptor the part descriptor + * @param factory the factory that will create an instance of the requested relation + * @param idx part number + * @param noRelation if true, then no relationship is added. 
+ * @return the created child POIXMLDocumentPart + * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain + * equivalent part names and package implementers shall neither + * create nor recognize packages with equivalent part names. + */ + public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) { + try { + PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx)); + PackageRelationship rel = null; + PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType()); + if (!noRelation) { + /* only add to relations, if according relationship is being created. */ + rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation()); + } + POIXMLDocumentPart doc = factory.newDocumentPart(descriptor); + doc.packagePart = part; + doc.parent = this; + if (!noRelation) { + /* only add to relations, if according relationship is being created. 
*/ + addRelation(rel, doc); + } + + return new RelationPart(rel, doc); + } catch (PartAlreadyExistsException pae) { + // Return the specific exception so the user knows + // that the name is already taken + throw pae; + } catch (Exception e) { + // Give a general wrapped exception for the problem + throw new POIXMLException(e); + } + } + + /** + * Iterate through the underlying PackagePart and create child POIXMLFactory instances + * using the specified factory + * + * @param factory the factory object that creates POIXMLFactory instances + * @param context context map containing already visited noted keyed by targetURI + * @throws OpenXML4JException thrown when a related part can't be read + */ + protected void read(POIXMLFactory factory, Map context) throws OpenXML4JException { + PackagePart pp = getPackagePart(); + // add mapping a second time, in case of initial caller hasn't done so + POIXMLDocumentPart otherChild = context.put(pp, this); + if (otherChild != null && otherChild != this) { + throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!"); + } + + if (!pp.hasRelationships()) return; + + PackageRelationshipCollection rels = packagePart.getRelationships(); + List readLater = new ArrayList<>(); + + // scan breadth-first, so parent-relations are hopefully the shallowest element + for (PackageRelationship rel : rels) { + if (rel.getTargetMode() == TargetMode.INTERNAL) { + URI uri = rel.getTargetURI(); + + // check for internal references (e.g. 
'#Sheet1!A1') + PackagePartName relName; + if (uri.getRawFragment() != null) { + relName = PackagingURIHelper.createPartName(uri.getPath()); + } else { + relName = PackagingURIHelper.createPartName(uri); + } + + final PackagePart p = packagePart.getPackage().getPart(relName); + if (p == null) { + logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI()); + continue; + } + + POIXMLDocumentPart childPart = context.get(p); + if (childPart == null) { + childPart = factory.createDocumentPart(this, p); + //here we are checking if part if embedded and excel then set it to chart class + //so that at the time to writing we can also write updated embedded part + if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) { + ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart); + } + childPart.parent = this; + // already add child to context, so other children can reference it + context.put(p, childPart); + readLater.add(childPart); + } + + addRelation(rel, childPart); + } + } + + for (POIXMLDocumentPart childPart : readLater) { + childPart.read(factory, context); + } + } + + /** + * Get the PackagePart that is the target of a relationship from this Part. 
+ * + * @param rel The relationship + * @return The target part + * @throws InvalidFormatException thrown if the related part is erroneous + */ + protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException { + return getPackagePart().getRelatedPart(rel); + } + + + /** + * Fired when a new package part is created + * + * @throws IOException a subclass may throw an IOException on document creation + */ + protected void onDocumentCreate() throws IOException { + + } + + /** + * Fired when a package part is read + * + * @throws IOException a subclass may throw an IOException when a document is read + */ + protected void onDocumentRead() throws IOException { + + } + + /** + * Fired when a package part is about to be removed from the package + * + * @throws IOException a subclass may throw an IOException when a document is removed + */ + protected void onDocumentRemove() throws IOException { + + } + + /** + * Internal method, do not use! + *

+ * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()} + * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed. + * + * @param part the part which is to be read + * @throws IOException if the part can't be read + */ + @Internal + @Deprecated + public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException { + part.onDocumentRead(); + } + + /** + * Retrieves the core document part + * + * @since POI 3.14-Beta1 + */ + private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) { + PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0); + + if (coreRel != null) { + PackagePart pp = pkg.getPart(coreRel); + if (pp == null) { + throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found."); + } + return pp; + } + + coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0); + if (coreRel != null) { + throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699"); + } + + throw new POIXMLException("OOXML file structure broken/invalid - no core document found!"); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java new file mode 100644 index 0000000000..d002fa0e84 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java @@ -0,0 +1,70 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +/** + * Indicates a generic OOXML error. + * + * @author Yegor Kozlov + */ +@SuppressWarnings("serial") +public final class POIXMLException extends RuntimeException{ + /** + * Create a new POIXMLException with no + * detail message. + */ + public POIXMLException() { + super(); + } + + /** + * Create a new POIXMLException with + * the String specified as an error message. + * + * @param msg The error message for the exception. + */ + public POIXMLException(String msg) { + super(msg); + } + + /** + * Create a new POIXMLException with + * the String specified as an error message and the cause. + * + * @param msg The error message for the exception. + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public POIXMLException(String msg, Throwable cause) { + super(msg, cause); + } + + /** + * Create a new POIXMLException with + * the specified cause. + * + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) 
+ */ + public POIXMLException(Throwable cause) { + super(cause); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java new file mode 100644 index 0000000000..ca6cdb30c0 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java @@ -0,0 +1,139 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.ooxml; + +import java.lang.reflect.InvocationTargetException; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * Defines a factory API that enables sub-classes to create instances of POIXMLDocumentPart + */ +public abstract class POIXMLFactory { + private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class); + + private static final Class[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class}; + private static final Class[] ORPHAN_PART = {PackagePart.class}; + + /** + * Create a POIXMLDocumentPart from existing package part and relation. This method is called + * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document + * + * @param parent parent part + * @param part the PackagePart representing the created instance + * @return A new instance of a POIXMLDocumentPart. + * + * @since by POI 3.14-Beta1 + */ + public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) { + PackageRelationship rel = getPackageRelationship(parent, part); + POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType()); + + if (descriptor == null || descriptor.getRelationClass() == null) { + LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType()); + return new POIXMLDocumentPart(parent, part); + } + + Class cls = descriptor.getRelationClass(); + try { + try { + return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part}); + } catch (NoSuchMethodException e) { + return createDocumentPart(cls, ORPHAN_PART, new Object[]{part}); + } + } catch (Exception e) { + throw new POIXMLException((e.getCause() != null ? 
e.getCause() : e).getMessage(), e); + } + } + + /** + * Need to delegate instantiation to sub class because of constructor visibility + * + * @param cls the document class to be instantiated + * @param classes the classes of the constructor arguments + * @param values the values of the constructor arguments + * @return the new document / part + * @throws SecurityException thrown if the object can't be instantiated + * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments + * @throws InstantiationException thrown if the object can't be instantiated + * @throws IllegalAccessException thrown if the object can't be instantiated + * @throws InvocationTargetException thrown if the object can't be instantiated + * + * @since POI 3.14-Beta1 + */ + protected abstract POIXMLDocumentPart createDocumentPart + (Class cls, Class[] classes, Object[] values) + throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException; + + /** + * returns the descriptor for the given relationship type + * + * @param relationshipType the relationship type of the descriptor + * @return the descriptor or null if type is unknown + * + * @since POI 3.14-Beta1 + */ + protected abstract POIXMLRelation getDescriptor(String relationshipType); + + /** + * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts + * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc. + * + * @param descriptor describes the object to create + * @return A new instance of a POIXMLDocumentPart. 
+ */ + public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) { + Class cls = descriptor.getRelationClass(); + try { + return createDocumentPart(cls, null, null); + } catch (Exception e) { + throw new POIXMLException(e); + } + } + + /** + * Retrieves the package relationship of the child part within the parent + * + * @param parent the parent to search for the part + * @param part the part to look for + * + * @return the relationship + * + * @throws POIXMLException if the relations are erroneous or the part is not related + * + * @since POI 3.14-Beta1 + */ + protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) { + try { + String partName = part.getPartName().getName(); + for (PackageRelationship pr : parent.getPackagePart().getRelationships()) { + String packName = pr.getTargetURI().toASCIIString(); + if (packName.equalsIgnoreCase(partName)) { + return pr; + } + } + } catch (InvalidFormatException e) { + throw new POIXMLException("error while determining package relations", e); + } + + throw new POIXMLException("package part isn't a child of the parent document."); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java new file mode 100644 index 0000000000..04ca65fb21 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java @@ -0,0 +1,611 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Date; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.ContentTypes; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.openxml4j.opc.StreamHelper; +import org.apache.poi.openxml4j.opc.TargetMode; +import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.openxml4j.util.Nullable; +import org.apache.xmlbeans.XmlException; +import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; + +/** + * Wrapper around the three different kinds of OOXML properties + * and metadata a document can have (Core, Extended and Custom), + * as well as Thumbnails. 
+ */ +public class POIXMLProperties { + private OPCPackage pkg; + private CoreProperties core; + private ExtendedProperties ext; + private CustomProperties cust; + + private PackagePart extPart; + private PackagePart custPart; + + + private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE; + private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE; + static { + NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance(); + NEW_EXT_INSTANCE.addNewProperties(); + + NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance(); + NEW_CUST_INSTANCE.addNewProperties(); + } + + public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException { + this.pkg = docPackage; + + // Core properties + core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() ); + + // Extended properties + PackageRelationshipCollection extRel = + pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES); + if(extRel.size() == 1) { + extPart = pkg.getPart( extRel.getRelationship(0)); + org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse( + extPart.getInputStream(), DEFAULT_XML_OPTIONS + ); + ext = new ExtendedProperties(props); + } else { + extPart = null; + ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy()); + } + + // Custom properties + PackageRelationshipCollection custRel = + pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES); + if(custRel.size() == 1) { + custPart = pkg.getPart( custRel.getRelationship(0)); + 
org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse( + custPart.getInputStream(), DEFAULT_XML_OPTIONS + ); + cust = new CustomProperties(props); + } else { + custPart = null; + cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy()); + } + } + + /** + * Returns the core document properties + * + * @return the core document properties + */ + public CoreProperties getCoreProperties() { + return core; + } + + /** + * Returns the extended document properties + * + * @return the extended document properties + */ + public ExtendedProperties getExtendedProperties() { + return ext; + } + + /** + * Returns the custom document properties + * + * @return the custom document properties + */ + public CustomProperties getCustomProperties() { + return cust; + } + + /** + * Returns the {@link PackagePart} for the Document + * Thumbnail, or null if there isn't one + * + * @return The Document Thumbnail part or null + */ + protected PackagePart getThumbnailPart() { + PackageRelationshipCollection rels = + pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL); + if(rels.size() == 1) { + return pkg.getPart(rels.getRelationship(0)); + } + return null; + } + /** + * Returns the name of the Document thumbnail, eg + * thumbnail.jpeg, or null if there + * isn't one. + * + * @return The thumbnail filename, or null + */ + public String getThumbnailFilename() { + PackagePart tPart = getThumbnailPart(); + if (tPart == null) return null; + String name = tPart.getPartName().getName(); + return name.substring(name.lastIndexOf('/')); + } + /** + * Returns the Document thumbnail image data, or {@code null} if there isn't one. 
+ * + * @return The thumbnail data, or null + * + * @throws IOException if the thumbnail can't be read + */ + public InputStream getThumbnailImage() throws IOException { + PackagePart tPart = getThumbnailPart(); + if (tPart == null) return null; + return tPart.getInputStream(); + } + + /** + * Sets the Thumbnail for the document, replacing any existing one. + * + * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg} + * @param imageData The inputstream to read the thumbnail image from + * + * @throws IOException if the thumbnail can't be written + */ + public void setThumbnail(String filename, InputStream imageData) throws IOException { + PackagePart tPart = getThumbnailPart(); + if (tPart == null) { + // New thumbnail + pkg.addThumbnail(filename, imageData); + } else { + // Change existing + String newType = ContentTypes.getContentTypeFromFileExtension(filename); + if (! newType.equals(tPart.getContentType())) { + throw new IllegalArgumentException("Can't set a Thumbnail of type " + + newType + " when existing one is of a different type " + + tPart.getContentType()); + } + StreamHelper.copyStream(imageData, tPart.getOutputStream()); + } + } + + /** + * Commit changes to the underlying OPC package + * + * @throws IOException if the properties can't be saved + * @throws POIXMLException if the properties are erroneous + */ + public void commit() throws IOException{ + + if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){ + try { + PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml"); + pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"); + extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml"); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } + } + if(custPart == null && 
!NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){ + try { + PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml"); + pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"); + custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml"); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } + } + if(extPart != null){ + OutputStream out = extPart.getOutputStream(); + if (extPart.getSize() > 0) { + extPart.clear(); + } + ext.props.save(out, DEFAULT_XML_OPTIONS); + out.close(); + } + if(custPart != null){ + OutputStream out = custPart.getOutputStream(); + cust.props.save(out, DEFAULT_XML_OPTIONS); + out.close(); + } + } + + /** + * The core document properties + */ + public static class CoreProperties { + private PackagePropertiesPart part; + private CoreProperties(PackagePropertiesPart part) { + this.part = part; + } + + public String getCategory() { + return part.getCategoryProperty().getValue(); + } + public void setCategory(String category) { + part.setCategoryProperty(category); + } + public String getContentStatus() { + return part.getContentStatusProperty().getValue(); + } + public void setContentStatus(String contentStatus) { + part.setContentStatusProperty(contentStatus); + } + public String getContentType() { + return part.getContentTypeProperty().getValue(); + } + public void setContentType(String contentType) { + part.setContentTypeProperty(contentType); + } + public Date getCreated() { + return part.getCreatedProperty().getValue(); + } + public void setCreated(Nullable date) { + part.setCreatedProperty(date); + } + public void setCreated(String date) { + part.setCreatedProperty(date); + } + public String getCreator() { + return part.getCreatorProperty().getValue(); + } + public void setCreator(String creator) { + part.setCreatorProperty(creator); + } + public String 
getDescription() { + return part.getDescriptionProperty().getValue(); + } + public void setDescription(String description) { + part.setDescriptionProperty(description); + } + public String getIdentifier() { + return part.getIdentifierProperty().getValue(); + } + public void setIdentifier(String identifier) { + part.setIdentifierProperty(identifier); + } + public String getKeywords() { + return part.getKeywordsProperty().getValue(); + } + public void setKeywords(String keywords) { + part.setKeywordsProperty(keywords); + } + public Date getLastPrinted() { + return part.getLastPrintedProperty().getValue(); + } + public void setLastPrinted(Nullable date) { + part.setLastPrintedProperty(date); + } + public void setLastPrinted(String date) { + part.setLastPrintedProperty(date); + } + /** @since POI 3.15 beta 3 */ + public String getLastModifiedByUser() { + return part.getLastModifiedByProperty().getValue(); + } + /** @since POI 3.15 beta 3 */ + public void setLastModifiedByUser(String user) { + part.setLastModifiedByProperty(user); + } + public Date getModified() { + return part.getModifiedProperty().getValue(); + } + public void setModified(Nullable date) { + part.setModifiedProperty(date); + } + public void setModified(String date) { + part.setModifiedProperty(date); + } + public String getSubject() { + return part.getSubjectProperty().getValue(); + } + public void setSubjectProperty(String subject) { + part.setSubjectProperty(subject); + } + public void setTitle(String title) { + part.setTitleProperty(title); + } + public String getTitle() { + return part.getTitleProperty().getValue(); + } + public String getRevision() { + return part.getRevisionProperty().getValue(); + } + public void setRevision(String revision) { + try { + Long.valueOf(revision); + part.setRevisionProperty(revision); + } + catch (NumberFormatException e) {} + } + + public PackagePropertiesPart getUnderlyingProperties() { + return part; + } + } + + /** + * Extended document properties + */ + public 
static class ExtendedProperties { + private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props; + private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) { + this.props = props; + } + + public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() { + return props.getProperties(); + } + + public String getTemplate() { + if (props.getProperties().isSetTemplate()) { + return props.getProperties().getTemplate(); + } + return null; + } + public String getManager() { + if (props.getProperties().isSetManager()) { + return props.getProperties().getManager(); + } + return null; + } + public String getCompany() { + if (props.getProperties().isSetCompany()) { + return props.getProperties().getCompany(); + } + return null; + } + public String getPresentationFormat() { + if (props.getProperties().isSetPresentationFormat()) { + return props.getProperties().getPresentationFormat(); + } + return null; + } + public String getApplication() { + if (props.getProperties().isSetApplication()) { + return props.getProperties().getApplication(); + } + return null; + } + public String getAppVersion() { + if (props.getProperties().isSetAppVersion()) { + return props.getProperties().getAppVersion(); + } + return null; + } + + public int getPages() { + if (props.getProperties().isSetPages()) { + return props.getProperties().getPages(); + } + return -1; + } + public int getWords() { + if (props.getProperties().isSetWords()) { + return props.getProperties().getWords(); + } + return -1; + } + public int getCharacters() { + if (props.getProperties().isSetCharacters()) { + return props.getProperties().getCharacters(); + } + return -1; + } + public int getCharactersWithSpaces() { + if (props.getProperties().isSetCharactersWithSpaces()) { + return props.getProperties().getCharactersWithSpaces(); + } + return -1; + } + public int getLines() { + if 
(props.getProperties().isSetLines()) { + return props.getProperties().getLines(); + } + return -1; + } + public int getParagraphs() { + if (props.getProperties().isSetParagraphs()) { + return props.getProperties().getParagraphs(); + } + return -1; + } + public int getSlides() { + if (props.getProperties().isSetSlides()) { + return props.getProperties().getSlides(); + } + return -1; + } + public int getNotes() { + if (props.getProperties().isSetNotes()) { + return props.getProperties().getNotes(); + } + return -1; + } + public int getTotalTime() { + if (props.getProperties().isSetTotalTime()) { + return props.getProperties().getTotalTime(); + } + return -1; + } + public int getHiddenSlides() { + if (props.getProperties().isSetHiddenSlides()) { + return props.getProperties().getHiddenSlides(); + } + return -1; + } + public int getMMClips() { + if (props.getProperties().isSetMMClips()) { + return props.getProperties().getMMClips(); + } + return -1; + } + + public String getHyperlinkBase() { + if (props.getProperties().isSetHyperlinkBase()) { + return props.getProperties().getHyperlinkBase(); + } + return null; + } + } + + /** + * Custom document properties + */ + public static class CustomProperties { + /** + * Each custom property element contains an fmtid attribute + * with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}). 
+ */ + public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"; + + private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props; + private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) { + this.props = props; + } + + public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() { + return props.getProperties(); + } + + /** + * Add a new property + * + * @param name the property name + * @throws IllegalArgumentException if a property with this name already exists + */ + private CTProperty add(String name) { + if(contains(name)) { + throw new IllegalArgumentException("A property with this name " + + "already exists in the custom properties"); + } + + CTProperty p = props.getProperties().addNewProperty(); + int pid = nextPid(); + p.setPid(pid); + p.setFmtid(FORMAT_ID); + p.setName(name); + return p; + } + + /** + * Add a new string property + * + * @param name the property name + * @param value the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, String value){ + CTProperty p = add(name); + p.setLpwstr(value); + } + + /** + * Add a new double property + * + * @param name the property name + * @param value the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, double value){ + CTProperty p = add(name); + p.setR8(value); + } + + /** + * Add a new integer property + * + * @param name the property name + * @param value the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, int value){ + CTProperty p = add(name); + p.setI4(value); + } + + /** + * Add a new boolean property + * + * @param name the property name + * @param value 
the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, boolean value){ + CTProperty p = add(name); + p.setBool(value); + } + + /** + * Generate next id that uniquely relates a custom property + * + * @return next property id starting with 2 + */ + protected int nextPid() { + int propid = 1; + for(CTProperty p : props.getProperties().getPropertyArray()){ + if(p.getPid() > propid) propid = p.getPid(); + } + return propid + 1; + } + + /** + * Check if a property with this name already exists in the collection of custom properties + * + * @param name the name to check + * @return whether a property with the given name exists in the custom properties + */ + public boolean contains(String name) { + for(CTProperty p : props.getProperties().getPropertyArray()){ + if(p.getName().equals(name)) return true; + } + return false; + } + + /** + * Retrieve the custom property with this name, or null if none exists. + * + * You will need to test the various isSetX methods to work out + * what the type of the property is, before fetching the + * appropriate value for it. + * + * @param name the name of the property to fetch + * + * @return the custom property with this name, or null if none exists + */ + public CTProperty getProperty(String name) { + for(CTProperty p : props.getProperties().getPropertyArray()){ + if(p.getName().equals(name)) { + return p; + } + } + return null; + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java new file mode 100644 index 0000000000..c661ce8e20 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java @@ -0,0 +1,170 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * Represents a descriptor of a OOXML relation. + */ +public abstract class POIXMLRelation { + + private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class); + + /** + * Describes the content stored in a part. + */ + private String _type; + + /** + * The kind of connection between a source part and a target part in a package. + */ + private String _relation; + + /** + * The path component of a pack URI. + */ + private String _defaultName; + + /** + * Defines what object is used to construct instances of this relationship + */ + private Class _cls; + + /** + * Instantiates a POIXMLRelation. 
+ * + * @param type content type + * @param rel relationship + * @param defaultName default item name + * @param cls defines what object is used to construct instances of this relationship + */ + public POIXMLRelation(String type, String rel, String defaultName, Class cls) { + _type = type; + _relation = rel; + _defaultName = defaultName; + _cls = cls; + } + + /** + * Instantiates a POIXMLRelation. + * + * @param type content type + * @param rel relationship + * @param defaultName default item name + */ + public POIXMLRelation(String type, String rel, String defaultName) { + this(type, rel, defaultName, null); + } + /** + * Return the content type. Content types define a media type, a subtype, and an + * optional set of parameters, as defined in RFC 2616. + * + * @return the content type + */ + public String getContentType() { + return _type; + } + + /** + * Return the relationship, the kind of connection between a source part and a target part in a package. + * Relationships make the connections between parts directly discoverable without looking at the content + * in the parts, and without altering the parts themselves. + * + * @return the relationship + */ + public String getRelation() { + return _relation; + } + + /** + * Return the default part name. Part names are used to refer to a part in the context of a + * package, typically as part of a URI. + * + * @return the default part name + */ + public String getDefaultFileName() { + return _defaultName; + } + + /** + * Returns the filename for the nth one of these, e.g. /xl/comments4.xml + * + * @param index the suffix for the document type + * @return the filename including the suffix + */ + public String getFileName(int index) { + if(! _defaultName.contains("#")) { + // Generic filename in all cases + return getDefaultFileName(); + } + return _defaultName.replace("#", Integer.toString(index)); + } + + /** + * Returns the index of the filename within the package for the given part. + * e.g. 
4 for /xl/comments4.xml + * + * @param part the part to read the suffix from + * @return the suffix + */ + public Integer getFileNameIndex(POIXMLDocumentPart part) { + String regex = _defaultName.replace("#", "(\\d+)"); + return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1")); + } + + /** + * Return type of the object used to construct instances of this relationship + * + * @return the class of the object used to construct instances of this relation + */ + public Class getRelationClass(){ + return _cls; + } + + /** + * Fetches the InputStream to read the contents, based + * of the specified core part, for which we are defined + * as a suitable relationship + * + * @since 3.16-beta3 + */ + public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { + PackageRelationshipCollection prc = + corePart.getRelationshipsByType(getRelation()); + Iterator it = prc.iterator(); + if(it.hasNext()) { + PackageRelationship rel = it.next(); + PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); + PackagePart part = corePart.getPackage().getPart(relName); + return part.getInputStream(); + } + log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); + return null; + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java new file mode 100644 index 0000000000..123c0b5786 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java @@ -0,0 +1,166 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ooxml; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.stream.XMLStreamReader; + +import org.apache.poi.openxml4j.opc.PackageNamespaces; +import org.apache.poi.ooxml.util.DocumentHelper; +import org.apache.xmlbeans.SchemaType; +import org.apache.xmlbeans.SchemaTypeLoader; +import org.apache.xmlbeans.XmlBeans; +import org.apache.xmlbeans.XmlException; +import org.apache.xmlbeans.XmlObject; +import org.apache.xmlbeans.XmlOptions; +import org.apache.xmlbeans.xml.stream.XMLInputStream; +import org.apache.xmlbeans.xml.stream.XMLStreamException; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +@SuppressWarnings("deprecation") +public class POIXMLTypeLoader { + + private static ThreadLocal typeLoader = new ThreadLocal<>(); + + // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes? 
+ // These constants should be common to all of POI and easy to use by other applications such as Tika + private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office"; + private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel"; + private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word"; + private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml"; + + public static final XmlOptions DEFAULT_XML_OPTIONS; + static { + DEFAULT_XML_OPTIONS = new XmlOptions(); + DEFAULT_XML_OPTIONS.setSaveOuter(); + DEFAULT_XML_OPTIONS.setUseDefaultNamespace(); + DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces(); + DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8"); + // Piccolo is disabled for POI builts, i.e. JAXP is used for parsing + // so only user code using XmlObject/XmlToken.Factory.parse + // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :)) + // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096); + + // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350 + // Update: disabled again for now as it caused strange NPEs and other problems + // when reading properties in separate workbooks in multiple threads + // DEFAULT_XML_OPTIONS.setUnsynchronized(); + + Map map = new HashMap<>(); + map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a"); + map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c"); + map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp"); + map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve"); + map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m"); + map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r"); + map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt"); + map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p"); + 
map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w"); + map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne"); + map.put(MS_OFFICE_URN, "o"); + map.put(MS_EXCEL_URN, "x"); + map.put(MS_WORD_URN, "w10"); + map.put(MS_VML_URN, "v"); + DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map)); + } + + private static XmlOptions getXmlOptions(XmlOptions options) { + return options == null ? DEFAULT_XML_OPTIONS : options; + } + + private static SchemaTypeLoader getTypeLoader(SchemaType type) { + SchemaTypeLoader tl = typeLoader.get(); + if (tl == null) { + ClassLoader cl = type.getClass().getClassLoader(); + tl = XmlBeans.typeLoaderForClassLoader(cl); + typeLoader.set(tl); + } + return tl; + } + + public static XmlObject newInstance(SchemaType type, XmlOptions options) { + return getTypeLoader(type).newInstance(type, getXmlOptions(options)); + } + + public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException { + try { + return parse(new StringReader(xmlText), type, options); + } catch (IOException e) { + throw new XmlException("Unable to parse xml bean", e); + } + } + + public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException { + try (InputStream is = new FileInputStream(file)) { + return parse(is, type, options); + } + } + + public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException { + try (InputStream is = file.openStream()) { + return parse(is, type, options); + } + } + + public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException { + try { + Document doc = DocumentHelper.readDocument(jiois); + return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options)); + } catch (SAXException e) { + throw new IOException("Unable to parse xml bean", e); + } + } + + public static XmlObject 
parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException { + return getTypeLoader(type).parse(xsr, type, getXmlOptions(options)); + } + + public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException { + try { + Document doc = DocumentHelper.readDocument(new InputSource(jior)); + return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options)); + } catch (SAXException e) { + throw new XmlException("Unable to parse xml bean", e); + } + } + + public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException { + return getTypeLoader(type).parse(node, type, getXmlOptions(options)); + } + + public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException { + return getTypeLoader(type).parse(xis, type, getXmlOptions(options)); + } + + public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException { + return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options)); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java new file mode 100644 index 0000000000..177f9f9335 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java @@ -0,0 +1,152 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml.dev; + +import java.io.*; +import java.util.ArrayList; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; + +/** + * Prints out the contents of a OOXML container. + * Useful for seeing what parts are defined, and how + * they're all related to each other. + */ +public class OOXMLLister implements Closeable { + private final OPCPackage container; + private final PrintStream disp; + + public OOXMLLister(OPCPackage container) { + this(container, System.out); + } + public OOXMLLister(OPCPackage container, PrintStream disp) { + this.container = container; + this.disp = disp; + } + + /** + * Figures out how big a given PackagePart is. + * + * @param part the PackagePart + * @return the size of the PackagePart + * + * @throws IOException if the part can't be read + */ + public static long getSize(PackagePart part) throws IOException { + InputStream in = part.getInputStream(); + try { + byte[] b = new byte[8192]; + long size = 0; + int read = 0; + + while(read > -1) { + read = in.read(b); + if(read > 0) { + size += read; + } + } + + return size; + } finally { + in.close(); + } + } + + /** + * Displays information on all the different + * parts of the OOXML file container. 
+ * @throws InvalidFormatException if the package relations are invalid + * @throws IOException if the package can't be read + */ + public void displayParts() throws InvalidFormatException, IOException { + ArrayList parts = container.getParts(); + for (PackagePart part : parts) { + disp.println(part.getPartName()); + disp.println("\t" + part.getContentType()); + + if(! part.getPartName().toString().equals("/docProps/core.xml")) { + disp.println("\t" + getSize(part) + " bytes"); + } + + if(! part.isRelationshipPart()) { + disp.println("\t" + part.getRelationships().size() + " relations"); + for(PackageRelationship rel : part.getRelationships()) { + displayRelation(rel, "\t "); + } + } + } + } + /** + * Displays information on all the different + * relationships between different parts + * of the OOXML file container. + */ + public void displayRelations() { + PackageRelationshipCollection rels = + container.getRelationships(); + for (PackageRelationship rel : rels) { + displayRelation(rel, ""); + } + } + + private void displayRelation(PackageRelationship rel, String indent) { + disp.println(indent+"Relationship:"); + disp.println(indent+"\tFrom: "+ rel.getSourceURI()); + disp.println(indent+"\tTo: " + rel.getTargetURI()); + disp.println(indent+"\tID: " + rel.getId()); + disp.println(indent+"\tMode: " + rel.getTargetMode()); + disp.println(indent+"\tType: " + rel.getRelationshipType()); + } + + @Override + public void close() throws IOException { + container.close(); + } + + public static void main(String[] args) throws IOException, InvalidFormatException { + if(args.length == 0) { + System.err.println("Use:"); + System.err.println("\tjava OOXMLLister "); + System.exit(1); + } + + File f = new File(args[0]); + if(! 
f.exists()) { + System.err.println("Error, file not found!"); + System.err.println("\t" + f); + System.exit(2); + } + + OOXMLLister lister = new OOXMLLister( + OPCPackage.open(f.toString(), PackageAccess.READ) + ); + + try { + lister.disp.println(f + "\n"); + lister.displayParts(); + lister.disp.println(); + lister.displayRelations(); + } finally { + lister.close(); + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java new file mode 100644 index 0000000000..47ec47055e --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java @@ -0,0 +1,137 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.ooxml.dev; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Enumeration; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +import org.apache.poi.openxml4j.opc.internal.ZipHelper; +import org.apache.poi.openxml4j.util.ZipSecureFile; +import org.apache.poi.util.IOUtils; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; + +/** + * Reads a zipped OOXML file and produces a copy with the included + * pretty-printed XML files. + * + * This is useful for comparing OOXML files produced by different tools as the often + * use different formatting of the XML. 
+ */ +public class OOXMLPrettyPrint { + private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + private final DocumentBuilder documentBuilder; + + public OOXMLPrettyPrint() throws ParserConfigurationException { + // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool + ZipSecureFile.setMinInflateRatio(0.00001); + + documentBuilder = documentBuilderFactory.newDocumentBuilder(); + } + + public static void main(String[] args) throws Exception { + if(args.length <= 1 || args.length % 2 != 0) { + System.err.println("Use:"); + System.err.println("\tjava OOXMLPrettyPrint [ ] ..."); + System.exit(1); + } + + for(int i = 0;i < args.length;i+=2) { + File f = new File(args[i]); + if(! f.exists()) { + System.err.println("Error, file not found!"); + System.err.println("\t" + f); + System.exit(2); + } + + handleFile(f, new File(args[i+1])); + } + System.out.println("Done."); + } + + private static void handleFile(File file, File outFile) throws ZipException, + IOException, ParserConfigurationException { + System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile); + + try (ZipFile zipFile = ZipHelper.openZipFile(file)) { + try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) { + new OOXMLPrettyPrint().handle(zipFile, out); + } + } finally { + System.out.println(); + } + } + + private void handle(ZipFile file, ZipOutputStream out) throws IOException { + Enumeration entries = file.entries(); + while(entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + + String name = entry.getName(); + out.putNextEntry(new ZipEntry(name)); + try { + if(name.endsWith(".xml") || name.endsWith(".rels")) { + Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); + document.setXmlStandalone(true); + pretty(document, out, 2); + } else { + 
System.out.println("Not pretty-printing non-XML file " + name); + IOUtils.copy(file.getInputStream(entry), out); + } + } catch (Exception e) { + throw new IOException("While handling entry " + name, e); + } finally { + out.closeEntry(); + } + System.out.print("."); + } + } + + private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException { + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + Transformer transformer = transformerFactory.newTransformer(); + transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); + if (indent > 0) { + // set properties to indent the resulting XML nicely + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent)); + } + Result result = new StreamResult(outputStream); + Source source = new DOMSource(document); + transformer.transform(source, result); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java new file mode 100644 index 0000000000..999abd46ee --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java @@ -0,0 +1,62 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml.extractor; + +import java.io.File; + +import org.apache.poi.extractor.POITextExtractor; + +/** + * A command line wrapper around {@link ExtractorFactory}, useful + * for when debugging. + */ +public class CommandLineTextExtractor { + public static final String DIVIDER = "======================="; + + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" CommandLineTextExtractor [filename] [filename]"); + System.exit(1); + } + + for (String arg : args) { + System.out.println(DIVIDER); + + File f = new File(arg); + System.out.println(f); + + POITextExtractor extractor = + ExtractorFactory.createExtractor(f); + try { + POITextExtractor metadataExtractor = + extractor.getMetadataTextExtractor(); + + System.out.println(" " + DIVIDER); + String metaData = metadataExtractor.getText(); + System.out.println(metaData); + System.out.println(" " + DIVIDER); + String text = extractor.getText(); + System.out.println(text); + System.out.println(DIVIDER); + System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text"); + } finally { + extractor.close(); + } + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java new file mode 100644 index 0000000000..6603f58582 --- /dev/null +++ 
b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java @@ -0,0 +1,435 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml.extractor; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; + +import org.apache.poi.EncryptedDocumentException; +import org.apache.poi.extractor.POIOLE2TextExtractor; +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.extractor.OLE2ExtractorFactory; +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.extractor.OutlookTextExtactor; +import org.apache.poi.hssf.extractor.ExcelExtractor; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import 
org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import org.apache.poi.poifs.filesystem.DirectoryEntry; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.Entry; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.NotOLE2FileException; +import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.OfficeXmlFileException; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.sl.extractor.SlideShowExtractor; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.NotImplemented; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.apache.poi.util.Removal; +import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; +import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFRelation; +import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; +import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; +import org.apache.poi.xssf.extractor.XSSFExcelExtractor; +import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.xwpf.extractor.XWPFWordExtractor; +import org.apache.poi.xwpf.usermodel.XWPFRelation; +import org.apache.xmlbeans.XmlException; + +/** + * Figures out the correct POITextExtractor for your supplied + * document, and returns it. + * + *

Note 1 - will fail for many file formats if the POI Scratchpad jar is + * not present on the runtime classpath

+ *

Note 2 - rather than using this, for most cases you would be better + * off switching to Apache Tika instead!

+ */ +@SuppressWarnings("WeakerAccess") +public class ExtractorFactory { + private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class); + + public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT; + protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT; + protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT; + + /** + * Should this thread prefer event based over usermodel based extractors? + * (usermodel extractors tend to be more accurate, but use more memory) + * Default is false. + */ + public static boolean getThreadPrefersEventExtractors() { + return OLE2ExtractorFactory.getThreadPrefersEventExtractors(); + } + + /** + * Should all threads prefer event based over usermodel based extractors? + * (usermodel extractors tend to be more accurate, but use more memory) + * Default is to use the thread level setting, which defaults to false. + */ + public static Boolean getAllThreadsPreferEventExtractors() { + return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors(); + } + + /** + * Should this thread prefer event based over usermodel based extractors? + * Will only be used if the All Threads setting is null. + */ + public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) { + OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors); + } + + /** + * Should all threads prefer event based over usermodel based extractors? + * If set, will take preference over the Thread level setting. + */ + public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) { + OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors); + } + + /** + * Should this thread use event based extractors is available? + * Checks the all-threads one first, then thread specific. 
+ */ + public static boolean getPreferEventExtractor() { + return OLE2ExtractorFactory.getPreferEventExtractor(); + } + + public static T createExtractor(File f) throws IOException, OpenXML4JException, XmlException { + NPOIFSFileSystem fs = null; + try { + fs = new NPOIFSFileSystem(f); + if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { + return (T)createEncryptedOOXMLExtractor(fs); + } + POITextExtractor extractor = createExtractor(fs); + extractor.setFilesystem(fs); + return (T)extractor; + } catch (OfficeXmlFileException e) { + // ensure file-handle release + IOUtils.closeQuietly(fs); + return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ)); + } catch (NotOLE2FileException ne) { + // ensure file-handle release + IOUtils.closeQuietly(fs); + throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); + } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { + // ensure file-handle release + IOUtils.closeQuietly(fs); + throw e; + } + } + + public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { + InputStream is = FileMagic.prepareToCheckMagic(inp); + + FileMagic fm = FileMagic.valueOf(is); + + switch (fm) { + case OLE2: + NPOIFSFileSystem fs = new NPOIFSFileSystem(is); + boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); + return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs); + case OOXML: + return createExtractor(OPCPackage.open(is)); + default: + throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); + } + } + + /** + * Tries to determine the actual type of file and produces a matching text-extractor for it. + * + * @param pkg An {@link OPCPackage}. + * @return A {@link POIXMLTextExtractor} for the given file. 
+ * @throws IOException If an error occurs while reading the file + * @throws OpenXML4JException If an error parsing the OpenXML file format is found. + * @throws XmlException If an XML parsing error occurs. + * @throws IllegalArgumentException If no matching file type could be found. + */ + public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { + try { + // Check for the normal Office core document + PackageRelationshipCollection core; + core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL); + + // If nothing was found, try some of the other OOXML-based core types + if (core.size() == 0) { + // Could it be an OOXML-Strict one? + core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL); + } + if (core.size() == 0) { + // Could it be a visio one? + core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL); + if (core.size() == 1) + return new XDGFVisioExtractor(pkg); + } + + // Should just be a single core document, complain if not + if (core.size() != 1) { + throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); + } + + // Grab the core document part, and try to identify from that + final PackagePart corePart = pkg.getPart(core.getRelationship(0)); + final String contentType = corePart.getContentType(); + + // Is it XSSF? + for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { + if ( rel.getContentType().equals( contentType ) ) { + if (getPreferEventExtractor()) { + return new XSSFEventBasedExcelExtractor(pkg); + } + return new XSSFExcelExtractor(pkg); + } + } + + // Is it XWPF? + for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) { + if ( rel.getContentType().equals( contentType ) ) { + return new XWPFWordExtractor(pkg); + } + } + + // Is it XSLF? 
+ for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { + if ( rel.getContentType().equals( contentType ) ) { + return new SlideShowExtractor(new XMLSlideShow(pkg)); + } + } + + // special handling for SlideShow-Theme-files, + if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) { + return new SlideShowExtractor(new XMLSlideShow(pkg)); + } + + // How about xlsb? + for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) { + if (rel.getContentType().equals(contentType)) { + return new XSSFBEventBasedExcelExtractor(pkg); + } + } + + throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); + + } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) { + // ensure that we close the package again if there is an error opening it, however + // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! + pkg.revert(); + throw e; + } + } + + public static T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { + return createExtractor(fs.getRoot()); + } + public static T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { + return createExtractor(fs.getRoot()); + } + public static T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { + return createExtractor(fs.getRoot()); + } + + public static T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException + { + // First, check for OOXML + for (String entryName : poifsDir.getEntryNames()) { + if (entryName.equals("Package")) { + OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package")); + return (T)createExtractor(pkg); + } + } + + // If not, ask the OLE2 code to check, with Scratchpad if possible + return (T)OLE2ExtractorFactory.createExtractor(poifsDir); + } + + /** 
+ * Returns an array of text extractors, one for each of + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an + * empty array. Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + * + * @deprecated Use the method with correct "embedded" + */ + @Deprecated + @Removal(version="4.2") + public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { + return getEmbeddedDocsTextExtractors(ext); + } + + /** + * Returns an array of text extractors, one for each of + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an + * empty array. Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + */ + public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { + // All the embedded directories we spotted + ArrayList dirs = new ArrayList<>(); + // For anything else not directly held in as a POIFS directory + ArrayList nonPOIFS = new ArrayList<>(); + + // Find all the embedded directories + DirectoryEntry root = ext.getRoot(); + if (root == null) { + throw new IllegalStateException("The extractor didn't know which POIFS it came from!"); + } + + if (ext instanceof ExcelExtractor) { + // These are in MBD... under the root + Iterator it = root.getEntries(); + while (it.hasNext()) { + Entry entry = it.next(); + if (entry.getName().startsWith("MBD")) { + dirs.add(entry); + } + } + } else if (ext instanceof WordExtractor) { + // These are in ObjectPool -> _... 
under the root + try { + DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool"); + Iterator it = op.getEntries(); + while (it.hasNext()) { + Entry entry = it.next(); + if (entry.getName().startsWith("_")) { + dirs.add(entry); + } + } + } catch (FileNotFoundException e) { + logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage()); + // ignored here + } + //} else if(ext instanceof PowerPointExtractor) { + // Tricky, not stored directly in poifs + // TODO + } else if (ext instanceof OutlookTextExtactor) { + // Stored in the Attachment blocks + MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); + for (AttachmentChunks attachment : msg.getAttachmentFiles()) { + if (attachment.getAttachData() != null) { + byte[] data = attachment.getAttachData().getValue(); + nonPOIFS.add( new ByteArrayInputStream(data) ); + } else if (attachment.getAttachmentDirectory() != null) { + dirs.add(attachment.getAttachmentDirectory().getDirectory()); + } + } + } + + // Create the extractors + if (dirs.size() == 0 && nonPOIFS.size() == 0){ + return new POITextExtractor[0]; + } + + ArrayList textExtractors = new ArrayList<>(); + for (Entry dir : dirs) { + textExtractors.add(createExtractor((DirectoryNode) dir)); + } + for (InputStream nonPOIF : nonPOIFS) { + try { + textExtractors.add(createExtractor(nonPOIF)); + } catch (IllegalArgumentException e) { + // Ignore, just means it didn't contain + // a format we support as yet + logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage()); + } catch (XmlException | OpenXML4JException e) { + throw new IOException(e.getMessage(), e); + } + } + return textExtractors.toArray(new POITextExtractor[textExtractors.size()]); + } + + /** + * Returns an array of text extractors, one for each of + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an + * empty array. 
Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + * + * @deprecated Use the method with correct "embedded" + */ + @Deprecated + @Removal(version="4.2") + @NotImplemented + @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) + public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) { + return getEmbeddedDocsTextExtractors(ext); + } + + /** + * Returns an array of text extractors, one for each of + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an + * empty array. Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + */ + @NotImplemented + @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) + public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) { + throw new IllegalStateException("Not yet supported"); + } + + private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs) + throws IOException { + String pass = Biff8EncryptionKey.getCurrentUserPassword(); + if (pass == null) { + pass = Decryptor.DEFAULT_PASSWORD; + } + + EncryptionInfo ei = new EncryptionInfo(fs); + Decryptor dec = ei.getDecryptor(); + InputStream is = null; + try { + if (!dec.verifyPassword(pass)) { + throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor"); + } + is = dec.getDataStream(fs); + return createExtractor(OPCPackage.open(is)); + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw new EncryptedDocumentException(e); + } finally { + IOUtils.closeQuietly(is); + + // also close the NPOIFSFileSystem here as we read all the data + // while decrypting + fs.close(); + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java new file mode 
100644 index 0000000000..47c37e84b4 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java @@ -0,0 +1,276 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ooxml.extractor; + +import java.math.BigDecimal; +import java.text.DateFormat; +import java.text.DateFormatSymbols; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; + +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.ooxml.POIXMLDocument; +import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.util.LocaleUtil; +import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; + +/** + * A {@link POITextExtractor} for returning the textual + * content of the OOXML file properties, eg author + * and title. + */ +public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { + + private final DateFormat dateFormat; + + /** + * Creates a new POIXMLPropertiesTextExtractor for the given open document. 
+ * + * @param doc the given open document + */ + public POIXMLPropertiesTextExtractor(POIXMLDocument doc) { + super(doc); + DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT); + dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs); + dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC); + } + + /** + * Creates a new POIXMLPropertiesTextExtractor, for the + * same file that another TextExtractor is already + * working on. + * + * @param otherExtractor the extractor referencing the given file + */ + public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) { + this(otherExtractor.getDocument()); + } + + private void appendIfPresent(StringBuilder text, String thing, boolean value) { + appendIfPresent(text, thing, Boolean.toString(value)); + } + + private void appendIfPresent(StringBuilder text, String thing, int value) { + appendIfPresent(text, thing, Integer.toString(value)); + } + + private void appendIfPresent(StringBuilder text, String thing, Date value) { + if (value == null) { + return; + } + appendIfPresent(text, thing, dateFormat.format(value)); + } + + private void appendIfPresent(StringBuilder text, String thing, String value) { + if (value == null) { + return; + } + text.append(thing); + text.append(" = "); + text.append(value); + text.append("\n"); + } + + /** + * Returns the core document properties, eg author + * + * @return the core document properties + */ + @SuppressWarnings("resource") + public String getCorePropertiesText() { + POIXMLDocument document = getDocument(); + if (document == null) { // event based extractor does not have a document + return ""; + } + + StringBuilder text = new StringBuilder(64); + PackagePropertiesPart props = + document.getProperties().getCoreProperties().getUnderlyingProperties(); + + appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); + appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); + appendIfPresent(text, "ContentStatus", 
props.getContentStatusProperty().getValue()); + appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue()); + appendIfPresent(text, "Created", props.getCreatedProperty().getValue()); + appendIfPresent(text, "CreatedString", props.getCreatedPropertyString()); + appendIfPresent(text, "Creator", props.getCreatorProperty().getValue()); + appendIfPresent(text, "Description", props.getDescriptionProperty().getValue()); + appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue()); + appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue()); + appendIfPresent(text, "Language", props.getLanguageProperty().getValue()); + appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue()); + appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue()); + appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString()); + appendIfPresent(text, "Modified", props.getModifiedProperty().getValue()); + appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString()); + appendIfPresent(text, "Revision", props.getRevisionProperty().getValue()); + appendIfPresent(text, "Subject", props.getSubjectProperty().getValue()); + appendIfPresent(text, "Title", props.getTitleProperty().getValue()); + appendIfPresent(text, "Version", props.getVersionProperty().getValue()); + + return text.toString(); + } + + /** + * Returns the extended document properties, eg application + * + * @return the extended document properties + */ + @SuppressWarnings("resource") + public String getExtendedPropertiesText() { + POIXMLDocument document = getDocument(); + if (document == null) { // event based extractor does not have a document + return ""; + } + + StringBuilder text = new StringBuilder(64); + org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties + props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); + + appendIfPresent(text, 
"Application", props.getApplication()); + appendIfPresent(text, "AppVersion", props.getAppVersion()); + appendIfPresent(text, "Characters", props.getCharacters()); + appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces()); + appendIfPresent(text, "Company", props.getCompany()); + appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase()); + appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged()); + appendIfPresent(text, "Lines", props.getLines()); + appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate()); + appendIfPresent(text, "Manager", props.getManager()); + appendIfPresent(text, "Pages", props.getPages()); + appendIfPresent(text, "Paragraphs", props.getParagraphs()); + appendIfPresent(text, "PresentationFormat", props.getPresentationFormat()); + appendIfPresent(text, "Template", props.getTemplate()); + appendIfPresent(text, "TotalTime", props.getTotalTime()); + + return text.toString(); + } + + /** + * Returns the custom document properties, if there are any + * + * @return the custom document properties + */ + @SuppressWarnings({"resource"}) + public String getCustomPropertiesText() { + POIXMLDocument document = getDocument(); + if (document == null) { // event based extractor does not have a document + return ""; + } + + StringBuilder text = new StringBuilder(); + org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties + props = document.getProperties().getCustomProperties().getUnderlyingProperties(); + + for (CTProperty property : props.getPropertyArray()) { + String val = "(not implemented!)"; + + if (property.isSetLpwstr()) { + val = property.getLpwstr(); + } else if (property.isSetLpstr()) { + val = property.getLpstr(); + } else if (property.isSetDate()) { + val = property.getDate().toString(); + } else if (property.isSetFiletime()) { + val = property.getFiletime().toString(); + } else if (property.isSetBool()) { + val = Boolean.toString(property.getBool()); + } + + // Integers 
+ else if (property.isSetI1()) { + val = Integer.toString(property.getI1()); + } else if (property.isSetI2()) { + val = Integer.toString(property.getI2()); + } else if (property.isSetI4()) { + val = Integer.toString(property.getI4()); + } else if (property.isSetI8()) { + val = Long.toString(property.getI8()); + } else if (property.isSetInt()) { + val = Integer.toString(property.getInt()); + } + + // Unsigned Integers + else if (property.isSetUi1()) { + val = Integer.toString(property.getUi1()); + } else if (property.isSetUi2()) { + val = Integer.toString(property.getUi2()); + } else if (property.isSetUi4()) { + val = Long.toString(property.getUi4()); + } else if (property.isSetUi8()) { + val = property.getUi8().toString(); + } else if (property.isSetUint()) { + val = Long.toString(property.getUint()); + } + + // Reals + else if (property.isSetR4()) { + val = Float.toString(property.getR4()); + } else if (property.isSetR8()) { + val = Double.toString(property.getR8()); + } else if (property.isSetDecimal()) { + BigDecimal d = property.getDecimal(); + if (d == null) { + val = null; + } else { + val = d.toPlainString(); + } + } + + /*else if (property.isSetArray()) { + // TODO Fetch the array values and output + } + else if (property.isSetVector()) { + // TODO Fetch the vector values and output + } + + else if (property.isSetBlob() || property.isSetOblob()) { + // TODO Decode, if possible + } + else if (property.isSetStream() || property.isSetOstream() || + property.isSetVstream()) { + // TODO Decode, if possible + } + else if (property.isSetStorage() || property.isSetOstorage()) { + // TODO Decode, if possible + }*/ + + text.append(property.getName()).append(" = ").append(val).append("\n"); + } + + return text.toString(); + } + + @Override + public String getText() { + try { + return + getCorePropertiesText() + + getExtendedPropertiesText() + + getCustomPropertiesText(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public 
POIXMLPropertiesTextExtractor getMetadataTextExtractor() { + throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!"); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java new file mode 100644 index 0000000000..ada32a1cc0 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java @@ -0,0 +1,123 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.ooxml.extractor; + +import java.io.IOException; + +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.ooxml.POIXMLDocument; +import org.apache.poi.ooxml.POIXMLProperties.CoreProperties; +import org.apache.poi.ooxml.POIXMLProperties.CustomProperties; +import org.apache.poi.ooxml.POIXMLProperties.ExtendedProperties; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.util.ZipSecureFile; + +public abstract class POIXMLTextExtractor extends POITextExtractor { + /** The POIXMLDocument that's open */ + private final POIXMLDocument _document; + + /** + * Creates a new text extractor for the given document + * + * @param document the document to extract from + */ + public POIXMLTextExtractor(POIXMLDocument document) { + _document = document; + } + + /** + * Returns the core document properties + * + * @return the core document properties + */ + public CoreProperties getCoreProperties() { + return _document.getProperties().getCoreProperties(); + } + /** + * Returns the extended document properties + * + * @return the extended document properties + */ + public ExtendedProperties getExtendedProperties() { + return _document.getProperties().getExtendedProperties(); + } + /** + * Returns the custom document properties + * + * @return the custom document properties + */ + public CustomProperties getCustomProperties() { + return _document.getProperties().getCustomProperties(); + } + + /** + * Returns opened document + * + * @return the opened document + */ + @Override + public final POIXMLDocument getDocument() { + return _document; + } + + /** + * Returns the opened OPCPackage that contains the document + * + * @return the opened OPCPackage + */ + public OPCPackage getPackage() { + return _document.getPackage(); + } + + /** + * Returns an OOXML properties text extractor for the + * document properties metadata, such as title and 
author. + */ + @Override + public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { + return new POIXMLPropertiesTextExtractor(_document); + } + + @Override + public void close() throws IOException { + // e.g. XSSFEventBaseExcelExtractor passes a null-document + if(_document != null) { + @SuppressWarnings("resource") + OPCPackage pkg = _document.getPackage(); + if(pkg != null) { + // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor! + pkg.revert(); + } + } + super.close(); + } + + protected void checkMaxTextSize(CharSequence text, String string) { + if(string == null) { + return; + } + + int size = text.length() + string.length(); + if(size > ZipSecureFile.getMaxTextSize()) { + throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. " + + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. " + + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. " + + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize()); + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java new file mode 100644 index 0000000000..d79237d8ac --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java @@ -0,0 +1,185 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
/* ==================================================================== */

package org.apache.poi.ooxml.util;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.events.Namespace;

import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Static helpers for creating DOM {@link DocumentBuilder}s and
 * {@link Document}s from a shared, hardened {@link DocumentBuilderFactory}.
 */
public final class DocumentHelper {
    private static final POILogger logger = POILogFactory.getLogger(DocumentHelper.class);

    private static final String ENTITY_EXPANSION_LIMIT_PROPERTY =
            "http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit";
    private static final String XERCES_SECURITY_MANAGER_PROPERTY =
            "http://apache.org/xml/properties/security-manager";
    private static final int ENTITY_EXPANSION_LIMIT = 4096;

    private DocumentHelper() {}

    /** Error handler that logs every parser diagnostic and rethrows fatal ones. */
    private static class DocHelperErrorHandler implements ErrorHandler {

        @Override
        public void warning(SAXParseException exception) throws SAXException {
            printError(POILogger.WARN, exception);
        }

        @Override
        public void error(SAXParseException exception) throws SAXException {
            printError(POILogger.ERROR, exception);
        }

        @Override
        public void fatalError(SAXParseException exception) throws SAXException {
            printError(POILogger.FATAL, exception);
            throw exception;
        }

        /** Prints the error message as {@code file:line:column: message}. */
        private void printError(int type, SAXParseException ex) {
            StringBuilder sb = new StringBuilder();

            String systemId = ex.getSystemId();
            if (systemId != null) {
                // keep only the last path element of the system id
                int index = systemId.lastIndexOf('/');
                if (index != -1) {
                    systemId = systemId.substring(index + 1);
                }
                sb.append(systemId);
            }
            sb.append(':');
            sb.append(ex.getLineNumber());
            sb.append(':');
            sb.append(ex.getColumnNumber());
            sb.append(": ");
            sb.append(ex.getMessage());

            logger.log(type, sb.toString(), ex);
        }
    }

    private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    static {
        documentBuilderFactory.setNamespaceAware(true);
        documentBuilderFactory.setValidating(false);
        trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        trySetXercesSecurityManager(documentBuilderFactory);
    }

    /**
     * Creates a new document builder, with sensible defaults
     *
     * @return a builder with the ignoring entity resolver and the logging error handler installed
     * @throws IllegalStateException If creating the DocumentBuilder fails, e.g.
     *  due to {@link ParserConfigurationException}.
     */
    public static synchronized DocumentBuilder newDocumentBuilder() {
        try {
            DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
            documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER);
            documentBuilder.setErrorHandler(new DocHelperErrorHandler());
            return documentBuilder;
        } catch (ParserConfigurationException e) {
            throw new IllegalStateException("cannot create a DocumentBuilder", e);
        }
    }

    /** Sets a factory feature, logging (instead of failing) when the parser rejects it. */
    private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) {
        try {
            dbf.setFeature(feature, enabled);
        } catch (Exception e) {
            logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
        } catch (AbstractMethodError ame) {
            logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
        }
    }

    /**
     * Limits entity expansion, preferring a standalone Xerces security manager
     * and falling back to the JAXP entity expansion limit attribute.
     */
    private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) {
        // Try built-in JVM one first, standalone if not
        for (String securityManagerClassName : new String[]{
                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
                "org.apache.xerces.util.SecurityManager"
        }) {
            try {
                Object mgr = Class.forName(securityManagerClassName).getDeclaredConstructor().newInstance();
                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
                setLimit.invoke(mgr, ENTITY_EXPANSION_LIMIT);
                dbf.setAttribute(XERCES_SECURITY_MANAGER_PROPERTY, mgr);
                // Stop once one can be setup without error
                return;
            } catch (ClassNotFoundException e) {
                // continue without log, this is expected in some setups
            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
                logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
            }
        }

        // separate old version of Xerces not found => use the builtin way of setting the property.
        // setAttribute() throws IllegalArgumentException for attributes the implementation does not
        // know; as this runs from the static initializer, letting it escape would make the whole
        // class unusable, so log and carry on like the SAXHelper counterpart does.
        try {
            dbf.setAttribute(ENTITY_EXPANSION_LIMIT_PROPERTY, ENTITY_EXPANSION_LIMIT);
        } catch (IllegalArgumentException e) {
            logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
        }
    }

    /**
     * Parses the given stream via the default (sensible)
     * DocumentBuilder
     * @param inp Stream to read the XML data from
     * @return the parsed Document
     * @throws IOException if reading the stream fails
     * @throws SAXException if the content cannot be parsed
     */
    public static Document readDocument(InputStream inp) throws IOException, SAXException {
        return newDocumentBuilder().parse(inp);
    }

    /**
     * Parses the given stream via the default (sensible)
     * DocumentBuilder
     * @param inp sax source to read the XML data from
     * @return the parsed Document
     * @throws IOException if reading the source fails
     * @throws SAXException if the content cannot be parsed
     */
    public static Document readDocument(InputSource inp) throws IOException, SAXException {
        return newDocumentBuilder().parse(inp);
    }

    // must only be used to create empty documents, do not use it for parsing!
    private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder();

    /**
     * Creates a new DOM Document
     *
     * @return a new, empty document
     */
    public static synchronized Document createDocument() {
        return documentBuilderSingleton.newDocument();
    }

    /**
     * Adds a namespace declaration attribute to the given element.
     *
     * @param element the element receiving the {@code xmlns:prefix} attribute
     * @param namespacePrefix the prefix to declare
     * @param namespaceURI the namespace URI bound to the prefix
     */
    public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) {
        element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI,
                XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix,
                namespaceURI);
    }

    /**
     * Adds a namespace declaration attribute to the given element.
     *
     * @param element the element receiving the declaration
     * @param namespace supplies the prefix and the namespace URI
     */
    public static void addNamespaceDeclaration(Element element, Namespace namespace) {
        addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI());
    }
}
+==================================================================== */ +package org.apache.poi.ooxml.util; + +import java.util.LinkedList; +import java.util.ListIterator; + +/** + *

+ * 24.08.2009
+ *

+ * + * @author Stefan Stern
/**
 * Manages a closed range of {@code long} identifiers: ids can be reserved
 * (a specific one or simply the next free one) and released again. The free
 * ids are kept as an ascending list of disjoint {@code [start; end]} segments.
 */
public class IdentifierManager {

    public static final long MAX_ID = Long.MAX_VALUE - 1;

    public static final long MIN_ID = 0L;

    /** Highest id managed by this instance (inclusive). */
    private final long upperbound;

    /** Lowest id managed by this instance (inclusive). */
    private final long lowerbound;

    /**
     * List of segments of available identifiers
     */
    private final LinkedList<Segment> segments;

    /**
     * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
     * @param upperbound the upper limit of the id-range to manage. Must be less than or equal to {@link #MAX_ID}.
     * @throws IllegalArgumentException if the bounds are inverted or outside [{@link #MIN_ID}; {@link #MAX_ID}]
     */
    public IdentifierManager(long lowerbound, long upperbound) {
        if (lowerbound > upperbound) {
            throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
        }
        if (lowerbound < MIN_ID) {
            String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID);
            throw new IllegalArgumentException(message);
        }
        if (upperbound > MAX_ID) {
            throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound);
        }
        this.lowerbound = lowerbound;
        this.upperbound = upperbound;
        this.segments = new LinkedList<>();
        segments.add(new Segment(lowerbound, upperbound));
    }

    /**
     * Reserves the given id if it is still free, otherwise reserves the
     * lowest free id instead.
     *
     * @param id the preferred identifier, within the managed bounds
     * @return {@code id} if it was free, else the id returned by {@link #reserveNew()}
     * @throws IllegalArgumentException if {@code id} is out of bounds
     * @throws IllegalStateException if no more identifiers are available
     */
    public long reserve(long id) {
        checkInBounds(id);
        verifyIdentifiersLeft();

        ListIterator<Segment> iter = segments.listIterator();
        while (iter.hasNext()) {
            Segment segment = iter.next();
            if (segment.end < id) {
                continue;
            }
            if (segment.start > id) {
                // segments are ascending, so id is not free
                break;
            }
            if (segment.start == id) {
                segment.start = id + 1;
                if (segment.end < segment.start) {
                    iter.remove();
                }
            } else if (segment.end == id) {
                segment.end = id - 1;
            } else {
                // id lies strictly inside: split the segment around it
                iter.add(new Segment(id + 1, segment.end));
                segment.end = id - 1;
            }
            return id;
        }
        return reserveNew();
    }

    /**
     * @return a new identifier.
     * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
     */
    public long reserveNew() {
        verifyIdentifiersLeft();
        Segment segment = segments.getFirst();
        long result = segment.start;
        segment.start += 1;
        if (segment.start > segment.end) {
            segments.removeFirst();
        }
        return result;
    }

    /**
     * @param id
     *            the identifier to release. Must be greater than or equal to
     *            {@link #lowerbound} and must be less than or equal to {@link #upperbound}
     * @return true, if the identifier was reserved and has been successfully
     *         released, false, if the identifier was not reserved.
     * @throws IllegalArgumentException if {@code id} is out of bounds
     */
    public boolean release(long id) {
        checkInBounds(id);

        Segment previous = null;
        ListIterator<Segment> iter = segments.listIterator();
        while (iter.hasNext()) {
            Segment segment = iter.next();
            if (segment.end < id) {
                previous = segment;
                continue;
            }
            if (segment.start <= id) {
                /* id lies inside a free segment, i.e. it was not reserved */
                return false;
            }

            /* id lies in the gap between 'previous' and 'segment' */
            boolean touchesPrevious = previous != null && previous.end == id - 1;
            boolean touchesNext = segment.start == id + 1;
            if (touchesPrevious && touchesNext) {
                /* releasing this element glues two segments into one */
                previous.end = segment.end;
                iter.remove();
            } else if (touchesPrevious) {
                previous.end = id;
            } else if (touchesNext) {
                segment.start = id;
            } else {
                iter.previous();
                iter.add(new Segment(id, id));
            }
            return true;
        }

        /*
         * id lies behind the last free segment, or nothing is free at all.
         * The id is within bounds and not free, hence it is reserved.
         */
        if (previous != null && previous.end == id - 1) {
            previous.end = id;
        } else {
            segments.addLast(new Segment(id, id));
        }
        return true;
    }

    /**
     * @return the number of identifiers that are currently not reserved
     */
    public long getRemainingIdentifiers() {
        long result = 0;
        for (Segment segment : segments) {
            result = result - segment.start;
            result = result + segment.end + 1;
        }
        return result;
    }

    /** Rejects ids outside of [lowerbound; upperbound]. */
    private void checkInBounds(long id) {
        if (id < lowerbound || id > upperbound) {
            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
        }
    }

    /** Fails if every identifier is reserved. */
    private void verifyIdentifiersLeft() {
        if (segments.isEmpty()) {
            throw new IllegalStateException("No identifiers left");
        }
    }

    /** A run of consecutive free identifiers, both ends inclusive. */
    private static class Segment {

        public long start;
        public long end;

        public Segment(long start, long end) {
            this.start = start;
            this.end = end;
        }

        @Override
        public String toString() {
            return "[" + start + "; " + end + "]";
        }
    }
}
/* ==================================================================== */

package org.apache.poi.ooxml.util;

import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.util.IOUtils;

import java.io.*;
import java.net.URI;

/**
 * Provides handy methods to work with OOXML packages
 */
public final class PackageHelper {

    /**
     * Opens a package from the given stream.
     *
     * @param is the stream to read the package from
     * @return the opened package
     * @throws IOException if reading fails
     * @throws POIXMLException wrapping an {@link InvalidFormatException} raised while opening
     */
    public static OPCPackage open(InputStream is) throws IOException {
        try {
            return OPCPackage.open(is);
        } catch (InvalidFormatException e){
            throw new POIXMLException(e);
        }
    }

    /**
     * Clone the specified package.
     *
     * @param   pkg   the package to clone
     * @param   file  the destination file
     * @return  the cloned package
     * @throws OpenXML4JException if a part or relationship cannot be created
     * @throws IOException if copying part content fails
     */
    public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException {

        String path = file.getAbsolutePath();

        OPCPackage dest = OPCPackage.create(path);
        PackageRelationshipCollection rels = pkg.getRelationships();
        for (PackageRelationship rel : rels) {
            PackagePart part = pkg.getPart(rel);
            if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) {
                copyProperties(pkg.getPackageProperties(), dest.getPackageProperties());
                continue;
            }
            dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType());
            PackagePart part_tgt = dest.createPart(part.getPartName(), part.getContentType());

            copyContent(part, part_tgt);

            if (part.hasRelationships()) {
                copy(pkg, part, dest, part_tgt);
            }
        }
        dest.close();

        //the temp file will be deleted when JVM terminates
        new File(path).deleteOnExit();
        return OPCPackage.open(path);
    }

    /**
     * Copies the raw content of one part into another. Both streams are
     * closed even when copying fails.
     */
    private static void copyContent(PackagePart source, PackagePart target) throws IOException {
        try (OutputStream out = target.getOutputStream();
             InputStream in = source.getInputStream()) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Recursively copy package parts to the destination package
     */
    private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException {
        PackageRelationshipCollection rels = part.getRelationships();
        if (rels == null) {
            return;
        }
        for (PackageRelationship rel : rels) {
            if (rel.getTargetMode() == TargetMode.EXTERNAL) {
                part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId());
                //external relations don't have associated package parts
                continue;
            }
            URI uri = rel.getTargetURI();

            if (uri.getRawFragment() != null) {
                // targets carrying a fragment are re-added by URI, no part is copied for them
                part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
                continue;
            }
            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
            PackagePart p = pkg.getPart(relName);
            part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId());

            // copy each target part only once, then follow its own relationships
            if (!tgt.containPart(p.getPartName())) {
                PackagePart dest = tgt.createPart(p.getPartName(), p.getContentType());
                copyContent(p, dest);
                copy(pkg, p, tgt, dest);
            }
        }
    }

    /**
     * Copy core package properties
     *
     * @param src source properties
     * @param tgt target properties
     */
    private static void copyProperties(PackageProperties src, PackageProperties tgt){
        tgt.setCategoryProperty(src.getCategoryProperty().getValue());
        tgt.setContentStatusProperty(src.getContentStatusProperty().getValue());
        tgt.setContentTypeProperty(src.getContentTypeProperty().getValue());
        tgt.setCreatorProperty(src.getCreatorProperty().getValue());
        tgt.setDescriptionProperty(src.getDescriptionProperty().getValue());
        tgt.setIdentifierProperty(src.getIdentifierProperty().getValue());
        tgt.setKeywordsProperty(src.getKeywordsProperty().getValue());
        tgt.setLanguageProperty(src.getLanguageProperty().getValue());
        tgt.setRevisionProperty(src.getRevisionProperty().getValue());
        tgt.setSubjectProperty(src.getSubjectProperty().getValue());
        tgt.setTitleProperty(src.getTitleProperty().getValue());
        tgt.setVersionProperty(src.getVersionProperty().getValue());
    }
}
/* ==================================================================== */

package org.apache.poi.ooxml.util;

import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.util.concurrent.TimeUnit;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;


/**
 * Provides handy methods for working with SAX parsers and readers
 */
public final class SAXHelper {
    private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class);

    /** Time of the last throttled warning, in milliseconds. */
    private static long lastLog;

    /** Resolves every entity to an empty document. */
    static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId)
                throws SAXException, IOException {
            StringReader nothing = new StringReader("");
            return new InputSource(nothing);
        }
    };

    private static final SAXParserFactory saxFactory;
    static {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setValidating(false);
            factory.setNamespaceAware(true);
            saxFactory = factory;
        } catch (RuntimeException | Error re) {
            // this also catches NoClassDefFoundError, which may be due to a local class path issue
            // This may occur if the code is run inside a web container
            // or a restricted JVM
            // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170
            logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re);
            throw re;
        } catch (Exception e) {
            logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e);
            throw new RuntimeException("Failed to create SAXParserFactory", e);
        }
    }

    private SAXHelper() {}

    /**
     * Creates a new SAX XMLReader, with sensible defaults
     *
     * @return a reader with the ignoring entity resolver installed and the
     *         security settings applied where the parser accepts them
     */
    public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException {
        XMLReader reader = saxFactory.newSAXParser().getXMLReader();
        reader.setEntityResolver(IGNORING_ENTITY_RESOLVER);
        trySetSAXFeature(reader, XMLConstants.FEATURE_SECURE_PROCESSING);
        trySetXercesSecurityManager(reader);
        return reader;
    }

    /** Switches a reader feature on; a parser that rejects it only causes a warning. */
    private static void trySetSAXFeature(XMLReader xmlReader, String feature) {
        try {
            xmlReader.setFeature(feature, true);
        } catch (Exception e) {
            logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
        } catch (AbstractMethodError ame) {
            logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
        }
    }

    /**
     * Limits entity expansion on the reader: first via a standalone Xerces
     * security manager, otherwise via the entity expansion limit property.
     */
    private static void trySetXercesSecurityManager(XMLReader xmlReader) {
        // Try built-in JVM one first, standalone if not
        String[] candidates = {
                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
                "org.apache.xerces.util.SecurityManager"
        };
        for (String className : candidates) {
            try {
                Object manager = Class.forName(className).newInstance();
                Method limitSetter = manager.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
                limitSetter.invoke(manager, 4096);
                xmlReader.setProperty("http://apache.org/xml/properties/security-manager", manager);
                // Stop once one can be setup without error
                return;
            } catch (ClassNotFoundException e) {
                // continue without log, this is expected in some setups
            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
                warnThrottled(e);
            }
        }

        // separate old version of Xerces not found => use the builtin way of setting the property
        try {
            xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
        } catch (SAXException e) {     // NOSONAR - also catch things like NoClassDefError here
            warnThrottled(e);
        }
    }

    /** Logs the setup failure, throttled as it can spam the log otherwise. */
    private static void warnThrottled(Throwable cause) {
        if (System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
            logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", cause);
            lastLog = System.currentTimeMillis();
        }
    }
}
-==================================================================== */ - -package org.apache.poi.ss.extractor; - -import org.apache.poi.ss.usermodel.Shape; - -/** - * A collection of embedded object informations and content - */ -public class EmbeddedData { - private String filename; - private byte[] embeddedData; - private Shape shape; - private String contentType = "binary/octet-stream"; - - public EmbeddedData(String filename, byte[] embeddedData, String contentType) { - setFilename(filename); - setEmbeddedData(embeddedData); - setContentType(contentType); - } - - /** - * @return the filename - */ - public String getFilename() { - return filename; - } - - /** - * Sets the filename - * - * @param filename the filename - */ - public void setFilename(String filename) { - if (filename == null) { - this.filename = "unknown.bin"; - } else { - this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim(); - } - } - - /** - * @return the embedded object byte array - */ - public byte[] getEmbeddedData() { - return embeddedData; - } - - /** - * Sets the embedded object as byte array - * - * @param embeddedData the embedded object byte array - */ - public void setEmbeddedData(byte[] embeddedData) { - this.embeddedData = (embeddedData == null) ? 
null : embeddedData.clone(); - } - - /** - * @return the shape which links to the embedded object - */ - public Shape getShape() { - return shape; - } - - /** - * Sets the shape which links to the embedded object - * - * @param shape the shape - */ - public void setShape(Shape shape) { - this.shape = shape; - } - - /** - * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} - */ - public String getContentType() { - return contentType; - } - - /** - * Sets the content-/mime-type - * - * @param contentType the content-type - */ - public void setContentType(String contentType) { - this.contentType = contentType; - } -} \ No newline at end of file diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java deleted file mode 100644 index 8ea6df28a6..0000000000 --- a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java +++ /dev/null @@ -1,410 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi.ss.extractor; - -import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -import org.apache.poi.hpsf.ClassID; -import org.apache.poi.hpsf.ClassIDPredefined; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.DocumentInputStream; -import org.apache.poi.poifs.filesystem.Entry; -import org.apache.poi.poifs.filesystem.Ole10Native; -import org.apache.poi.poifs.filesystem.Ole10NativeException; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.ss.usermodel.Drawing; -import org.apache.poi.ss.usermodel.ObjectData; -import org.apache.poi.ss.usermodel.Picture; -import org.apache.poi.ss.usermodel.PictureData; -import org.apache.poi.ss.usermodel.Shape; -import org.apache.poi.ss.usermodel.ShapeContainer; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.util.Beta; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LocaleUtil; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.xssf.usermodel.XSSFObjectData; - -/** - * This extractor class tries to identify various embedded documents within Excel files - * and provide them via a common interface, i.e. 
the EmbeddedData instances - */ -@Beta -public class EmbeddedExtractor implements Iterable { - private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class); - //arbitrarily selected; may need to increase - private static final int MAX_RECORD_LENGTH = 1_000_000; - - // contentType - private static final String CONTENT_TYPE_BYTES = "binary/octet-stream"; - private static final String CONTENT_TYPE_PDF = "application/pdf"; - private static final String CONTENT_TYPE_DOC = "application/msword"; - private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel"; - - /** - * @return the list of known extractors, if you provide custom extractors, override this method - */ - @Override - public Iterator iterator() { - EmbeddedExtractor[] ee = { - new Ole10Extractor(), new PdfExtractor(), new BiffExtractor(), new OOXMLExtractor(), new FsExtractor() - }; - return Arrays.asList(ee).iterator(); - } - - public EmbeddedData extractOne(DirectoryNode src) throws IOException { - for (EmbeddedExtractor ee : this) { - if (ee.canExtract(src)) { - return ee.extract(src); - } - } - return null; - } - - public EmbeddedData extractOne(Picture src) throws IOException { - for (EmbeddedExtractor ee : this) { - if (ee.canExtract(src)) { - return ee.extract(src); - } - } - return null; - } - - public List extractAll(Sheet sheet) throws IOException { - Drawing patriarch = sheet.getDrawingPatriarch(); - if (null == patriarch){ - return Collections.emptyList(); - } - List embeddings = new ArrayList<>(); - extractAll(patriarch, embeddings); - return embeddings; - } - - protected void extractAll(ShapeContainer parent, List embeddings) throws IOException { - for (Shape shape : parent) { - EmbeddedData data = null; - if (shape instanceof ObjectData) { - ObjectData od = (ObjectData)shape; - try { - if (od.hasDirectoryEntry()) { - data = extractOne((DirectoryNode)od.getDirectory()); - } else { - String contentType = CONTENT_TYPE_BYTES; - if (od instanceof XSSFObjectData) 
{ - contentType = ((XSSFObjectData)od).getObjectPart().getContentType(); - } - data = new EmbeddedData(od.getFileName(), od.getObjectData(), contentType); - } - } catch (Exception e) { - LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e); - } - } else if (shape instanceof Picture) { - data = extractOne((Picture)shape); - } else if (shape instanceof ShapeContainer) { - extractAll((ShapeContainer)shape, embeddings); - } - - if (data == null) { - continue; - } - - data.setShape(shape); - String filename = data.getFilename(); - String extension = (filename == null || filename.lastIndexOf('.') == -1) ? ".bin" : filename.substring(filename.lastIndexOf('.')); - - // try to find an alternative name - if (filename == null || filename.isEmpty() || filename.startsWith("MBD") || filename.startsWith("Root Entry")) { - filename = shape.getShapeName(); - if (filename != null) { - filename += extension; - } - } - // default to dummy name - if (filename == null || filename.isEmpty()) { - filename = "picture_" + embeddings.size() + extension; - } - filename = filename.trim(); - data.setFilename(filename); - - embeddings.add(data); - } - } - - - public boolean canExtract(DirectoryNode source) { - return false; - } - - public boolean canExtract(Picture source) { - return false; - } - - protected EmbeddedData extract(DirectoryNode dn) throws IOException { - assert(canExtract(dn)); - ByteArrayOutputStream bos = new ByteArrayOutputStream(20000); - try (POIFSFileSystem dest = new POIFSFileSystem()) { - copyNodes(dn, dest.getRoot()); - // start with a reasonable big size - dest.writeFilesystem(bos); - } - - return new EmbeddedData(dn.getName(), bos.toByteArray(), CONTENT_TYPE_BYTES); - } - - protected EmbeddedData extract(Picture source) throws IOException { - return null; - } - - public static class Ole10Extractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - ClassID clsId = dn.getStorageClsid(); - return 
ClassIDPredefined.lookup(clsId) == ClassIDPredefined.OLE_V1_PACKAGE; - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - try { - // TODO: inspect the CompObj record for more details, i.e. the content type - Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn); - return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), CONTENT_TYPE_BYTES); - } catch (Ole10NativeException e) { - throw new IOException(e); - } - } - } - - static class PdfExtractor extends EmbeddedExtractor { - static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}"); - @Override - public boolean canExtract(DirectoryNode dn) { - ClassID clsId = dn.getStorageClsid(); - return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS")); - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - try(ByteArrayOutputStream bos = new ByteArrayOutputStream(); - InputStream is = dn.createDocumentInputStream("CONTENTS")) { - IOUtils.copy(is, bos); - return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF); - } - } - - @Override - public boolean canExtract(Picture source) { - PictureData pd = source.getPictureData(); - return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF); - } - - /** - * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF. - * If an embedded stream is inside an EMF picture, this method extracts the payload. - * - * @return the embedded data in an EMF picture or null if none is found - */ - @Override - protected EmbeddedData extract(Picture source) throws IOException { - // check for emf+ embedded pdf (poor mans style :( ) - // Mac Excel 2011 embeds pdf files with this method. 
- PictureData pd = source.getPictureData(); - if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) { - return null; - } - - // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF - byte pictureBytes[] = pd.getData(); - int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252)); - if (idxStart == -1) { - return null; - } - - int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252)); - if (idxEnd == -1) { - return null; - } - - int pictureBytesLen = idxEnd-idxStart+6; - byte[] pdfBytes = IOUtils.safelyAllocate(pictureBytesLen, MAX_RECORD_LENGTH); - System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen); - String filename = source.getShapeName().trim(); - if (!endsWithIgnoreCase(filename, ".pdf")) { - filename += ".pdf"; - } - return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF); - } - - - } - - static class OOXMLExtractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - return dn.hasEntry("package"); - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - - ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid()); - - String contentType = null; - String ext = null; - - if (clsId != null) { - contentType = clsId.getContentType(); - ext = clsId.getFileExtension(); - } - - if (contentType == null || ext == null) { - contentType = "application/zip"; - ext = ".zip"; - } - - DocumentInputStream dis = dn.createDocumentInputStream("package"); - byte data[] = IOUtils.toByteArray(dis); - dis.close(); - - return new EmbeddedData(dn.getName()+ext, data, contentType); - } - } - - static class BiffExtractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - return canExtractExcel(dn) || canExtractWord(dn); - } - - protected boolean canExtractExcel(DirectoryNode dn) { - ClassIDPredefined clsId = 
ClassIDPredefined.lookup(dn.getStorageClsid()); - return (ClassIDPredefined.EXCEL_V7 == clsId - || ClassIDPredefined.EXCEL_V8 == clsId - || dn.hasEntry("Workbook") /*...*/); - } - - protected boolean canExtractWord(DirectoryNode dn) { - ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid()); - return (ClassIDPredefined.WORD_V7 == clsId - || ClassIDPredefined.WORD_V8 == clsId - || dn.hasEntry("WordDocument")); - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - EmbeddedData ed = super.extract(dn); - if (canExtractExcel(dn)) { - ed.setFilename(dn.getName() + ".xls"); - ed.setContentType(CONTENT_TYPE_XLS); - } else if (canExtractWord(dn)) { - ed.setFilename(dn.getName() + ".doc"); - ed.setContentType(CONTENT_TYPE_DOC); - } - - return ed; - } - } - - static class FsExtractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - return true; - } - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - EmbeddedData ed = super.extract(dn); - ed.setFilename(dn.getName() + ".ole"); - // TODO: read the content type from CombObj stream - return ed; - } - } - - protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException { - for (Entry e : src) { - if (e instanceof DirectoryNode) { - DirectoryNode srcDir = (DirectoryNode)e; - DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName()); - destDir.setStorageClsid(srcDir.getStorageClsid()); - copyNodes(srcDir, destDir); - } else { - try (InputStream is = src.createDocumentInputStream(e)) { - dest.createDocument(e.getName(), is); - } - } - } - } - - - - /** - * Knuth-Morris-Pratt Algorithm for Pattern Matching - * Finds the first occurrence of the pattern in the text. 
- */ - private static int indexOf(byte[] data, int offset, byte[] pattern) { - int[] failure = computeFailure(pattern); - - int j = 0; - if (data.length == 0) { - return -1; - } - - for (int i = offset; i < data.length; i++) { - while (j > 0 && pattern[j] != data[i]) { - j = failure[j - 1]; - } - if (pattern[j] == data[i]) { j++; } - if (j == pattern.length) { - return i - pattern.length + 1; - } - } - return -1; - } - - /** - * Computes the failure function using a boot-strapping process, - * where the pattern is matched against itself. - */ - private static int[] computeFailure(byte[] pattern) { - int[] failure = new int[pattern.length]; - - int j = 0; - for (int i = 1; i < pattern.length; i++) { - while (j > 0 && pattern[j] != pattern[i]) { - j = failure[j - 1]; - } - if (pattern[j] == pattern[i]) { - j++; - } - failure[i] = j; - } - - return failure; - } - - -} diff --git a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java b/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java deleted file mode 100644 index 1a4c2cb1d5..0000000000 --- a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java +++ /dev/null @@ -1,275 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.ss.usermodel; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; - -import org.apache.poi.EmptyFileException; -import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.poifs.crypt.Decryptor; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; -import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.OfficeXmlFileException; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.util.IOUtils; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; - -/** - * Factory for creating the appropriate kind of Workbook - * (be it {@link HSSFWorkbook} or {@link XSSFWorkbook}), - * by auto-detecting from the supplied input. - */ -public class WorkbookFactory { - /** - * Creates a HSSFWorkbook from the given POIFSFileSystem - *

Note that in order to properly release resources the - * Workbook should be closed after use. - */ - public static Workbook create(POIFSFileSystem fs) throws IOException { - return new HSSFWorkbook(fs); - } - - /** - * Creates a HSSFWorkbook from the given NPOIFSFileSystem - *

Note that in order to properly release resources the - * Workbook should be closed after use. - */ - public static Workbook create(NPOIFSFileSystem fs) throws IOException { - try { - return create(fs, null); - } catch (InvalidFormatException e) { - // Special case of OOXML-in-POIFS which is broken - throw new IOException(e); - } - } - - /** - * Creates a Workbook from the given NPOIFSFileSystem, which may - * be password protected - * - * @param fs The {@link NPOIFSFileSystem} to read the document from - * @param password The password that should be used or null if no password is necessary. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - */ - private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException, InvalidFormatException { - DirectoryNode root = fs.getRoot(); - - // Encrypted OOXML files go inside OLE2 containers, is this one? - if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { - InputStream stream = DocumentFactoryHelper.getDecryptedStream(fs, password); - - OPCPackage pkg = OPCPackage.open(stream); - return create(pkg); - } - - // If we get here, it isn't an encrypted XLSX file - // So, treat it as a regular HSSF XLS one - boolean passwordSet = false; - if (password != null) { - Biff8EncryptionKey.setCurrentUserPassword(password); - passwordSet = true; - } - try { - return new HSSFWorkbook(root, true); - } finally { - if (passwordSet) { - Biff8EncryptionKey.setCurrentUserPassword(null); - } - } - } - - /** - * Creates a XSSFWorkbook from the given OOXML Package - * - *

Note that in order to properly release resources the - * Workbook should be closed after use.

- * - * @param pkg The {@link OPCPackage} opened for reading data. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - */ - public static Workbook create(OPCPackage pkg) throws IOException { - return new XSSFWorkbook(pkg); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given InputStream. - * - *

Your input stream MUST either support mark/reset, or - * be wrapped as a {@link BufferedInputStream}! Note that - * using an {@link InputStream} has a higher memory footprint - * than using a {@link File}.

- * - *

Note that in order to properly release resources the - * Workbook should be closed after use. Note also that loading - * from an InputStream requires more memory than loading - * from a File, so prefer {@link #create(File)} where possible. - * - * @param inp The {@link InputStream} to read data from. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the workbook given is password protected - */ - public static Workbook create(InputStream inp) throws IOException, InvalidFormatException, EncryptedDocumentException { - return create(inp, null); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given InputStream, which may be password protected.

- * - * Note that using an {@link InputStream} has a higher memory footprint - * than using a {@link File}.

- * - * Note that in order to properly release resources the - * Workbook should be closed after use. Note also that loading - * from an InputStream requires more memory than loading - * from a File, so prefer {@link #create(File)} where possible. - * - * @param inp The {@link InputStream} to read data from. - * @param password The password that should be used or null if no password is necessary. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the wrong password is given for a protected file - * @throws EmptyFileException If an empty stream is given - */ - public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { - InputStream is = FileMagic.prepareToCheckMagic(inp); - - FileMagic fm = FileMagic.valueOf(is); - - switch (fm) { - case OLE2: - NPOIFSFileSystem fs = new NPOIFSFileSystem(is); - return create(fs, password); - case OOXML: - return new XSSFWorkbook(OPCPackage.open(is)); - default: - throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); - } - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given File, which must exist and be readable. - *

Note that in order to properly release resources the - * Workbook should be closed after use. - * - * @param file The file to read data from. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the workbook given is password protected - */ - public static Workbook create(File file) throws IOException, InvalidFormatException, EncryptedDocumentException { - return create(file, null); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given File, which must exist and be readable, and - * may be password protected - *

Note that in order to properly release resources the - * Workbook should be closed after use. - * - * @param file The file to read data from. - * @param password The password that should be used or null if no password is necessary. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the wrong password is given for a protected file - * @throws EmptyFileException If an empty stream is given - */ - public static Workbook create(File file, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { - return create(file, password, false); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given File, which must exist and be readable, and - * may be password protected - *

Note that in order to properly release resources the - * Workbook should be closed after use. - * - * @param file The file to read data from. - * @param password The password that should be used or null if no password is necessary. - * @param readOnly If the Workbook should be opened in read-only mode to avoid writing back - * changes when the document is closed. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the wrong password is given for a protected file - * @throws EmptyFileException If an empty stream is given - */ - public static Workbook create(File file, String password, boolean readOnly) throws IOException, InvalidFormatException, EncryptedDocumentException { - if (! file.exists()) { - throw new FileNotFoundException(file.toString()); - } - - try (NPOIFSFileSystem fs = new NPOIFSFileSystem(file, readOnly)) { - return create(fs, password); - } catch(OfficeXmlFileException e) { - // opening as .xls failed => try opening as .xlsx - OPCPackage pkg = OPCPackage.open(file, readOnly ? 
PackageAccess.READ : PackageAccess.READ_WRITE); // NOSONAR - try { - return new XSSFWorkbook(pkg); - } catch (Exception ioe) { - // ensure that file handles are closed - use revert() to not re-write the file - pkg.revert(); - // do not pkg.close(); - - if (ioe instanceof IOException) { - throw (IOException)ioe; - } else if (ioe instanceof RuntimeException) { - throw (RuntimeException)ioe; - } else { - throw new IOException(ioe); - } - } - } - } -} diff --git a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java deleted file mode 100644 index 569c5ff719..0000000000 --- a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java +++ /dev/null @@ -1,183 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi.util; - -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Method; - -import javax.xml.XMLConstants; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.stream.events.Namespace; - -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.ErrorHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -public final class DocumentHelper { - private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class); - - private DocumentHelper() {} - - private static class DocHelperErrorHandler implements ErrorHandler { - - public void warning(SAXParseException exception) throws SAXException { - printError(POILogger.WARN, exception); - } - - public void error(SAXParseException exception) throws SAXException { - printError(POILogger.ERROR, exception); - } - - public void fatalError(SAXParseException exception) throws SAXException { - printError(POILogger.FATAL, exception); - throw exception; - } - - /** Prints the error message. */ - private void printError(int type, SAXParseException ex) { - StringBuilder sb = new StringBuilder(); - - String systemId = ex.getSystemId(); - if (systemId != null) { - int index = systemId.lastIndexOf('/'); - if (index != -1) - systemId = systemId.substring(index + 1); - sb.append(systemId); - } - sb.append(':'); - sb.append(ex.getLineNumber()); - sb.append(':'); - sb.append(ex.getColumnNumber()); - sb.append(": "); - sb.append(ex.getMessage()); - - logger.log(type, sb.toString(), ex); - } - } - - /** - * Creates a new document builder, with sensible defaults - * - * @throws IllegalStateException If creating the DocumentBuilder fails, e.g. - * due to {@link ParserConfigurationException}. 
- */ - public static synchronized DocumentBuilder newDocumentBuilder() { - try { - DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); - documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER); - documentBuilder.setErrorHandler(new DocHelperErrorHandler()); - return documentBuilder; - } catch (ParserConfigurationException e) { - throw new IllegalStateException("cannot create a DocumentBuilder", e); - } - } - - private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - static { - documentBuilderFactory.setNamespaceAware(true); - documentBuilderFactory.setValidating(false); - trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true); - trySetXercesSecurityManager(documentBuilderFactory); - } - - private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) { - try { - dbf.setFeature(feature, enabled); - } catch (Exception e) { - logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); - } catch (AbstractMethodError ame) { - logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame); - } - } - - private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) { - // Try built-in JVM one first, standalone if not - for (String securityManagerClassName : new String[]{ - //"com.sun.org.apache.xerces.internal.util.SecurityManager", - "org.apache.xerces.util.SecurityManager" - }) { - try { - Object mgr = Class.forName(securityManagerClassName).newInstance(); - Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); - setLimit.invoke(mgr, 4096); - dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr); - // Stop once one can be setup without error - return; - } catch (ClassNotFoundException e) { - // continue without log, this is expected in some setups - } catch (Throwable e) { // NOSONAR - also catch 
things like NoClassDefError here - logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e); - } - } - - // separate old version of Xerces not found => use the builtin way of setting the property - dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096); - } - - /** - * Parses the given stream via the default (sensible) - * DocumentBuilder - * @param inp Stream to read the XML data from - * @return the parsed Document - */ - public static Document readDocument(InputStream inp) throws IOException, SAXException { - return newDocumentBuilder().parse(inp); - } - - /** - * Parses the given stream via the default (sensible) - * DocumentBuilder - * @param inp sax source to read the XML data from - * @return the parsed Document - */ - public static Document readDocument(InputSource inp) throws IOException, SAXException { - return newDocumentBuilder().parse(inp); - } - - // must only be used to create empty documents, do not use it for parsing! - private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder(); - - /** - * Creates a new DOM Document - */ - public static synchronized Document createDocument() { - return documentBuilderSingleton.newDocument(); - } - - /** - * Adds a namespace declaration attribute to the given element. - */ - public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) { - element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, - XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix, - namespaceURI); - } - - /** - * Adds a namespace declaration attribute to the given element. 
- */ - public static void addNamespaceDeclaration(Element element, Namespace namespace) { - addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI()); - } -} diff --git a/src/ooxml/java/org/apache/poi/util/IdentifierManager.java b/src/ooxml/java/org/apache/poi/util/IdentifierManager.java deleted file mode 100644 index a863dabe60..0000000000 --- a/src/ooxml/java/org/apache/poi/util/IdentifierManager.java +++ /dev/null @@ -1,266 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.util; - -import java.util.LinkedList; -import java.util.ListIterator; - -/** - *

/**
 * Manages a closed range of numeric identifiers, handing out free ones and taking
 * released ones back. The free identifiers are kept as an ordered list of disjoint segments.
 *
 * <p>24.08.2009</p>
 *
 * @author Stefan Stern
 */
public class IdentifierManager {

    public static final long MAX_ID = Long.MAX_VALUE - 1;

    public static final long MIN_ID = 0L;

    /** Largest identifier managed by this instance (inclusive). */
    private final long upperbound;

    /** Smallest identifier managed by this instance (inclusive). */
    private final long lowerbound;

    /**
     * List of segments of available identifiers, ordered by ascending start
     */
    private final LinkedList<Segment> segments;

    /**
     * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
     * @param upperbound the upper limit of the id-range to manage. Must be less than or equal to {@link #MAX_ID}.
     * @throws IllegalArgumentException if the bounds are inverted or outside of
     *         [{@link #MIN_ID}, {@link #MAX_ID}]
     */
    public IdentifierManager(long lowerbound, long upperbound) {
        if (lowerbound > upperbound) {
            throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
        }
        else if (lowerbound < MIN_ID) {
            String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID);
            throw new IllegalArgumentException(message);
        }
        else if (upperbound > MAX_ID) {
            /*
             * while MAX_ID is Long.MAX_VALUE, this check is pointless. But if
             * someone subclasses / tweaks the limits, this check is fine.
             */
            throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound);
        }
        this.lowerbound = lowerbound;
        this.upperbound = upperbound;
        this.segments = new LinkedList<>();
        segments.add(new Segment(lowerbound, upperbound));
    }

    /**
     * Reserves the given identifier if it is still free; otherwise reserves some other free one.
     *
     * @param id the preferred identifier, within the managed range
     * @return {@code id} if it was free, else the identifier returned by {@link #reserveNew()}
     * @throws IllegalArgumentException if {@code id} is outside of the managed range
     * @throws IllegalStateException if no more identifiers are available
     */
    public long reserve(long id) {
        if (id < lowerbound || id > upperbound) {
            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
        }
        verifyIdentifiersLeft();

        if (id == upperbound) {
            Segment lastSegment = segments.getLast();
            if (lastSegment.end == upperbound) {
                lastSegment.end = upperbound - 1;
                if (lastSegment.start > lastSegment.end) {
                    segments.removeLast();
                }
                return id;
            }
            return reserveNew();
        }

        if (id == lowerbound) {
            Segment firstSegment = segments.getFirst();
            if (firstSegment.start == lowerbound) {
                firstSegment.start = lowerbound + 1;
                if (firstSegment.end < firstSegment.start) {
                    segments.removeFirst();
                }
                return id;
            }
            return reserveNew();
        }

        ListIterator<Segment> iter = segments.listIterator();
        while (iter.hasNext()) {
            Segment segment = iter.next();
            if (segment.end < id) {
                continue;
            }
            else if (segment.start > id) {
                // segments are ordered: id lies in a gap, i.e. it is already reserved
                break;
            }
            else if (segment.start == id) {
                segment.start = id + 1;
                if (segment.end < segment.start) {
                    iter.remove();
                }
                return id;
            }
            else if (segment.end == id) {
                segment.end = id - 1;
                if (segment.start > segment.end) {
                    iter.remove();
                }
                return id;
            }
            else {
                // id is strictly inside the segment: split it around id
                iter.add(new Segment(id + 1, segment.end));
                segment.end = id - 1;
                return id;
            }
        }
        return reserveNew();
    }

    /**
     * @return a new identifier.
     * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
     */
    public long reserveNew() {
        verifyIdentifiersLeft();
        Segment segment = segments.getFirst();
        long result = segment.start;
        segment.start += 1;
        if (segment.start > segment.end) {
            segments.removeFirst();
        }
        return result;
    }

    /**
     * @param id
     * the identifier to release. Must be greater than or equal to
     * {@link #lowerbound} and must be less than or equal to {@link #upperbound}
     * @return true, if the identifier was reserved and has been successfully
     * released, false, if the identifier was not reserved.
     * @throws IllegalArgumentException if {@code id} is outside of the managed range
     */
    public boolean release(long id) {
        if (id < lowerbound || id > upperbound) {
            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
        }

        if (id == upperbound) {
            // with no free segment left, every id - including this one - is reserved
            if (segments.isEmpty()) {
                segments.add(new Segment(upperbound, upperbound));
                return true;
            }
            Segment lastSegment = segments.getLast();
            if (lastSegment.end == upperbound - 1) {
                lastSegment.end = upperbound;
                return true;
            } else if (lastSegment.end == upperbound) {
                return false;
            } else {
                segments.add(new Segment(upperbound, upperbound));
                return true;
            }
        }

        if (id == lowerbound) {
            if (segments.isEmpty()) {
                segments.addFirst(new Segment(lowerbound, lowerbound));
                return true;
            }
            Segment firstSegment = segments.getFirst();
            if (firstSegment.start == lowerbound + 1) {
                firstSegment.start = lowerbound;
                return true;
            } else if (firstSegment.start == lowerbound) {
                return false;
            } else {
                segments.addFirst(new Segment(lowerbound, lowerbound));
                return true;
            }
        }

        long higher = id + 1;
        long lower = id - 1;
        ListIterator<Segment> iter = segments.listIterator();

        while (iter.hasNext()) {
            Segment segment = iter.next();
            if (segment.end < lower) {
                continue;
            }
            if (segment.start > higher) {
                // id lies in the gap before this segment and touches no neighbour
                iter.previous();
                iter.add(new Segment(id, id));
                return true;
            }
            if (segment.start == higher) {
                segment.start = id;
                return true;
            }
            else if (segment.end == lower) {
                segment.end = id;
                /* check if releasing this elements glues two segments into one */
                if (iter.hasNext()) {
                    Segment next = iter.next();
                    if (next.start == segment.end + 1) {
                        segment.end = next.end;
                        iter.remove();
                    }
                }
                return true;
            }
            else {
                /* id lies inside a free segment: it was not reserved */
                return false;
            }
        }

        // Every free segment (if any) ends below id - 1, so id was reserved and
        // becomes a new segment at the end of the list.
        segments.add(new Segment(id, id));
        return true;
    }

    /**
     * @return the number of identifiers that are still free
     */
    public long getRemainingIdentifiers() {
        long result = 0;
        for (Segment segment : segments) {
            result = result - segment.start;
            result = result + segment.end + 1;
        }
        return result;
    }

    /**
     * @throws IllegalStateException if no free identifier is left
     */
    private void verifyIdentifiersLeft() {
        if (segments.isEmpty()) {
            throw new IllegalStateException("No identifiers left");
        }
    }

    /** A closed interval {@code [start; end]} of free identifiers. */
    private static class Segment {

        public Segment(long start, long end) {
            this.start = start;
            this.end = end;
        }

        public long start;
        public long end;

        @Override
        public String toString() {
            return "[" + start + "; " + end + "]";
        }
    }
}
-==================================================================== */ - -package org.apache.poi.util; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.net.URL; -import java.security.AccessController; -import java.security.CodeSource; -import java.security.PrivilegedAction; -import java.security.ProtectionDomain; -import java.util.ArrayList; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Vector; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; -import java.util.regex.Pattern; - -import junit.framework.TestCase; - -import org.junit.Test; -import org.junit.internal.TextListener; -import org.junit.runner.Description; -import org.junit.runner.JUnitCore; -import org.junit.runner.Result; - -/** - * Build a 'lite' version of the ooxml-schemas.jar - * - * @author Yegor Kozlov - */ -public final class OOXMLLite { - private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb"); - - /** - * Destination directory to copy filtered classes - */ - private File _destDest; - - /** - * Directory with the compiled ooxml tests - */ - private File _testDir; - - /** - * Reference to the ooxml-schemas.jar - */ - private File _ooxmlJar; - - - OOXMLLite(String dest, String test, String ooxmlJar) { - _destDest = new File(dest); - _testDir = new File(test); - _ooxmlJar = new File(ooxmlJar); - } - - public static void main(String[] args) throws IOException { - System.out.println("Free memory (bytes): " + - Runtime.getRuntime().freeMemory()); - long maxMemory = Runtime.getRuntime().maxMemory(); - System.out.println("Maximum memory (bytes): " + - (maxMemory == Long.MAX_VALUE ? 
"no limit" : maxMemory)); - System.out.println("Total memory (bytes): " + - Runtime.getRuntime().totalMemory()); - - String dest = null, test = null, ooxml = null; - - for (int i = 0; i < args.length; i++) { - switch (args[i]) { - case "-dest": - dest = args[++i]; - break; - case "-test": - test = args[++i]; - break; - case "-ooxml": - ooxml = args[++i]; - break; - } - } - OOXMLLite builder = new OOXMLLite(dest, test, ooxml); - builder.build(); - } - - void build() throws IOException { - List> lst = new ArrayList<>(); - //collect unit tests - String exclude = StringUtil.join("|", - "BaseTestXWorkbook", - "BaseTestXSheet", - "BaseTestXRow", - "BaseTestXCell", - "BaseTestXSSFPivotTable", - "TestSXSSFWorkbook\\$\\d", - "TestUnfixedBugs", - "MemoryUsage", - "TestDataProvider", - "TestDataSamples", - "All.+Tests", - "ZipFileAssert", - "AesZipFileZipEntrySource", - "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource", - "PkiTestUtils", - "TestCellFormatPart\\$\\d", - "TestSignatureInfo\\$\\d", - "TestCertificateEncryption\\$CertData", - "TestPOIXMLDocument\\$OPCParser", - "TestPOIXMLDocument\\$TestFactory", - "TestXSLFTextParagraph\\$DrawTextParagraphProxy", - "TestXSSFExportToXML\\$\\d", - "TestXSSFExportToXML\\$DummyEntityResolver", - "TestFormulaEvaluatorOnXSSF\\$Result", - "TestFormulaEvaluatorOnXSSF\\$SS", - "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result", - "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS", - "TestXSSFBugs\\$\\d", - "AddImageBench", - "AddImageBench_jmhType_B\\d", - "AddImageBench_benchCreatePicture_jmhTest", - "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream", - "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator", - "TestXSSFBReader\\$1", - "TestXSSFBReader\\$TestSheetHandler", - "TestFormulaEvaluatorOnXSSF\\$1", - "TestMultiSheetFormulaEvaluatorOnXSSF\\$1", - "TestZipPackagePropertiesMarshaller\\$1", - "SLCommonUtils", - "TestPPTX2PNG\\$1", - 
"TestMatrixFormulasFromXMLSpreadsheet\\$1", - "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator", - "TestPOIXMLDocument\\$UncaughtHandler", - "TestOleShape\\$Api", - "TestOleShape\\$1", - "TestPOIXMLDocument\\$1", - "TestXMLSlideShow\\$1", - "TestXMLSlideShow\\$BufAccessBAOS", - "TestXDDFChart\\$1", - "TestOOXMLLister\\$1", - "TestOOXMLPrettyPrint\\$1" - ); - System.out.println("Collecting unit tests from " + _testDir); - collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class"); - System.out.println("Found " + lst.size() + " classes"); - - //run tests - JUnitCore jUnitCore = new JUnitCore(); - jUnitCore.addListener(new TextListener(System.out) { - private final Set classes = new HashSet<>(); - private int count; - - @Override - public void testStarted(Description description) { - // count how many test-classes we already saw - classes.add(description.getClassName()); - count++; - if(count % 100 == 0) { - System.out.println(); - System.out.println(classes.size() + "/" + lst.size() + ": " + description.getDisplayName()); - } - - super.testStarted(description); - } - }); - Result result = jUnitCore.run(lst.toArray(new Class[0])); - if (!result.wasSuccessful()) { - throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar"); - } - - //see what classes from the ooxml-schemas.jar are loaded - System.out.println("Copying classes to " + _destDest); - Map> classes = getLoadedClasses(_ooxmlJar.getName()); - for (Class cls : classes.values()) { - String className = cls.getName(); - String classRef = className.replace('.', '/') + ".class"; - File destFile = new File(_destDest, classRef); - IOUtils.copy(cls.getResourceAsStream('/' + classRef), destFile); - - if(cls.isInterface()){ - /// Copy classes and interfaces declared as members of this class - for(Class fc : cls.getDeclaredClasses()){ - className = fc.getName(); - classRef = className.replace('.', '/') + ".class"; - destFile = new File(_destDest, classRef); - 
IOUtils.copy(fc.getResourceAsStream('/' + classRef), destFile); - } - } - } - - //finally copy the compiled .xsb files - System.out.println("Copying .xsb resources"); - try (JarFile jar = new JarFile(_ooxmlJar)) { - for (Enumeration e = jar.entries(); e.hasMoreElements(); ) { - JarEntry je = e.nextElement(); - if (SCHEMA_PATTERN.matcher(je.getName()).matches()) { - File destFile = new File(_destDest, je.getName()); - IOUtils.copy(jar.getInputStream(je), destFile); - } - } - } - } - - private static boolean checkForTestAnnotation(Class testclass) { - for (Method m : testclass.getDeclaredMethods()) { - if(m.isAnnotationPresent(Test.class)) { - return true; - } - } - - // also check super classes - if(testclass.getSuperclass() != null) { - for (Method m : testclass.getSuperclass().getDeclaredMethods()) { - if(m.isAnnotationPresent(Test.class)) { - return true; - } - } - } - - System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation"); - - // Should we also look at superclasses to find cases - // where we have abstract base classes with derived tests? 
- // if(checkForTestAnnotation(testclass.getSuperclass())) return true; - - return false; - } - - /** - * Recursively collect classes from the supplied directory - * - * @param arg the directory to search in - * @param out output - * @param ptrn the pattern (regexp) to filter found files - */ - private static void collectTests(File root, File arg, List> out, String ptrn, String exclude) { - if (arg.isDirectory()) { - File files[] = arg.listFiles(); - if (files != null) { - for (File f : files) { - collectTests(root, f, out, ptrn, exclude); - } - } - } else { - String path = arg.getAbsolutePath(); - String prefix = root.getAbsolutePath(); - String cls = path.substring(prefix.length() + 1).replace(File.separator, "."); - if(!cls.matches(ptrn)) return; - if (cls.matches(exclude)) return; - //ignore inner classes defined in tests - if (cls.indexOf('$') != -1) { - System.out.println("Inner class " + cls + " not included"); - return; - } - - cls = cls.replace(".class", ""); - - try { - Class testclass = Class.forName(cls); - if (TestCase.class.isAssignableFrom(testclass) - || checkForTestAnnotation(testclass)) { - out.add(testclass); - } - } catch (Throwable e) { // NOSONAR - System.out.println("Class " + cls + " is not in classpath"); - } - } - } - - /** - * - * @param ptrn the pattern to filter output - * @return the classes loaded by the system class loader keyed by class name - */ - @SuppressWarnings("unchecked") - private static Map> getLoadedClasses(String ptrn) { - // make the field accessible, we defer this from static initialization to here to - // allow JDKs which do not have this field (e.g. 
IBM JDK) to at least load the class - // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550 - final Field _classes = AccessController.doPrivileged(new PrivilegedAction() { - @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)") - public Field run() { - try { - Field fld = ClassLoader.class.getDeclaredField("classes"); - fld.setAccessible(true); - return fld; - } catch (Exception e) { - throw new RuntimeException(e); - } - - } - }); - - ClassLoader appLoader = ClassLoader.getSystemClassLoader(); - try { - Vector> classes = (Vector>) _classes.get(appLoader); - Map> map = new HashMap<>(); - for (Class cls : classes) { - // e.g. proxy-classes, ... - ProtectionDomain pd = cls.getProtectionDomain(); - if (pd == null) continue; - CodeSource cs = pd.getCodeSource(); - if (cs == null) continue; - URL loc = cs.getLocation(); - if (loc == null) continue; - - String jar = loc.toString(); - if (jar.contains(ptrn)) { - map.put(cls.getName(), cls); - } - } - return map; - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/util/PackageHelper.java b/src/ooxml/java/org/apache/poi/util/PackageHelper.java deleted file mode 100644 index e950323116..0000000000 --- a/src/ooxml/java/org/apache/poi/util/PackageHelper.java +++ /dev/null @@ -1,136 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.util; - -import org.apache.poi.openxml4j.opc.*; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.POIXMLException; - -import java.io.*; -import java.net.URI; - -/** - * Provides handy methods to work with OOXML packages - */ -public final class PackageHelper { - - public static OPCPackage open(InputStream is) throws IOException { - try { - return OPCPackage.open(is); - } catch (InvalidFormatException e){ - throw new POIXMLException(e); - } - } - - /** - * Clone the specified package. 
- * - * @param pkg the package to clone - * @param file the destination file - * @return the cloned package - */ - public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException { - - String path = file.getAbsolutePath(); - - OPCPackage dest = OPCPackage.create(path); - PackageRelationshipCollection rels = pkg.getRelationships(); - for (PackageRelationship rel : rels) { - PackagePart part = pkg.getPart(rel); - PackagePart part_tgt; - if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) { - copyProperties(pkg.getPackageProperties(), dest.getPackageProperties()); - continue; - } - dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType()); - part_tgt = dest.createPart(part.getPartName(), part.getContentType()); - - OutputStream out = part_tgt.getOutputStream(); - IOUtils.copy(part.getInputStream(), out); - out.close(); - - if(part.hasRelationships()) { - copy(pkg, part, dest, part_tgt); - } - } - dest.close(); - - //the temp file will be deleted when JVM terminates - new File(path).deleteOnExit(); - return OPCPackage.open(path); - } - - /** - * Recursively copy package parts to the destination package - */ - private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException { - PackageRelationshipCollection rels = part.getRelationships(); - if(rels != null) for (PackageRelationship rel : rels) { - PackagePart p; - if(rel.getTargetMode() == TargetMode.EXTERNAL){ - part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId()); - //external relations don't have associated package parts - continue; - } - URI uri = rel.getTargetURI(); - - if(uri.getRawFragment() != null) { - part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId()); - continue; - } - PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); - p = 
pkg.getPart(relName); - part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId()); - - - - - PackagePart dest; - if(!tgt.containPart(p.getPartName())){ - dest = tgt.createPart(p.getPartName(), p.getContentType()); - OutputStream out = dest.getOutputStream(); - IOUtils.copy(p.getInputStream(), out); - out.close(); - copy(pkg, p, tgt, dest); - } - } - } - - /** - * Copy core package properties - * - * @param src source properties - * @param tgt target properties - */ - private static void copyProperties(PackageProperties src, PackageProperties tgt){ - tgt.setCategoryProperty(src.getCategoryProperty().getValue()); - tgt.setContentStatusProperty(src.getContentStatusProperty().getValue()); - tgt.setContentTypeProperty(src.getContentTypeProperty().getValue()); - tgt.setCreatorProperty(src.getCreatorProperty().getValue()); - tgt.setDescriptionProperty(src.getDescriptionProperty().getValue()); - tgt.setIdentifierProperty(src.getIdentifierProperty().getValue()); - tgt.setKeywordsProperty(src.getKeywordsProperty().getValue()); - tgt.setLanguageProperty(src.getLanguageProperty().getValue()); - tgt.setRevisionProperty(src.getRevisionProperty().getValue()); - tgt.setSubjectProperty(src.getSubjectProperty().getValue()); - tgt.setTitleProperty(src.getTitleProperty().getValue()); - tgt.setVersionProperty(src.getVersionProperty().getValue()); - } -} diff --git a/src/ooxml/java/org/apache/poi/util/SAXHelper.java b/src/ooxml/java/org/apache/poi/util/SAXHelper.java deleted file mode 100644 index b5968d9ff9..0000000000 --- a/src/ooxml/java/org/apache/poi/util/SAXHelper.java +++ /dev/null @@ -1,127 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. 
- The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.util; - -import java.io.IOException; -import java.io.StringReader; -import java.lang.reflect.Method; -import java.util.concurrent.TimeUnit; - -import javax.xml.XMLConstants; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; - -import org.xml.sax.EntityResolver; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - - -/** - * Provides handy methods for working with SAX parsers and readers - */ -public final class SAXHelper { - private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class); - private static long lastLog; - - private SAXHelper() {} - - /** - * Creates a new SAX XMLReader, with sensible defaults - */ - public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException { - XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader(); - xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER); - trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING); - trySetXercesSecurityManager(xmlReader); - return xmlReader; - } - - static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() { - @Override - public InputSource resolveEntity(String publicId, String systemId) - throws SAXException, IOException { - return new InputSource(new 
StringReader("")); - } - }; - - private static final SAXParserFactory saxFactory; - static { - try { - saxFactory = SAXParserFactory.newInstance(); - saxFactory.setValidating(false); - saxFactory.setNamespaceAware(true); - } catch (RuntimeException | Error re) { - // this also catches NoClassDefFoundError, which may be due to a local class path issue - // This may occur if the code is run inside a web container - // or a restricted JVM - // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170 - logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re); - throw re; - } catch (Exception e) { - logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e); - throw new RuntimeException("Failed to create SAXParserFactory", e); - } - } - - private static void trySetSAXFeature(XMLReader xmlReader, String feature) { - try { - xmlReader.setFeature(feature, true); - } catch (Exception e) { - logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); - } catch (AbstractMethodError ame) { - logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame); - } - } - - private static void trySetXercesSecurityManager(XMLReader xmlReader) { - // Try built-in JVM one first, standalone if not - for (String securityManagerClassName : new String[] { - //"com.sun.org.apache.xerces.internal.util.SecurityManager", - "org.apache.xerces.util.SecurityManager" - }) { - try { - Object mgr = Class.forName(securityManagerClassName).newInstance(); - Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); - setLimit.invoke(mgr, 4096); - xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr); - // Stop once one can be setup without error - return; - } catch (ClassNotFoundException e) { - // continue without log, this is expected in some setups - } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here - // throttle the log somewhat as it can 
spam the log otherwise - if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) { - logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e); - lastLog = System.currentTimeMillis(); - } - } - } - - // separate old version of Xerces not found => use the builtin way of setting the property - try { - xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096); - } catch (SAXException e) { // NOSONAR - also catch things like NoClassDefError here - // throttle the log somewhat as it can spam the log otherwise - if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) { - logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e); - lastLog = System.currentTimeMillis(); - } - } - } -} diff --git a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java deleted file mode 100644 index 21c003c297..0000000000 --- a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java +++ /dev/null @@ -1,384 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.lang.Thread.UncaughtExceptionHandler; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; - -import org.apache.poi.POIXMLDocumentPart.RelationPart; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.NullOutputStream; -import org.apache.poi.util.PackageHelper; -import org.apache.poi.util.TempFile; -import org.apache.poi.xslf.usermodel.XMLSlideShow; -import org.apache.poi.xssf.usermodel.XSSFRelation; -import org.apache.poi.xwpf.usermodel.XWPFRelation; -import org.junit.Test; - -/** - * Test recursive read and write of OPC packages - */ -public final class TestPOIXMLDocument { - - private static class OPCParser extends POIXMLDocument { - - public OPCParser(OPCPackage pkg) { - super(pkg); - } - - public OPCParser(OPCPackage pkg, String coreDocumentRel) { - super(pkg, coreDocumentRel); - } - - @Override - public List getAllEmbedds() { - throw new RuntimeException("not supported"); - } - - public void parse(POIXMLFactory factory) throws IOException{ - load(factory); - } - } - - private static final class TestFactory extends POIXMLFactory { - - public TestFactory() { - // 
- } - - @Override - protected POIXMLRelation getDescriptor(String relationshipType) { - return null; - } - - /** - * @since POI 3.14-Beta1 - */ - @Override - protected POIXMLDocumentPart createDocumentPart - (Class cls, Class[] classes, Object[] values) - throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException { - return null; - } - } - - private static void traverse(POIXMLDocument doc) throws IOException{ - HashMap context = new HashMap<>(); - for (RelationPart p : doc.getRelationParts()){ - traverse(p, context); - } - } - - /** - * Recursively traverse a OOXML document and assert that same logical parts have the same physical instances - */ - private static void traverse(RelationPart rp, HashMap context) throws IOException{ - POIXMLDocumentPart dp = rp.getDocumentPart(); - assertEquals(rp.getRelationship().getTargetURI().toString(), dp.getPackagePart().getPartName().getName()); - - context.put(dp.getPackagePart().getPartName().getName(), dp); - for(RelationPart p : dp.getRelationParts()){ - assertNotNull(p.getRelationship().toString()); - - String uri = p.getDocumentPart().getPackagePart().getPartName().getURI().toString(); - assertEquals(uri, p.getRelationship().getTargetURI().toString()); - if (!context.containsKey(uri)) { - traverse(p, context); - } else { - POIXMLDocumentPart prev = context.get(uri); - assertSame("Duplicate POIXMLDocumentPart instance for targetURI=" + uri, prev, p.getDocumentPart()); - } - } - } - - public void assertReadWrite(OPCPackage pkg1) throws Exception { - - OPCParser doc = new OPCParser(pkg1); - doc.parse(new TestFactory()); - - traverse(doc); - - File tmp = TempFile.createTempFile("poi-ooxml", ".tmp"); - FileOutputStream out = new FileOutputStream(tmp); - doc.write(out); - out.close(); - - // Should not be able to write to an output stream that has been closed - try { - doc.write(out); - fail("Should not be able to write to an output stream that has been closed."); 
- } catch (final OpenXML4JRuntimeException e) { - // FIXME: A better exception class (IOException?) and message should be raised - // indicating that the document could not be written because the output stream is closed. - // see {@link org.apache.poi.openxml4j.opc.ZipPackage#saveImpl(java.io.OutputStream)} - if (e.getMessage().matches("Fail to save: an error occurs while saving the package : The part .+ failed to be saved in the stream with marshaller .+")) { - // expected - } else { - throw e; - } - } - - // Should not be able to write a document that has been closed - doc.close(); - try { - doc.write(new NullOutputStream()); - fail("Should not be able to write a document that has been closed."); - } catch (final IOException e) { - if (e.getMessage().equals("Cannot write data, document seems to have been closed already")) { - // expected - } else { - throw e; - } - } - - // Should be able to close a document multiple times, though subsequent closes will have no effect. - doc.close(); - - - @SuppressWarnings("resource") - OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath()); - doc = new OPCParser(pkg1); - try { - doc.parse(new TestFactory()); - traverse(doc); - - assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size()); - - ArrayList l1 = pkg1.getParts(); - ArrayList l2 = pkg2.getParts(); - - assertEquals(l1.size(), l2.size()); - for (int i=0; i < l1.size(); i++){ - PackagePart p1 = l1.get(i); - PackagePart p2 = l2.get(i); - - assertEquals(p1.getContentType(), p2.getContentType()); - assertEquals(p1.hasRelationships(), p2.hasRelationships()); - if(p1.hasRelationships()){ - assertEquals(p1.getRelationships().size(), p2.getRelationships().size()); - } - assertEquals(p1.getPartName(), p2.getPartName()); - } - } finally { - doc.close(); - pkg1.close(); - pkg2.close(); - } - } - - @Test - public void testPPTX() throws Exception { - POIDataSamples pds = POIDataSamples.getSlideShowInstance(); - 
assertReadWrite(PackageHelper.open(pds.openResourceAsStream("PPTWithAttachments.pptm"))); - } - - @Test - public void testXLSX() throws Exception { - POIDataSamples pds = POIDataSamples.getSpreadSheetInstance(); - assertReadWrite(PackageHelper.open(pds.openResourceAsStream("ExcelWithAttachments.xlsm"))); - } - - @Test - public void testDOCX() throws Exception { - POIDataSamples pds = POIDataSamples.getDocumentInstance(); - assertReadWrite(PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"))); - } - - @Test - public void testRelationOrder() throws Exception { - POIDataSamples pds = POIDataSamples.getDocumentInstance(); - @SuppressWarnings("resource") - OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")); - OPCParser doc = new OPCParser(pkg); - try { - doc.parse(new TestFactory()); - - for(POIXMLDocumentPart rel : doc.getRelations()){ - //TODO finish me - assertNotNull(rel); - } - } finally { - doc.close(); - } - } - - @Test - public void testGetNextPartNumber() throws Exception { - POIDataSamples pds = POIDataSamples.getDocumentInstance(); - @SuppressWarnings("resource") - OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")); - OPCParser doc = new OPCParser(pkg); - try { - doc.parse(new TestFactory()); - - // Non-indexed parts: Word is taken, Excel is not - assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 0)); - assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, -1)); - assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 99)); - assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 0)); - assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, -1)); - assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 99)); - - // Indexed parts: - // Has 2 headers - assertEquals(0, doc.getNextPartNumber(XWPFRelation.HEADER, 0)); - assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, -1)); - assertEquals(3, 
doc.getNextPartNumber(XWPFRelation.HEADER, 1)); - assertEquals(8, doc.getNextPartNumber(XWPFRelation.HEADER, 8)); - - // Has no Excel Sheets - assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 0)); - assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, -1)); - assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 1)); - } finally { - doc.close(); - } - } - - @Test - public void testCommitNullPart() throws IOException, InvalidFormatException { - POIXMLDocumentPart part = new POIXMLDocumentPart(); - part.prepareForCommit(); - part.commit(); - part.onSave(new HashSet<>()); - - assertNull(part.getRelationById(null)); - assertNull(part.getRelationId(null)); - assertFalse(part.removeRelation(null, true)); - part.removeRelation((POIXMLDocumentPart)null); - assertEquals("",part.toString()); - part.onDocumentCreate(); - //part.getTargetPart(null); - } - - @Test - public void testVSDX() throws Exception { - POIDataSamples pds = POIDataSamples.getDiagramInstance(); - @SuppressWarnings("resource") - OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx")); - POIXMLDocument part = new OPCParser(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT); - - assertNotNull(part); - assertEquals(0, part.getRelationCounter()); - part.close(); - } - - @Test - public void testVSDXPart() throws IOException { - POIDataSamples pds = POIDataSamples.getDiagramInstance(); - OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx")); - - POIXMLDocumentPart part = new POIXMLDocumentPart(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT); - - assertNotNull(part); - assertEquals(0, part.getRelationCounter()); - - open.close(); - } - - @Test(expected=POIXMLException.class) - public void testInvalidCoreRel() throws IOException { - POIDataSamples pds = POIDataSamples.getDiagramInstance(); - OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx")); - - try { - new POIXMLDocumentPart(open, "somethingillegal"); - } finally 
{ - open.close(); - } - } - - @Test - public void testOSGIClassLoading() { - // the schema type loader is cached per thread in POIXMLTypeLoader. - // So create a new Thread and change the context class loader (which would normally be used) - // to not contain the OOXML classes - Runnable run = new Runnable() { - public void run() { - InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx"); - XMLSlideShow ppt = null; - try { - ppt = new XMLSlideShow(is); - ppt.getSlides().get(0).getShapes(); - } catch (IOException e) { - fail("failed to load XMLSlideShow"); - } finally { - IOUtils.closeQuietly(ppt); - IOUtils.closeQuietly(is); - } - } - }; - - Thread thread = Thread.currentThread(); - ClassLoader cl = thread.getContextClassLoader(); - UncaughtHandler uh = new UncaughtHandler(); - - // check schema type loading and check if we could run in an OOM - Thread ta[] = new Thread[30]; - for (int j=0; j<10; j++) { - for (int i=0; i(dateCreated)); - assertEquals(dateCreated, cp.getCreated()); - - XWPFDocument doc2 = XWPFTestDataSamples.writeOutAndReadBack(doc); - doc.close(); - cp = doc2.getProperties().getCoreProperties(); - Date dt3 = cp.getCreated(); - assertEquals(dateCreated, dt3); - doc2.close(); - } - - @Test - public void testGetSetRevision() { - String revision = _coreProperties.getRevision(); - assertTrue("Revision number is 1", Integer.parseInt(revision) > 1); - _coreProperties.setRevision("20"); - assertEquals("20", _coreProperties.getRevision()); - _coreProperties.setRevision("20xx"); - assertEquals("20", _coreProperties.getRevision()); - } - - @Test - public void testLastModifiedByUserProperty() { - String lastModifiedByUser = _coreProperties.getLastModifiedByUser(); - assertEquals("Paolo Mottadelli", lastModifiedByUser); - _coreProperties.setLastModifiedByUser("Test User"); - assertEquals("Test User", _coreProperties.getLastModifiedByUser()); - } - - public static boolean dateTimeEqualToUTCString(Date dateTime, String 
utcString) { - Calendar utcCalendar = LocaleUtil.getLocaleCalendar(LocaleUtil.TIMEZONE_UTC); - utcCalendar.setTimeInMillis(dateTime.getTime()); - String dateTimeUtcString = utcCalendar.get(Calendar.YEAR) + "-" + - zeroPad((utcCalendar.get(Calendar.MONTH)+1)) + "-" + - zeroPad(utcCalendar.get(Calendar.DAY_OF_MONTH)) + "T" + - zeroPad(utcCalendar.get(Calendar.HOUR_OF_DAY)) + ":" + - zeroPad(utcCalendar.get(Calendar.MINUTE)) + ":" + - zeroPad(utcCalendar.get(Calendar.SECOND)) + "Z"; - - return utcString.equals(dateTimeUtcString); - } - - @Ignore("Fails to add some of the thumbnails, needs more investigation") - @Test - public void testThumbnails() throws Exception { - POIXMLProperties noThumbProps = sampleNoThumb.getProperties(); - - assertNotNull(_props.getThumbnailPart()); - assertNull(noThumbProps.getThumbnailPart()); - - assertNotNull(_props.getThumbnailFilename()); - assertNull(noThumbProps.getThumbnailFilename()); - - assertNotNull(_props.getThumbnailImage()); - assertNull(noThumbProps.getThumbnailImage()); - - assertEquals("/thumbnail.jpeg", _props.getThumbnailFilename()); - - - // Adding / changing - ByteArrayInputStream imageData = new ByteArrayInputStream(new byte[1]); - noThumbProps.setThumbnail("Testing.png", imageData); - assertNotNull(noThumbProps.getThumbnailPart()); - assertEquals("/Testing.png", noThumbProps.getThumbnailFilename()); - assertNotNull(noThumbProps.getThumbnailImage()); - assertEquals(1, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length); - - imageData = new ByteArrayInputStream(new byte[2]); - noThumbProps.setThumbnail("Testing2.png", imageData); - assertNotNull(noThumbProps.getThumbnailPart()); - assertEquals("/Testing.png", noThumbProps.getThumbnailFilename()); - assertNotNull(noThumbProps.getThumbnailImage()); - assertEquals(2, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length); - } - - private static String zeroPad(long i) { - if (i >= 0 && i <=9) { - return "0" + i; - } else { - return String.valueOf(i); - } - 
} -} diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java deleted file mode 100644 index 0a885377a6..0000000000 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ /dev/null @@ -1,503 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.extractor; - -import static org.apache.poi.POITestCase.assertContains; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.Locale; - -import org.apache.poi.POIDataSamples; -import org.apache.poi.POIOLE2TextExtractor; -import org.apache.poi.POITextExtractor; -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.UnsupportedFileFormatException; -import org.apache.poi.hdgf.extractor.VisioTextExtractor; -import org.apache.poi.hpbf.extractor.PublisherTextExtractor; -import org.apache.poi.hsmf.extractor.OutlookTextExtactor; -import org.apache.poi.hssf.HSSFTestDataSamples; -import org.apache.poi.hssf.OldExcelFormatException; -import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; -import org.apache.poi.hssf.extractor.ExcelExtractor; -import org.apache.poi.hwpf.extractor.Word6Extractor; -import org.apache.poi.hwpf.extractor.WordExtractor; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.sl.extractor.SlideShowExtractor; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; -import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; -import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; -import org.apache.poi.xssf.extractor.XSSFExcelExtractor; -import 
org.apache.poi.xwpf.extractor.XWPFWordExtractor; -import org.apache.xmlbeans.XmlException; -import org.junit.Test; - -/** - * Test that the extractor factory plays nicely - */ -public class TestExtractorFactory { - - private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class); - - private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance(); - private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls"); - private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx"); - private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx"); - private static final File xltx = getFileAndCheck(ssTests, "test.xltx"); - private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls"); - private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb"); - - private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance(); - private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc"); - private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc"); - private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc"); - private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx"); - private static final File dotx = getFileAndCheck(wpTests, "test.dotx"); - private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc"); - private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc"); - - private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); - private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt"); - private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx"); - private static final File txt = getFileAndCheck(slTests, "SampleShow.txt"); - - private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance(); - private static final File msg = 
getFileAndCheck(olTests, "quick.msg"); - private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg"); - private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg"); - - private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance(); - private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd"); - private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx"); - - private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance(); - private static File pub = getFileAndCheck(pubTests, "Simple.pub"); - - private static File getFileAndCheck(POIDataSamples samples, String name) { - File file = samples.getFile(name); - - assertNotNull("Did not get a file for " + name, file); - assertTrue("Did not get a type file for " + name, file.isFile()); - assertTrue("File did not exist: " + name, file.exists()); - - return file; - } - - private static final Object[] TEST_SET = { - "Excel", xls, ExcelExtractor.class, 200, - "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200, - "Excel - xltx", xltx, XSSFExcelExtractor.class, -1, - "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1, - "Word", doc, WordExtractor.class, 120, - "Word - docx", docx, XWPFWordExtractor.class, 120, - "Word - dotx", dotx, XWPFWordExtractor.class, -1, - "Word 6", doc6, Word6Extractor.class, 20, - "Word 95", doc95, Word6Extractor.class, 120, - "PowerPoint", ppt, SlideShowExtractor.class, 120, - "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120, - "Visio", vsd, VisioTextExtractor.class, 50, - "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20, - "Publisher", pub, PublisherTextExtractor.class, 50, - "Outlook msg", msg, OutlookTextExtactor.class, 50, - - // TODO Support OOXML-Strict, see bug #57699 - // xlsxStrict - }; - - @FunctionalInterface - interface FunctionEx { - R apply(T t) throws IOException, OpenXML4JException, XmlException; - } - - - @Test - public void 
testFile() throws Exception { - for (int i = 0; i < TEST_SET.length; i += 4) { - try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) { - testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]); - } - } - } - - @Test(expected = IllegalArgumentException.class) - public void testFileInvalid() throws Exception { - // Text - try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {} - } - - @Test - public void testInputStream() throws Exception { - testStream((f) -> ExtractorFactory.createExtractor(f), true); - } - - @Test(expected = IllegalArgumentException.class) - public void testInputStreamInvalid() throws Exception { - testInvalid((f) -> ExtractorFactory.createExtractor(f)); - } - - @Test - public void testPOIFS() throws Exception { - testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false); - } - - @Test(expected = IOException.class) - public void testPOIFSInvalid() throws Exception { - testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f))); - } - - @Test - public void testOPOIFS() throws Exception { - testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false); - } - - @Test(expected = IOException.class) - public void testOPOIFSInvalid() throws Exception { - testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f))); - } - - - private void testStream(final FunctionEx poifsIS, final boolean loadOOXML) - throws IOException, OpenXML4JException, XmlException { - for (int i = 0; i < TEST_SET.length; i += 4) { - File testFile = (File) TEST_SET[i + 1]; - if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) { - continue; - } - try (FileInputStream fis = new FileInputStream(testFile); - POITextExtractor ext = poifsIS.apply(fis)) { - testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]); - } catch (IllegalArgumentException e) 
{ - fail("failed to process "+testFile); - } - } - } - - private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) { - assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext)); - final String actual = ext.getText(); - if (minLength == -1) { - assertContains(actual.toLowerCase(Locale.ROOT), "test"); - } else { - assertTrue("extracted content too short for " + testcase, actual.length() > minLength); - } - } - - private void testInvalid(FunctionEx poifs) throws IOException, OpenXML4JException, XmlException { - // Text - try (FileInputStream fis = new FileInputStream(txt); - POITextExtractor te = poifs.apply(fis)) { - } - } - - @Test - public void testPackage() throws Exception { - for (int i = 0; i < TEST_SET.length; i += 4) { - final File testFile = (File) TEST_SET[i + 1]; - if (!testFile.getName().endsWith("x")) { - continue; - } - - try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ); - final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) { - testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]); - pkg.revert(); - } - } - } - - @Test(expected = UnsupportedFileFormatException.class) - public void testPackageInvalid() throws Exception { - // Text - try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ); - final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {} - } - - @Test - public void testPreferEventBased() throws Exception { - assertFalse(ExtractorFactory.getPreferEventExtractor()); - assertFalse(ExtractorFactory.getThreadPrefersEventExtractors()); - assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); - - ExtractorFactory.setThreadPrefersEventExtractors(true); - - assertTrue(ExtractorFactory.getPreferEventExtractor()); - assertTrue(ExtractorFactory.getThreadPrefersEventExtractors()); - assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); - - 
ExtractorFactory.setAllThreadsPreferEventExtractors(false); - - assertFalse(ExtractorFactory.getPreferEventExtractor()); - assertTrue(ExtractorFactory.getThreadPrefersEventExtractors()); - assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors()); - - ExtractorFactory.setAllThreadsPreferEventExtractors(null); - - assertTrue(ExtractorFactory.getPreferEventExtractor()); - assertTrue(ExtractorFactory.getThreadPrefersEventExtractors()); - assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); - - - // Check we get the right extractors now - POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); - assertTrue( - extractor - instanceof EventBasedExcelExtractor - ); - extractor.close(); - extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); - assertTrue( - extractor.getText().length() > 200 - ); - extractor.close(); - - extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); - assertTrue(extractor instanceof XSSFEventBasedExcelExtractor); - extractor.close(); - - extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); - assertTrue( - extractor.getText().length() > 200 - ); - extractor.close(); - - - // Put back to normal - ExtractorFactory.setThreadPrefersEventExtractors(false); - assertFalse(ExtractorFactory.getPreferEventExtractor()); - assertFalse(ExtractorFactory.getThreadPrefersEventExtractors()); - assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); - - // And back - extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); - assertTrue( - extractor - instanceof ExcelExtractor - ); - extractor.close(); - extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); - assertTrue( - extractor.getText().length() > 200 - ); - extractor.close(); - - extractor = 
ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); - assertTrue( - extractor - instanceof XSSFExcelExtractor - ); - extractor.close(); - extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString())); - assertTrue( - extractor.getText().length() > 200 - ); - extractor.close(); - } - - /** - * Test embedded docs text extraction. For now, only - * does poifs embedded, but will do ooxml ones - * at some point. - */ - @Test - public void testEmbedded() throws Exception { - final Object[] testObj = { - "No embeddings", xls, "0-0-0-0-0-0", - "Excel", xlsEmb, "6-2-2-2-0-0", - "Word", docEmb, "4-1-2-1-0-0", - "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1", - "Outlook", msgEmb, "1-1-0-0-0-0", - "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0", - }; - - for (int i=0; i 20); - if (embed instanceof SlideShowExtractor) { - numPpt++; - } else if (embed instanceof ExcelExtractor) { - numXls++; - } else if (embed instanceof WordExtractor) { - numWord++; - } else if (embed instanceof OutlookTextExtactor) { - numMsg++; - } else if (embed instanceof XWPFWordExtractor) { - numWordX++; - } - } - - final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX; - final String expected = (String)testObj[i+2]; - assertEquals("invalid number of embeddings - "+testObj[i], expected, actual); - } - } - - // TODO - PowerPoint - // TODO - Publisher - // TODO - Visio - } - - private static final String[] EXPECTED_FAILURES = { - // password protected files - "spreadsheet/password.xls", - "spreadsheet/protected_passtika.xlsx", - "spreadsheet/51832.xls", - "document/PasswordProtected.doc", - "slideshow/Password_Protected-hello.ppt", - "slideshow/Password_Protected-56-hello.ppt", - "slideshow/Password_Protected-np-hello.ppt", - "slideshow/cryptoapi-proc2356.ppt", - //"document/bug53475-password-is-pass.docx", - //"document/bug53475-password-is-solrcell.docx", - 
"spreadsheet/xor-encryption-abc.xls", - "spreadsheet/35897-type4.xls", - //"poifs/protect.xlsx", - //"poifs/protected_sha512.xlsx", - //"poifs/extenxls_pwd123.xlsx", - //"poifs/protected_agile.docx", - "spreadsheet/58616.xlsx", - - // TODO: fails XMLExportTest, is this ok? - "spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx", - "spreadsheet/55864.xlsx", - "spreadsheet/57890.xlsx", - - // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()! - "spreadsheet/44958.xls", - "spreadsheet/44958_1.xls", - "spreadsheet/testArraysAndTables.xls", - - // TODO: good to ignore? - "spreadsheet/sample-beta.xlsx", - - // This is actually a spreadsheet! - "hpsf/TestRobert_Flaherty.doc", - - // some files that are broken, eg Word 95, ... - "spreadsheet/43493.xls", - "spreadsheet/46904.xls", - "document/Bug50955.doc", - "slideshow/PPT95.ppt", - "openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx", - "openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx", - "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx", - "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx", - "openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx", - "openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx", - "openxml4j/OPCCompliance_DerivedPartNameFAIL.docx", - "openxml4j/invalid.xlsx", - "spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764() - "spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764() - "spreadsheet/Simple.xlsb", - "poifs/unknown_properties.msg", // POIFS properties corrupted - "poifs/only-zero-byte-streams.ole2", // No actual contents - "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion - "spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion - "spreadsheet/poc-shared-strings.xlsx", // contains 
shared-string-entity-expansion - - // old Excel files, which we only support simple text extraction of - "spreadsheet/testEXCEL_2.xls", - "spreadsheet/testEXCEL_3.xls", - "spreadsheet/testEXCEL_4.xls", - "spreadsheet/testEXCEL_5.xls", - "spreadsheet/testEXCEL_95.xls", - - // OOXML Strict is not yet supported, see bug #57699 - "spreadsheet/SampleSS.strict.xlsx", - "spreadsheet/SimpleStrict.xlsx", - "spreadsheet/sample.strict.xlsx", - - // non-TNEF files - "ddf/Container.dat", - "ddf/47143.dat", - - // sheet cloning errors - "spreadsheet/47813.xlsx", - "spreadsheet/56450.xls", - "spreadsheet/57231_MixedGasReport.xls", - "spreadsheet/OddStyleRecord.xls", - "spreadsheet/WithChartSheet.xlsx", - "spreadsheet/chart_sheet.xlsx", - }; - - @Test - public void testFileLeak() throws Exception { - // run a number of files that might fail in order to catch - // leaked file resources when using file-leak-detector while - // running the test - - for(String file : EXPECTED_FAILURES) { - try { - ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file)); - } catch (Exception e) { - // catch all exceptions here as we are only interested in file-handle leaks - } - } - } - - /** - * #59074 - Excel 95 files should give a helpful message, not just - * "No supported documents found in the OLE2 stream" - */ - @Test(expected = OldExcelFormatException.class) - public void bug59074() throws Exception { - ExtractorFactory.createExtractor( - POIDataSamples.getSpreadSheetInstance().getFile("59074.xls")); - } - - @SuppressWarnings("deprecation") - @Test(expected = IllegalStateException.class) - public void testGetEmbedFromXMLExtractor() { - // currently not implemented - ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null); - } - - @SuppressWarnings("deprecation") - @Test(expected = IllegalStateException.class) - public void testGetEmbeddedFromXMLExtractor() { - // currently not implemented - 
ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null); - } - - // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed. - // When this happens, change this from @Test(expected=...) to @Test - // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor - @Test(expected=AssertionError.class) - public void test45565() throws Exception { - try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) { - String text = extractor.getText(); - assertContains(text, "testdoc"); - assertContains(text, "test phrase"); - } - } -} diff --git a/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java new file mode 100644 index 0000000000..be58e3d699 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java @@ -0,0 +1,504 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.extractor.ooxml; + +import static org.apache.poi.POITestCase.assertContains; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Locale; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.extractor.POIOLE2TextExtractor; +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.ooxml.extractor.POIXMLTextExtractor; +import org.apache.poi.UnsupportedFileFormatException; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; +import org.apache.poi.hpbf.extractor.PublisherTextExtractor; +import org.apache.poi.hsmf.extractor.OutlookTextExtactor; +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.hssf.OldExcelFormatException; +import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; +import org.apache.poi.hssf.extractor.ExcelExtractor; +import org.apache.poi.hwpf.extractor.Word6Extractor; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.ooxml.extractor.ExtractorFactory; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.sl.extractor.SlideShowExtractor; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; +import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; +import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; +import 
org.apache.poi.xssf.extractor.XSSFExcelExtractor; +import org.apache.poi.xwpf.extractor.XWPFWordExtractor; +import org.apache.xmlbeans.XmlException; +import org.junit.Test; + +/** + * Test that the extractor factory plays nicely + */ +public class TestExtractorFactory { + + private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class); + + private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance(); + private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls"); + private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx"); + private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx"); + private static final File xltx = getFileAndCheck(ssTests, "test.xltx"); + private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls"); + private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb"); + + private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance(); + private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc"); + private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc"); + private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc"); + private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx"); + private static final File dotx = getFileAndCheck(wpTests, "test.dotx"); + private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc"); + private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc"); + + private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); + private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt"); + private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx"); + private static final File txt = getFileAndCheck(slTests, "SampleShow.txt"); + + private static final POIDataSamples olTests = 
POIDataSamples.getHSMFInstance(); + private static final File msg = getFileAndCheck(olTests, "quick.msg"); + private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg"); + private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg"); + + private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance(); + private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd"); + private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx"); + + private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance(); + private static File pub = getFileAndCheck(pubTests, "Simple.pub"); + + private static File getFileAndCheck(POIDataSamples samples, String name) { + File file = samples.getFile(name); + + assertNotNull("Did not get a file for " + name, file); + assertTrue("Did not get a type file for " + name, file.isFile()); + assertTrue("File did not exist: " + name, file.exists()); + + return file; + } + + private static final Object[] TEST_SET = { + "Excel", xls, ExcelExtractor.class, 200, + "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200, + "Excel - xltx", xltx, XSSFExcelExtractor.class, -1, + "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1, + "Word", doc, WordExtractor.class, 120, + "Word - docx", docx, XWPFWordExtractor.class, 120, + "Word - dotx", dotx, XWPFWordExtractor.class, -1, + "Word 6", doc6, Word6Extractor.class, 20, + "Word 95", doc95, Word6Extractor.class, 120, + "PowerPoint", ppt, SlideShowExtractor.class, 120, + "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120, + "Visio", vsd, VisioTextExtractor.class, 50, + "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20, + "Publisher", pub, PublisherTextExtractor.class, 50, + "Outlook msg", msg, OutlookTextExtactor.class, 50, + + // TODO Support OOXML-Strict, see bug #57699 + // xlsxStrict + }; + + @FunctionalInterface + interface FunctionEx { + R apply(T t) throws IOException, 
OpenXML4JException, XmlException; + } + + + @Test + public void testFile() throws Exception { + for (int i = 0; i < TEST_SET.length; i += 4) { + try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) { + testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]); + } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testFileInvalid() throws Exception { + // Text + try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {} + } + + @Test + public void testInputStream() throws Exception { + testStream((f) -> ExtractorFactory.createExtractor(f), true); + } + + @Test(expected = IllegalArgumentException.class) + public void testInputStreamInvalid() throws Exception { + testInvalid((f) -> ExtractorFactory.createExtractor(f)); + } + + @Test + public void testPOIFS() throws Exception { + testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false); + } + + @Test(expected = IOException.class) + public void testPOIFSInvalid() throws Exception { + testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f))); + } + + @Test + public void testOPOIFS() throws Exception { + testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false); + } + + @Test(expected = IOException.class) + public void testOPOIFSInvalid() throws Exception { + testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f))); + } + + + private void testStream(final FunctionEx poifsIS, final boolean loadOOXML) + throws IOException, OpenXML4JException, XmlException { + for (int i = 0; i < TEST_SET.length; i += 4) { + File testFile = (File) TEST_SET[i + 1]; + if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) { + continue; + } + try (FileInputStream fis = new FileInputStream(testFile); + POITextExtractor ext = poifsIS.apply(fis)) { + testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], 
(Integer) TEST_SET[i + 3]); + } catch (IllegalArgumentException e) { + fail("failed to process "+testFile); + } + } + } + + private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) { + assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext)); + final String actual = ext.getText(); + if (minLength == -1) { + assertContains(actual.toLowerCase(Locale.ROOT), "test"); + } else { + assertTrue("extracted content too short for " + testcase, actual.length() > minLength); + } + } + + private void testInvalid(FunctionEx poifs) throws IOException, OpenXML4JException, XmlException { + // Text + try (FileInputStream fis = new FileInputStream(txt); + POITextExtractor te = poifs.apply(fis)) { + } + } + + @Test + public void testPackage() throws Exception { + for (int i = 0; i < TEST_SET.length; i += 4) { + final File testFile = (File) TEST_SET[i + 1]; + if (!testFile.getName().endsWith("x")) { + continue; + } + + try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ); + final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) { + testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]); + pkg.revert(); + } + } + } + + @Test(expected = UnsupportedFileFormatException.class) + public void testPackageInvalid() throws Exception { + // Text + try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ); + final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {} + } + + @Test + public void testPreferEventBased() throws Exception { + assertFalse(ExtractorFactory.getPreferEventExtractor()); + assertFalse(ExtractorFactory.getThreadPrefersEventExtractors()); + assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); + + ExtractorFactory.setThreadPrefersEventExtractors(true); + + assertTrue(ExtractorFactory.getPreferEventExtractor()); + assertTrue(ExtractorFactory.getThreadPrefersEventExtractors()); + 
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); + + ExtractorFactory.setAllThreadsPreferEventExtractors(false); + + assertFalse(ExtractorFactory.getPreferEventExtractor()); + assertTrue(ExtractorFactory.getThreadPrefersEventExtractors()); + assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors()); + + ExtractorFactory.setAllThreadsPreferEventExtractors(null); + + assertTrue(ExtractorFactory.getPreferEventExtractor()); + assertTrue(ExtractorFactory.getThreadPrefersEventExtractors()); + assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); + + + // Check we get the right extractors now + POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); + assertTrue( + extractor + instanceof EventBasedExcelExtractor + ); + extractor.close(); + extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); + assertTrue( + extractor.getText().length() > 200 + ); + extractor.close(); + + extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); + assertTrue(extractor instanceof XSSFEventBasedExcelExtractor); + extractor.close(); + + extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); + assertTrue( + extractor.getText().length() > 200 + ); + extractor.close(); + + + // Put back to normal + ExtractorFactory.setThreadPrefersEventExtractors(false); + assertFalse(ExtractorFactory.getPreferEventExtractor()); + assertFalse(ExtractorFactory.getThreadPrefersEventExtractors()); + assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors()); + + // And back + extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); + assertTrue( + extractor + instanceof ExcelExtractor + ); + extractor.close(); + extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))); + assertTrue( + 
extractor.getText().length() > 200 + ); + extractor.close(); + + extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); + assertTrue( + extractor + instanceof XSSFExcelExtractor + ); + extractor.close(); + extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString())); + assertTrue( + extractor.getText().length() > 200 + ); + extractor.close(); + } + + /** + * Test embedded docs text extraction. For now, only + * does poifs embedded, but will do ooxml ones + * at some point. + */ + @Test + public void testEmbedded() throws Exception { + final Object[] testObj = { + "No embeddings", xls, "0-0-0-0-0-0", + "Excel", xlsEmb, "6-2-2-2-0-0", + "Word", docEmb, "4-1-2-1-0-0", + "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1", + "Outlook", msgEmb, "1-1-0-0-0-0", + "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0", + }; + + for (int i=0; i 20); + if (embed instanceof SlideShowExtractor) { + numPpt++; + } else if (embed instanceof ExcelExtractor) { + numXls++; + } else if (embed instanceof WordExtractor) { + numWord++; + } else if (embed instanceof OutlookTextExtactor) { + numMsg++; + } else if (embed instanceof XWPFWordExtractor) { + numWordX++; + } + } + + final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX; + final String expected = (String)testObj[i+2]; + assertEquals("invalid number of embeddings - "+testObj[i], expected, actual); + } + } + + // TODO - PowerPoint + // TODO - Publisher + // TODO - Visio + } + + private static final String[] EXPECTED_FAILURES = { + // password protected files + "spreadsheet/password.xls", + "spreadsheet/protected_passtika.xlsx", + "spreadsheet/51832.xls", + "document/PasswordProtected.doc", + "slideshow/Password_Protected-hello.ppt", + "slideshow/Password_Protected-56-hello.ppt", + "slideshow/Password_Protected-np-hello.ppt", + "slideshow/cryptoapi-proc2356.ppt", + 
//"document/bug53475-password-is-pass.docx", + //"document/bug53475-password-is-solrcell.docx", + "spreadsheet/xor-encryption-abc.xls", + "spreadsheet/35897-type4.xls", + //"poifs/protect.xlsx", + //"poifs/protected_sha512.xlsx", + //"poifs/extenxls_pwd123.xlsx", + //"poifs/protected_agile.docx", + "spreadsheet/58616.xlsx", + + // TODO: fails XMLExportTest, is this ok? + "spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx", + "spreadsheet/55864.xlsx", + "spreadsheet/57890.xlsx", + + // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()! + "spreadsheet/44958.xls", + "spreadsheet/44958_1.xls", + "spreadsheet/testArraysAndTables.xls", + + // TODO: good to ignore? + "spreadsheet/sample-beta.xlsx", + + // This is actually a spreadsheet! + "hpsf/TestRobert_Flaherty.doc", + + // some files that are broken, eg Word 95, ... + "spreadsheet/43493.xls", + "spreadsheet/46904.xls", + "document/Bug50955.doc", + "slideshow/PPT95.ppt", + "openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx", + "openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx", + "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx", + "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx", + "openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx", + "openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx", + "openxml4j/OPCCompliance_DerivedPartNameFAIL.docx", + "openxml4j/invalid.xlsx", + "spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764() + "spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764() + "spreadsheet/Simple.xlsb", + "poifs/unknown_properties.msg", // POIFS properties corrupted + "poifs/only-zero-byte-streams.ole2", // No actual contents + "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion + "spreadsheet/poc-xmlbomb-empty.xlsx", // contains 
xml-entity-expansion + "spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion + + // old Excel files, which we only support simple text extraction of + "spreadsheet/testEXCEL_2.xls", + "spreadsheet/testEXCEL_3.xls", + "spreadsheet/testEXCEL_4.xls", + "spreadsheet/testEXCEL_5.xls", + "spreadsheet/testEXCEL_95.xls", + + // OOXML Strict is not yet supported, see bug #57699 + "spreadsheet/SampleSS.strict.xlsx", + "spreadsheet/SimpleStrict.xlsx", + "spreadsheet/sample.strict.xlsx", + + // non-TNEF files + "ddf/Container.dat", + "ddf/47143.dat", + + // sheet cloning errors + "spreadsheet/47813.xlsx", + "spreadsheet/56450.xls", + "spreadsheet/57231_MixedGasReport.xls", + "spreadsheet/OddStyleRecord.xls", + "spreadsheet/WithChartSheet.xlsx", + "spreadsheet/chart_sheet.xlsx", + }; + + @Test + public void testFileLeak() throws Exception { + // run a number of files that might fail in order to catch + // leaked file resources when using file-leak-detector while + // running the test + + for(String file : EXPECTED_FAILURES) { + try { + ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file)); + } catch (Exception e) { + // catch all exceptions here as we are only interested in file-handle leaks + } + } + } + + /** + * #59074 - Excel 95 files should give a helpful message, not just + * "No supported documents found in the OLE2 stream" + */ + @Test(expected = OldExcelFormatException.class) + public void bug59074() throws Exception { + ExtractorFactory.createExtractor( + POIDataSamples.getSpreadSheetInstance().getFile("59074.xls")); + } + + @SuppressWarnings("deprecation") + @Test(expected = IllegalStateException.class) + public void testGetEmbedFromXMLExtractor() { + // currently not implemented + ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null); + } + + @SuppressWarnings("deprecation") + @Test(expected = IllegalStateException.class) + public void testGetEmbeddedFromXMLExtractor() { + // currently 
not implemented + ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null); + } + + // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed. + // When this happens, change this from @Test(expected=...) to @Test + // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor + @Test(expected=AssertionError.class) + public void test45565() throws Exception { + try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) { + String text = extractor.getText(); + assertContains(text, "testdoc"); + assertContains(text, "test phrase"); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLDocument.java new file mode 100644 index 0000000000..d27da22bf0 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLDocument.java @@ -0,0 +1,385 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.ooxml; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.lang.Thread.UncaughtExceptionHandler; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.ooxml.POIXMLDocumentPart.RelationPart; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.NullOutputStream; +import org.apache.poi.ooxml.util.PackageHelper; +import org.apache.poi.util.TempFile; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.xwpf.usermodel.XWPFRelation; +import org.junit.Test; + +/** + * Test recursive read and write of OPC packages + */ +public final class TestPOIXMLDocument { + + private static class OPCParser extends POIXMLDocument { + + public OPCParser(OPCPackage pkg) { + super(pkg); + } + + public OPCParser(OPCPackage pkg, String coreDocumentRel) { + super(pkg, coreDocumentRel); + } + + @Override + public List getAllEmbedds() { + throw new RuntimeException("not supported"); + } + + public void parse(POIXMLFactory factory) throws IOException{ + load(factory); + } + } + + private static final class 
TestFactory extends POIXMLFactory { + + public TestFactory() { + // + } + + @Override + protected POIXMLRelation getDescriptor(String relationshipType) { + return null; + } + + /** + * @since POI 3.14-Beta1 + */ + @Override + protected POIXMLDocumentPart createDocumentPart + (Class cls, Class[] classes, Object[] values) + throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException { + return null; + } + } + + private static void traverse(POIXMLDocument doc) throws IOException{ + HashMap context = new HashMap<>(); + for (RelationPart p : doc.getRelationParts()){ + traverse(p, context); + } + } + + /** + * Recursively traverse a OOXML document and assert that same logical parts have the same physical instances + */ + private static void traverse(RelationPart rp, HashMap context) throws IOException{ + POIXMLDocumentPart dp = rp.getDocumentPart(); + assertEquals(rp.getRelationship().getTargetURI().toString(), dp.getPackagePart().getPartName().getName()); + + context.put(dp.getPackagePart().getPartName().getName(), dp); + for(RelationPart p : dp.getRelationParts()){ + assertNotNull(p.getRelationship().toString()); + + String uri = p.getDocumentPart().getPackagePart().getPartName().getURI().toString(); + assertEquals(uri, p.getRelationship().getTargetURI().toString()); + if (!context.containsKey(uri)) { + traverse(p, context); + } else { + POIXMLDocumentPart prev = context.get(uri); + assertSame("Duplicate POIXMLDocumentPart instance for targetURI=" + uri, prev, p.getDocumentPart()); + } + } + } + + public void assertReadWrite(OPCPackage pkg1) throws Exception { + + OPCParser doc = new OPCParser(pkg1); + doc.parse(new TestFactory()); + + traverse(doc); + + File tmp = TempFile.createTempFile("poi-ooxml", ".tmp"); + FileOutputStream out = new FileOutputStream(tmp); + doc.write(out); + out.close(); + + // Should not be able to write to an output stream that has been closed + try { + doc.write(out); + 
fail("Should not be able to write to an output stream that has been closed."); + } catch (final OpenXML4JRuntimeException e) { + // FIXME: A better exception class (IOException?) and message should be raised + // indicating that the document could not be written because the output stream is closed. + // see {@link org.apache.poi.openxml4j.opc.ZipPackage#saveImpl(java.io.OutputStream)} + if (e.getMessage().matches("Fail to save: an error occurs while saving the package : The part .+ failed to be saved in the stream with marshaller .+")) { + // expected + } else { + throw e; + } + } + + // Should not be able to write a document that has been closed + doc.close(); + try { + doc.write(new NullOutputStream()); + fail("Should not be able to write a document that has been closed."); + } catch (final IOException e) { + if (e.getMessage().equals("Cannot write data, document seems to have been closed already")) { + // expected + } else { + throw e; + } + } + + // Should be able to close a document multiple times, though subsequent closes will have no effect. 
+ doc.close(); + + + @SuppressWarnings("resource") + OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath()); + doc = new OPCParser(pkg1); + try { + doc.parse(new TestFactory()); + traverse(doc); + + assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size()); + + ArrayList l1 = pkg1.getParts(); + ArrayList l2 = pkg2.getParts(); + + assertEquals(l1.size(), l2.size()); + for (int i=0; i < l1.size(); i++){ + PackagePart p1 = l1.get(i); + PackagePart p2 = l2.get(i); + + assertEquals(p1.getContentType(), p2.getContentType()); + assertEquals(p1.hasRelationships(), p2.hasRelationships()); + if(p1.hasRelationships()){ + assertEquals(p1.getRelationships().size(), p2.getRelationships().size()); + } + assertEquals(p1.getPartName(), p2.getPartName()); + } + } finally { + doc.close(); + pkg1.close(); + pkg2.close(); + } + } + + @Test + public void testPPTX() throws Exception { + POIDataSamples pds = POIDataSamples.getSlideShowInstance(); + assertReadWrite(PackageHelper.open(pds.openResourceAsStream("PPTWithAttachments.pptm"))); + } + + @Test + public void testXLSX() throws Exception { + POIDataSamples pds = POIDataSamples.getSpreadSheetInstance(); + assertReadWrite(PackageHelper.open(pds.openResourceAsStream("ExcelWithAttachments.xlsm"))); + } + + @Test + public void testDOCX() throws Exception { + POIDataSamples pds = POIDataSamples.getDocumentInstance(); + assertReadWrite(PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"))); + } + + @Test + public void testRelationOrder() throws Exception { + POIDataSamples pds = POIDataSamples.getDocumentInstance(); + @SuppressWarnings("resource") + OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")); + OPCParser doc = new OPCParser(pkg); + try { + doc.parse(new TestFactory()); + + for(POIXMLDocumentPart rel : doc.getRelations()){ + //TODO finish me + assertNotNull(rel); + } + } finally { + doc.close(); + } + } + + @Test + public void testGetNextPartNumber() throws 
Exception { + POIDataSamples pds = POIDataSamples.getDocumentInstance(); + @SuppressWarnings("resource") + OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")); + OPCParser doc = new OPCParser(pkg); + try { + doc.parse(new TestFactory()); + + // Non-indexed parts: Word is taken, Excel is not + assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 0)); + assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, -1)); + assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 99)); + assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 0)); + assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, -1)); + assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 99)); + + // Indexed parts: + // Has 2 headers + assertEquals(0, doc.getNextPartNumber(XWPFRelation.HEADER, 0)); + assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, -1)); + assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, 1)); + assertEquals(8, doc.getNextPartNumber(XWPFRelation.HEADER, 8)); + + // Has no Excel Sheets + assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 0)); + assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, -1)); + assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 1)); + } finally { + doc.close(); + } + } + + @Test + public void testCommitNullPart() throws IOException, InvalidFormatException { + POIXMLDocumentPart part = new POIXMLDocumentPart(); + part.prepareForCommit(); + part.commit(); + part.onSave(new HashSet<>()); + + assertNull(part.getRelationById(null)); + assertNull(part.getRelationId(null)); + assertFalse(part.removeRelation(null, true)); + part.removeRelation((POIXMLDocumentPart)null); + assertEquals("",part.toString()); + part.onDocumentCreate(); + //part.getTargetPart(null); + } + + @Test + public void testVSDX() throws Exception { + POIDataSamples pds = POIDataSamples.getDiagramInstance(); + @SuppressWarnings("resource") + OPCPackage open = 
PackageHelper.open(pds.openResourceAsStream("test.vsdx")); + POIXMLDocument part = new OPCParser(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT); + + assertNotNull(part); + assertEquals(0, part.getRelationCounter()); + part.close(); + } + + @Test + public void testVSDXPart() throws IOException { + POIDataSamples pds = POIDataSamples.getDiagramInstance(); + OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx")); + + POIXMLDocumentPart part = new POIXMLDocumentPart(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT); + + assertNotNull(part); + assertEquals(0, part.getRelationCounter()); + + open.close(); + } + + @Test(expected=POIXMLException.class) + public void testInvalidCoreRel() throws IOException { + POIDataSamples pds = POIDataSamples.getDiagramInstance(); + OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx")); + + try { + new POIXMLDocumentPart(open, "somethingillegal"); + } finally { + open.close(); + } + } + + @Test + public void testOSGIClassLoading() { + // the schema type loader is cached per thread in POIXMLTypeLoader. 
+ // So create a new Thread and change the context class loader (which would normally be used) + // to not contain the OOXML classes + Runnable run = new Runnable() { + public void run() { + InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx"); + XMLSlideShow ppt = null; + try { + ppt = new XMLSlideShow(is); + ppt.getSlides().get(0).getShapes(); + } catch (IOException e) { + fail("failed to load XMLSlideShow"); + } finally { + IOUtils.closeQuietly(ppt); + IOUtils.closeQuietly(is); + } + } + }; + + Thread thread = Thread.currentThread(); + ClassLoader cl = thread.getContextClassLoader(); + UncaughtHandler uh = new UncaughtHandler(); + + // check schema type loading and check if we could run in an OOM + Thread ta[] = new Thread[30]; + for (int j=0; j<10; j++) { + for (int i=0; i(dateCreated)); + assertEquals(dateCreated, cp.getCreated()); + + XWPFDocument doc2 = XWPFTestDataSamples.writeOutAndReadBack(doc); + doc.close(); + cp = doc2.getProperties().getCoreProperties(); + Date dt3 = cp.getCreated(); + assertEquals(dateCreated, dt3); + doc2.close(); + } + + @Test + public void testGetSetRevision() { + String revision = _coreProperties.getRevision(); + assertTrue("Revision number is 1", Integer.parseInt(revision) > 1); + _coreProperties.setRevision("20"); + assertEquals("20", _coreProperties.getRevision()); + _coreProperties.setRevision("20xx"); + assertEquals("20", _coreProperties.getRevision()); + } + + @Test + public void testLastModifiedByUserProperty() { + String lastModifiedByUser = _coreProperties.getLastModifiedByUser(); + assertEquals("Paolo Mottadelli", lastModifiedByUser); + _coreProperties.setLastModifiedByUser("Test User"); + assertEquals("Test User", _coreProperties.getLastModifiedByUser()); + } + + public static boolean dateTimeEqualToUTCString(Date dateTime, String utcString) { + Calendar utcCalendar = LocaleUtil.getLocaleCalendar(LocaleUtil.TIMEZONE_UTC); + utcCalendar.setTimeInMillis(dateTime.getTime()); + 
String dateTimeUtcString = utcCalendar.get(Calendar.YEAR) + "-" + + zeroPad((utcCalendar.get(Calendar.MONTH)+1)) + "-" + + zeroPad(utcCalendar.get(Calendar.DAY_OF_MONTH)) + "T" + + zeroPad(utcCalendar.get(Calendar.HOUR_OF_DAY)) + ":" + + zeroPad(utcCalendar.get(Calendar.MINUTE)) + ":" + + zeroPad(utcCalendar.get(Calendar.SECOND)) + "Z"; + + return utcString.equals(dateTimeUtcString); + } + + @Ignore("Fails to add some of the thumbnails, needs more investigation") + @Test + public void testThumbnails() throws Exception { + POIXMLProperties noThumbProps = sampleNoThumb.getProperties(); + + assertNotNull(_props.getThumbnailPart()); + assertNull(noThumbProps.getThumbnailPart()); + + assertNotNull(_props.getThumbnailFilename()); + assertNull(noThumbProps.getThumbnailFilename()); + + assertNotNull(_props.getThumbnailImage()); + assertNull(noThumbProps.getThumbnailImage()); + + assertEquals("/thumbnail.jpeg", _props.getThumbnailFilename()); + + + // Adding / changing + ByteArrayInputStream imageData = new ByteArrayInputStream(new byte[1]); + noThumbProps.setThumbnail("Testing.png", imageData); + assertNotNull(noThumbProps.getThumbnailPart()); + assertEquals("/Testing.png", noThumbProps.getThumbnailFilename()); + assertNotNull(noThumbProps.getThumbnailImage()); + assertEquals(1, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length); + + imageData = new ByteArrayInputStream(new byte[2]); + noThumbProps.setThumbnail("Testing2.png", imageData); + assertNotNull(noThumbProps.getThumbnailPart()); + assertEquals("/Testing.png", noThumbProps.getThumbnailFilename()); + assertNotNull(noThumbProps.getThumbnailImage()); + assertEquals(2, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length); + } + + private static String zeroPad(long i) { + if (i >= 0 && i <=9) { + return "0" + i; + } else { + return String.valueOf(i); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/util/OOXMLLite.java b/src/ooxml/testcases/org/apache/poi/ooxml/util/OOXMLLite.java new 
file mode 100644 index 0000000000..e17b684d6f --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/ooxml/util/OOXMLLite.java @@ -0,0 +1,340 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ooxml.util; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.net.URL; +import java.security.AccessController; +import java.security.CodeSource; +import java.security.PrivilegedAction; +import java.security.ProtectionDomain; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Vector; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.StringUtil; +import org.apache.poi.util.SuppressForbidden; +import org.junit.Test; +import org.junit.internal.TextListener; +import org.junit.runner.Description; +import 
org.junit.runner.JUnitCore; +import org.junit.runner.Result; + +/** + * Build a 'lite' version of the ooxml-schemas.jar + * + * @author Yegor Kozlov + */ +public final class OOXMLLite { + private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb"); + + /** + * Destination directory to copy filtered classes + */ + private File _destDest; + + /** + * Directory with the compiled ooxml tests + */ + private File _testDir; + + /** + * Reference to the ooxml-schemas.jar + */ + private File _ooxmlJar; + + + OOXMLLite(String dest, String test, String ooxmlJar) { + _destDest = new File(dest); + _testDir = new File(test); + _ooxmlJar = new File(ooxmlJar); + } + + public static void main(String[] args) throws IOException { + System.out.println("Free memory (bytes): " + + Runtime.getRuntime().freeMemory()); + long maxMemory = Runtime.getRuntime().maxMemory(); + System.out.println("Maximum memory (bytes): " + + (maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory)); + System.out.println("Total memory (bytes): " + + Runtime.getRuntime().totalMemory()); + + String dest = null, test = null, ooxml = null; + + for (int i = 0; i < args.length; i++) { + switch (args[i]) { + case "-dest": + dest = args[++i]; + break; + case "-test": + test = args[++i]; + break; + case "-ooxml": + ooxml = args[++i]; + break; + } + } + OOXMLLite builder = new OOXMLLite(dest, test, ooxml); + builder.build(); + } + + void build() throws IOException { + List> lst = new ArrayList<>(); + //collect unit tests + String exclude = StringUtil.join("|", + "BaseTestXWorkbook", + "BaseTestXSheet", + "BaseTestXRow", + "BaseTestXCell", + "BaseTestXSSFPivotTable", + "TestSXSSFWorkbook\\$\\d", + "TestUnfixedBugs", + "MemoryUsage", + "TestDataProvider", + "TestDataSamples", + "All.+Tests", + "ZipFileAssert", + "AesZipFileZipEntrySource", + "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource", + "PkiTestUtils", + "TestCellFormatPart\\$\\d", + 
"TestSignatureInfo\\$\\d", + "TestCertificateEncryption\\$CertData", + "TestPOIXMLDocument\\$OPCParser", + "TestPOIXMLDocument\\$TestFactory", + "TestXSLFTextParagraph\\$DrawTextParagraphProxy", + "TestXSSFExportToXML\\$\\d", + "TestXSSFExportToXML\\$DummyEntityResolver", + "TestFormulaEvaluatorOnXSSF\\$Result", + "TestFormulaEvaluatorOnXSSF\\$SS", + "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result", + "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS", + "TestXSSFBugs\\$\\d", + "AddImageBench", + "AddImageBench_jmhType_B\\d", + "AddImageBench_benchCreatePicture_jmhTest", + "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream", + "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator", + "TestXSSFBReader\\$1", + "TestXSSFBReader\\$TestSheetHandler", + "TestFormulaEvaluatorOnXSSF\\$1", + "TestMultiSheetFormulaEvaluatorOnXSSF\\$1", + "TestZipPackagePropertiesMarshaller\\$1", + "SLCommonUtils", + "TestPPTX2PNG\\$1", + "TestMatrixFormulasFromXMLSpreadsheet\\$1", + "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator", + "TestPOIXMLDocument\\$UncaughtHandler", + "TestOleShape\\$Api", + "TestOleShape\\$1", + "TestPOIXMLDocument\\$1", + "TestXMLSlideShow\\$1", + "TestXMLSlideShow\\$BufAccessBAOS", + "TestXDDFChart\\$1", + "TestOOXMLLister\\$1", + "TestOOXMLPrettyPrint\\$1" + ); + System.out.println("Collecting unit tests from " + _testDir); + collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class"); + System.out.println("Found " + lst.size() + " classes"); + + //run tests + JUnitCore jUnitCore = new JUnitCore(); + jUnitCore.addListener(new TextListener(System.out) { + private final Set classes = new HashSet<>(); + private int count; + + @Override + public void testStarted(Description description) { + // count how many test-classes we already saw + classes.add(description.getClassName()); + count++; + if(count % 100 == 0) { + System.out.println(); + System.out.println(classes.size() + "/" + 
lst.size() + ": " + description.getDisplayName()); + } + + super.testStarted(description); + } + }); + Result result = jUnitCore.run(lst.toArray(new Class[0])); + if (!result.wasSuccessful()) { + throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar"); + } + + //see what classes from the ooxml-schemas.jar are loaded + System.out.println("Copying classes to " + _destDest); + Map> classes = getLoadedClasses(_ooxmlJar.getName()); + for (Class cls : classes.values()) { + String className = cls.getName(); + String classRef = className.replace('.', '/') + ".class"; + File destFile = new File(_destDest, classRef); + IOUtils.copy(cls.getResourceAsStream('/' + classRef), destFile); + + if(cls.isInterface()){ + /// Copy classes and interfaces declared as members of this class + for(Class fc : cls.getDeclaredClasses()){ + className = fc.getName(); + classRef = className.replace('.', '/') + ".class"; + destFile = new File(_destDest, classRef); + IOUtils.copy(fc.getResourceAsStream('/' + classRef), destFile); + } + } + } + + //finally copy the compiled .xsb files + System.out.println("Copying .xsb resources"); + try (JarFile jar = new JarFile(_ooxmlJar)) { + for (Enumeration e = jar.entries(); e.hasMoreElements(); ) { + JarEntry je = e.nextElement(); + if (SCHEMA_PATTERN.matcher(je.getName()).matches()) { + File destFile = new File(_destDest, je.getName()); + IOUtils.copy(jar.getInputStream(je), destFile); + } + } + } + } + + private static boolean checkForTestAnnotation(Class testclass) { + for (Method m : testclass.getDeclaredMethods()) { + if(m.isAnnotationPresent(Test.class)) { + return true; + } + } + + // also check super classes + if(testclass.getSuperclass() != null) { + for (Method m : testclass.getSuperclass().getDeclaredMethods()) { + if(m.isAnnotationPresent(Test.class)) { + return true; + } + } + } + + System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation"); + + // Should we 
also look at superclasses to find cases + // where we have abstract base classes with derived tests? + // if(checkForTestAnnotation(testclass.getSuperclass())) return true; + + return false; + } + + /** + * Recursively collect classes from the supplied directory + * + * @param arg the directory to search in + * @param out output + * @param ptrn the pattern (regexp) to filter found files + */ + private static void collectTests(File root, File arg, List> out, String ptrn, String exclude) { + if (arg.isDirectory()) { + File files[] = arg.listFiles(); + if (files != null) { + for (File f : files) { + collectTests(root, f, out, ptrn, exclude); + } + } + } else { + String path = arg.getAbsolutePath(); + String prefix = root.getAbsolutePath(); + String cls = path.substring(prefix.length() + 1).replace(File.separator, "."); + if(!cls.matches(ptrn)) return; + if (cls.matches(exclude)) return; + //ignore inner classes defined in tests + if (cls.indexOf('$') != -1) { + System.out.println("Inner class " + cls + " not included"); + return; + } + + cls = cls.replace(".class", ""); + + try { + Class testclass = Class.forName(cls); + if (TestCase.class.isAssignableFrom(testclass) + || checkForTestAnnotation(testclass)) { + out.add(testclass); + } + } catch (Throwable e) { // NOSONAR + System.out.println("Class " + cls + " is not in classpath"); + } + } + } + + /** + * + * @param ptrn the pattern to filter output + * @return the classes loaded by the system class loader keyed by class name + */ + @SuppressWarnings("unchecked") + private static Map> getLoadedClasses(String ptrn) { + // make the field accessible, we defer this from static initialization to here to + // allow JDKs which do not have this field (e.g. 
IBM JDK) to at least load the class + // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550 + final Field _classes = AccessController.doPrivileged(new PrivilegedAction() { + @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)") + public Field run() { + try { + Field fld = ClassLoader.class.getDeclaredField("classes"); + fld.setAccessible(true); + return fld; + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + }); + + ClassLoader appLoader = ClassLoader.getSystemClassLoader(); + try { + Vector> classes = (Vector>) _classes.get(appLoader); + Map> map = new HashMap<>(); + for (Class cls : classes) { + // e.g. proxy-classes, ... + ProtectionDomain pd = cls.getProtectionDomain(); + if (pd == null) continue; + CodeSource cs = pd.getCodeSource(); + if (cs == null) continue; + URL loc = cs.getLocation(); + if (loc == null) continue; + + String jar = loc.toString(); + if (jar.contains(ptrn)) { + map.put(cls.getName(), cls); + } + } + return map; + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/util/TestSAXHelper.java b/src/ooxml/testcases/org/apache/poi/ooxml/util/TestSAXHelper.java new file mode 100644 index 0000000000..825cdf40f5 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/ooxml/util/TestSAXHelper.java @@ -0,0 +1,45 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; + +import javax.xml.XMLConstants; + +import org.junit.Test; +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +public class TestSAXHelper { + @Test + public void testXMLReader() throws Exception { + XMLReader reader = SAXHelper.newXMLReader(); + assertNotSame(reader, SAXHelper.newXMLReader()); + assertTrue(reader.getFeature(XMLConstants.FEATURE_SECURE_PROCESSING)); + assertEquals(SAXHelper.IGNORING_ENTITY_RESOLVER, reader.getEntityResolver()); + assertNotNull(reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit")); + assertEquals("4096", reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit")); + assertNotNull(reader.getProperty("http://apache.org/xml/properties/security-manager")); + + reader.parse(new InputSource(new ByteArrayInputStream("".getBytes("UTF-8")))); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/util/TestSAXHelper.java b/src/ooxml/testcases/org/apache/poi/util/TestSAXHelper.java deleted file mode 100644 index 04f3a7a267..0000000000 --- a/src/ooxml/testcases/org/apache/poi/util/TestSAXHelper.java +++ /dev/null @@ -1,42 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation 
(ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.util; - -import javax.xml.XMLConstants; - -import org.junit.Test; -import org.xml.sax.InputSource; -import org.xml.sax.XMLReader; - -import java.io.ByteArrayInputStream; - -import static org.junit.Assert.*; - -public class TestSAXHelper { - @Test - public void testXMLReader() throws Exception { - XMLReader reader = SAXHelper.newXMLReader(); - assertNotSame(reader, SAXHelper.newXMLReader()); - assertTrue(reader.getFeature(XMLConstants.FEATURE_SECURE_PROCESSING)); - assertEquals(SAXHelper.IGNORING_ENTITY_RESOLVER, reader.getEntityResolver()); - assertNotNull(reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit")); - assertEquals("4096", reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit")); - assertNotNull(reader.getProperty("http://apache.org/xml/properties/security-manager")); - - reader.parse(new InputSource(new ByteArrayInputStream("".getBytes("UTF-8")))); - } -} diff --git a/src/scratchpad/src/org/apache/poi/POIReadOnlyDocument.java b/src/scratchpad/src/org/apache/poi/POIReadOnlyDocument.java deleted file mode 100644 index 3b3eca588d..0000000000 --- 
a/src/scratchpad/src/org/apache/poi/POIReadOnlyDocument.java +++ /dev/null @@ -1,75 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import java.io.File; -import java.io.OutputStream; - -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; - - -/** - * This holds the common functionality for all read-only - * POI Document classes, i.e. ones which don't support writing. - * - * @since POI 3.15 beta 3 - */ -public abstract class POIReadOnlyDocument extends POIDocument { - public POIReadOnlyDocument(DirectoryNode dir) { - super(dir); - } - public POIReadOnlyDocument(NPOIFSFileSystem fs) { - super(fs); - } - public POIReadOnlyDocument(OPOIFSFileSystem fs) { - super(fs); - } - public POIReadOnlyDocument(POIFSFileSystem fs) { - super(fs); - } - - /** - * Note - writing is not yet supported for this file format, sorry. 
- * - * @throws IllegalStateException If you call the method, as writing is not supported - */ - @Override - public void write() { - throw new IllegalStateException("Writing is not yet implemented for this Document Format"); - } - /** - * Note - writing is not yet supported for this file format, sorry. - * - * @throws IllegalStateException If you call the method, as writing is not supported - */ - @Override - public void write(File file) { - throw new IllegalStateException("Writing is not yet implemented for this Document Format"); - } - /** - * Note - writing is not yet supported for this file format, sorry. - * - * @throws IllegalStateException If you call the method, as writing is not supported - */ - @Override - public void write(OutputStream out) { - throw new IllegalStateException("Writing is not yet implemented for this Document Format"); - } -} diff --git a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java deleted file mode 100644 index f77d0834f9..0000000000 --- a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java +++ /dev/null @@ -1,148 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.extractor; - -import java.io.ByteArrayInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; -import java.util.List; - -import org.apache.poi.POIOLE2TextExtractor; -import org.apache.poi.POITextExtractor; -import org.apache.poi.hdgf.extractor.VisioTextExtractor; -import org.apache.poi.hpbf.extractor.PublisherTextExtractor; -import org.apache.poi.hslf.extractor.PowerPointExtractor; -import org.apache.poi.hslf.usermodel.HSLFSlideShow; -import org.apache.poi.hsmf.MAPIMessage; -import org.apache.poi.hsmf.datatypes.AttachmentChunks; -import org.apache.poi.hsmf.extractor.OutlookTextExtactor; -import org.apache.poi.hwpf.OldWordFileFormatException; -import org.apache.poi.hwpf.extractor.Word6Extractor; -import org.apache.poi.hwpf.extractor.WordExtractor; -import org.apache.poi.poifs.filesystem.DirectoryEntry; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.Entry; -import org.apache.poi.sl.extractor.SlideShowExtractor; -import org.apache.poi.sl.usermodel.SlideShowFactory; - -/** - * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and - * {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with - * no Scratchpad jar (though without functionality!) - *

<p>Note - should not be used standalone, always use via the other - * two classes</p>

- */ -@SuppressWarnings("WeakerAccess") -public class OLE2ScratchpadExtractorFactory { - /** - * Look for certain entries in the stream, to figure it - * out what format is desired - * Note - doesn't check for core-supported formats! - * Note - doesn't check for OOXML-supported formats - */ - public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException { - if (poifsDir.hasEntry("WordDocument")) { - // Old or new style word document? - try { - return new WordExtractor(poifsDir); - } catch (OldWordFileFormatException e) { - return new Word6Extractor(poifsDir); - } - } - - if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) { - return new SlideShowExtractor(SlideShowFactory.create(poifsDir)); - } - - if (poifsDir.hasEntry("VisioDocument")) { - return new VisioTextExtractor(poifsDir); - } - - if (poifsDir.hasEntry("Quill")) { - return new PublisherTextExtractor(poifsDir); - } - - final String[] outlookEntryNames = new String[] { - // message bodies, saved as plain text (PtypString) - // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf) - // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry - // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx - // @see org.apache.poi.hsmf.Types.MAPIType - "__substg1.0_1000001E", //PidTagBody ASCII - "__substg1.0_1000001F", //PidTagBody Unicode - "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII - "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode - "__substg1.0_0037001E", //PidTagSubject ASCII - "__substg1.0_0037001F", //PidTagSubject Unicode - }; - for (String entryName : outlookEntryNames) { - if (poifsDir.hasEntry(entryName)) { - return new OutlookTextExtactor(poifsDir); - } - } - - throw new IllegalArgumentException("No supported documents found in the OLE2 stream"); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file 
(if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - */ - public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List dirs, List nonPOIFS) throws IOException { - // Find all the embedded directories - DirectoryEntry root = ext.getRoot(); - if (root == null) { - throw new IllegalStateException("The extractor didn't know which POIFS it came from!"); - } - - if (ext instanceof WordExtractor) { - // These are in ObjectPool -> _... under the root - try { - DirectoryEntry op = (DirectoryEntry) - root.getEntry("ObjectPool"); - Iterator it = op.getEntries(); - while(it.hasNext()) { - Entry entry = it.next(); - if(entry.getName().startsWith("_")) { - dirs.add(entry); - } - } - } catch(FileNotFoundException e) { - // ignored here - } - //} else if(ext instanceof PowerPointExtractor) { - // Tricky, not stored directly in poifs - // TODO - } else if (ext instanceof OutlookTextExtactor) { - // Stored in the Attachment blocks - MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); - for (AttachmentChunks attachment : msg.getAttachmentFiles()) { - if (attachment.getAttachData() != null) { - byte[] data = attachment.getAttachData().getValue(); - nonPOIFS.add( new ByteArrayInputStream(data) ); - } else if (attachment.getAttachmentDirectory() != null) { - dirs.add(attachment.getAttachmentDirectory().getDirectory()); - } - } - } - } -} diff --git a/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java new file mode 100644 index 0000000000..1e3ebdc74e --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java @@ -0,0 +1,148 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + 
contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.extractor.ole2; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.List; + +import org.apache.poi.extractor.POIOLE2TextExtractor; +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.extractor.OLE2ExtractorFactory; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; +import org.apache.poi.hpbf.extractor.PublisherTextExtractor; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.extractor.OutlookTextExtactor; +import org.apache.poi.hwpf.OldWordFileFormatException; +import org.apache.poi.hwpf.extractor.Word6Extractor; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.poifs.filesystem.DirectoryEntry; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.Entry; +import org.apache.poi.sl.extractor.SlideShowExtractor; +import org.apache.poi.sl.usermodel.SlideShowFactory; + +/** + * Scratchpad-specific logic for {@link OLE2ExtractorFactory} 
and + * {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with + * no Scratchpad jar (though without functionality!) + *

<p>Note - should not be used standalone, always use via the other + * two classes</p>

+ */ +@SuppressWarnings("WeakerAccess") +public class OLE2ScratchpadExtractorFactory { + /** + * Look for certain entries in the stream, to figure it + * out what format is desired + * Note - doesn't check for core-supported formats! + * Note - doesn't check for OOXML-supported formats + */ + public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException { + if (poifsDir.hasEntry("WordDocument")) { + // Old or new style word document? + try { + return new WordExtractor(poifsDir); + } catch (OldWordFileFormatException e) { + return new Word6Extractor(poifsDir); + } + } + + if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) { + return new SlideShowExtractor(SlideShowFactory.create(poifsDir)); + } + + if (poifsDir.hasEntry("VisioDocument")) { + return new VisioTextExtractor(poifsDir); + } + + if (poifsDir.hasEntry("Quill")) { + return new PublisherTextExtractor(poifsDir); + } + + final String[] outlookEntryNames = new String[] { + // message bodies, saved as plain text (PtypString) + // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf) + // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry + // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx + // @see org.apache.poi.hsmf.Types.MAPIType + "__substg1.0_1000001E", //PidTagBody ASCII + "__substg1.0_1000001F", //PidTagBody Unicode + "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII + "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode + "__substg1.0_0037001E", //PidTagSubject ASCII + "__substg1.0_0037001F", //PidTagSubject Unicode + }; + for (String entryName : outlookEntryNames) { + if (poifsDir.hasEntry(entryName)) { + return new OutlookTextExtactor(poifsDir); + } + } + + throw new IllegalArgumentException("No supported documents found in the OLE2 stream"); + } + + /** + * Returns an array of text extractors, one for each of + * the embedded documents in the file 
(if there are any). + * If there are no embedded documents, you'll get back an + * empty array. Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + */ + public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List dirs, List nonPOIFS) throws IOException { + // Find all the embedded directories + DirectoryEntry root = ext.getRoot(); + if (root == null) { + throw new IllegalStateException("The extractor didn't know which POIFS it came from!"); + } + + if (ext instanceof WordExtractor) { + // These are in ObjectPool -> _... under the root + try { + DirectoryEntry op = (DirectoryEntry) + root.getEntry("ObjectPool"); + Iterator it = op.getEntries(); + while(it.hasNext()) { + Entry entry = it.next(); + if(entry.getName().startsWith("_")) { + dirs.add(entry); + } + } + } catch(FileNotFoundException e) { + // ignored here + } + //} else if(ext instanceof PowerPointExtractor) { + // Tricky, not stored directly in poifs + // TODO + } else if (ext instanceof OutlookTextExtactor) { + // Stored in the Attachment blocks + MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); + for (AttachmentChunks attachment : msg.getAttachmentFiles()) { + if (attachment.getAttachData() != null) { + byte[] data = attachment.getAttachData().getValue(); + nonPOIFS.add( new ByteArrayInputStream(data) ); + } else if (attachment.getAttachmentDirectory() != null) { + dirs.add(attachment.getAttachmentDirectory().getDirectory()); + } + } + } + } +} diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java deleted file mode 100644 index 2682550b2a..0000000000 --- a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java +++ /dev/null @@ -1,25 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.converter; - -import org.apache.poi.util.Beta; - -@Beta -public class ExcelToFoUtils extends AbstractExcelUtils -{ - -} diff --git a/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java b/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java deleted file mode 100644 index f91781dc64..0000000000 --- a/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java +++ /dev/null @@ -1,1371 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.hssf.usermodel; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.poi.hssf.record.BOFRecord; -import org.apache.poi.hssf.record.DimensionsRecord; -import org.apache.poi.hssf.record.EOFRecord; -import org.apache.poi.hssf.record.FooterRecord; -import org.apache.poi.hssf.record.HCenterRecord; -import org.apache.poi.hssf.record.HeaderRecord; -import org.apache.poi.hssf.record.PrintSetupRecord; -import org.apache.poi.hssf.record.ProtectRecord; -import org.apache.poi.hssf.record.Record; -import org.apache.poi.hssf.record.RecordBase; -import org.apache.poi.hssf.record.SCLRecord; -import org.apache.poi.hssf.record.UnknownRecord; -import org.apache.poi.hssf.record.VCenterRecord; -import org.apache.poi.hssf.record.chart.AreaFormatRecord; -import org.apache.poi.hssf.record.chart.AxisLineFormatRecord; -import org.apache.poi.hssf.record.chart.AxisOptionsRecord; -import org.apache.poi.hssf.record.chart.AxisParentRecord; -import org.apache.poi.hssf.record.chart.AxisRecord; -import org.apache.poi.hssf.record.chart.AxisUsedRecord; -import org.apache.poi.hssf.record.chart.BarRecord; -import org.apache.poi.hssf.record.chart.BeginRecord; -import org.apache.poi.hssf.record.chart.CategorySeriesAxisRecord; -import org.apache.poi.hssf.record.chart.ChartFormatRecord; -import org.apache.poi.hssf.record.chart.ChartRecord; -import org.apache.poi.hssf.record.chart.ChartTitleFormatRecord; -import org.apache.poi.hssf.record.chart.DataFormatRecord; -import org.apache.poi.hssf.record.chart.DefaultDataLabelTextPropertiesRecord; -import org.apache.poi.hssf.record.chart.EndRecord; -import org.apache.poi.hssf.record.chart.FontBasisRecord; -import org.apache.poi.hssf.record.chart.FontIndexRecord; -import org.apache.poi.hssf.record.chart.FrameRecord; 
-import org.apache.poi.hssf.record.chart.LegendRecord; -import org.apache.poi.hssf.record.chart.LineFormatRecord; -import org.apache.poi.hssf.record.chart.LinkedDataRecord; -import org.apache.poi.hssf.record.chart.PlotAreaRecord; -import org.apache.poi.hssf.record.chart.PlotGrowthRecord; -import org.apache.poi.hssf.record.chart.SeriesIndexRecord; -import org.apache.poi.hssf.record.chart.SeriesRecord; -import org.apache.poi.hssf.record.chart.SeriesTextRecord; -import org.apache.poi.hssf.record.chart.SeriesToChartGroupRecord; -import org.apache.poi.hssf.record.chart.SheetPropertiesRecord; -import org.apache.poi.hssf.record.chart.TextRecord; -import org.apache.poi.hssf.record.chart.TickRecord; -import org.apache.poi.hssf.record.chart.UnitsRecord; -import org.apache.poi.hssf.record.chart.ValueRangeRecord; -import org.apache.poi.ss.formula.ptg.Area3DPtg; -import org.apache.poi.ss.formula.ptg.AreaPtgBase; -import org.apache.poi.ss.formula.ptg.Ptg; -import org.apache.poi.ss.util.CellRangeAddress; -import org.apache.poi.ss.util.CellRangeAddressBase; - -/** - * Has methods for construction of a chart object. 
- * - * @author Glen Stampoultzis (glens at apache.org) - */ -public final class HSSFChart { - private HSSFSheet sheet; - private ChartRecord chartRecord; - - private LegendRecord legendRecord; - @SuppressWarnings("unused") - private ChartTitleFormatRecord chartTitleFormat; - private SeriesTextRecord chartTitleText; - private List valueRanges = new ArrayList<>(); - - private HSSFChartType type = HSSFChartType.Unknown; - - private List series = new ArrayList<>(); - - public enum HSSFChartType { - Area { - @Override - public short getSid() { - return 0x101A; - } - }, - Bar { - @Override - public short getSid() { - return 0x1017; - } - }, - Line { - @Override - public short getSid() { - return 0x1018; - } - }, - Pie { - @Override - public short getSid() { - return 0x1019; - } - }, - Scatter { - @Override - public short getSid() { - return 0x101B; - } - }, - Unknown { - @Override - public short getSid() { - return 0; - } - }; - - public abstract short getSid(); - } - - private HSSFChart(HSSFSheet sheet, ChartRecord chartRecord) { - this.chartRecord = chartRecord; - this.sheet = sheet; - } - - /** - * Creates a bar chart. API needs some work. :) - *

- * NOTE: Does not yet work... checking it in just so others - * can take a look. - */ - public void createBarChart( HSSFWorkbook workbook, HSSFSheet parentSheet ) - { - - List records = new ArrayList<>(); - records.add( createMSDrawingObjectRecord() ); - records.add( createOBJRecord() ); - records.add( createBOFRecord() ); - records.add(new HeaderRecord("")); - records.add(new FooterRecord("")); - records.add( createHCenterRecord() ); - records.add( createVCenterRecord() ); - records.add( createPrintSetupRecord() ); - // unknown 33 - records.add( createFontBasisRecord1() ); - records.add( createFontBasisRecord2() ); - records.add(new ProtectRecord(false)); - records.add( createUnitsRecord() ); - records.add( createChartRecord( 0, 0, 30434904, 19031616 ) ); - records.add( createBeginRecord() ); - records.add( createSCLRecord( (short) 1, (short) 1 ) ); - records.add( createPlotGrowthRecord( 65536, 65536 ) ); - records.add( createFrameRecord1() ); - records.add( createBeginRecord() ); - records.add( createLineFormatRecord(true) ); - records.add( createAreaFormatRecord1() ); - records.add( createEndRecord() ); - records.add( createSeriesRecord() ); - records.add( createBeginRecord() ); - records.add( createTitleLinkedDataRecord() ); - records.add( createValuesLinkedDataRecord() ); - records.add( createCategoriesLinkedDataRecord() ); - records.add( createDataFormatRecord() ); - // records.add(createBeginRecord()); - // unknown - // records.add(createEndRecord()); - records.add( createSeriesToChartGroupRecord() ); - records.add( createEndRecord() ); - records.add( createSheetPropsRecord() ); - records.add( createDefaultTextRecord( DefaultDataLabelTextPropertiesRecord.CATEGORY_DATA_TYPE_ALL_TEXT_CHARACTERISTIC ) ); - records.add( createAllTextRecord() ); - records.add( createBeginRecord() ); - // unknown - records.add( createFontIndexRecord( 5 ) ); - records.add( createDirectLinkRecord() ); - records.add( createEndRecord() ); - records.add( createDefaultTextRecord( 
(short) 3 ) ); // eek, undocumented text type - records.add( createUnknownTextRecord() ); - records.add( createBeginRecord() ); - records.add( createFontIndexRecord( (short) 6 ) ); - records.add( createDirectLinkRecord() ); - records.add( createEndRecord() ); - - records.add( createAxisUsedRecord( (short) 1 ) ); - createAxisRecords( records ); - - records.add( createEndRecord() ); - records.add( createDimensionsRecord() ); - records.add( createSeriesIndexRecord(2) ); - records.add( createSeriesIndexRecord(1) ); - records.add( createSeriesIndexRecord(3) ); - records.add(EOFRecord.instance); - - - - parentSheet.insertChartRecords( records ); - workbook.insertChartRecord(); - } - - /** - * Returns all the charts for the given sheet. - * - * NOTE: You won't be able to do very much with - * these charts yet, as this is very limited support - */ - public static HSSFChart[] getSheetCharts(HSSFSheet sheet) { - List charts = new ArrayList<>(); - HSSFChart lastChart = null; - HSSFSeries lastSeries = null; - // Find records of interest - List records = sheet.getSheet().getRecords(); - for(RecordBase r : records) { - - if(r instanceof ChartRecord) { - lastSeries = null; - lastChart = new HSSFChart(sheet,(ChartRecord)r); - charts.add(lastChart); - } else if (r instanceof LinkedDataRecord) { - LinkedDataRecord linkedDataRecord = (LinkedDataRecord) r; - if (lastSeries != null) { - lastSeries.insertData(linkedDataRecord); - } - } - - if (lastChart == null) { - continue; - } - - if (r instanceof LegendRecord) { - lastChart.legendRecord = (LegendRecord)r; - } else if(r instanceof SeriesRecord) { - HSSFSeries series = new HSSFSeries( (SeriesRecord)r ); - lastChart.series.add(series); - lastSeries = series; - } else if(r instanceof ChartTitleFormatRecord) { - lastChart.chartTitleFormat = (ChartTitleFormatRecord)r; - } else if(r instanceof SeriesTextRecord) { - // Applies to a series, unless we've seen a legend already - SeriesTextRecord str = (SeriesTextRecord)r; - 
if(lastChart.legendRecord == null && lastChart.series.size() > 0) { - HSSFSeries series = lastChart.series.get(lastChart.series.size()-1); - series.seriesTitleText = str; - } else { - lastChart.chartTitleText = str; - } - } else if(r instanceof ValueRangeRecord){ - lastChart.valueRanges.add((ValueRangeRecord)r); - } else if (r instanceof Record) { - Record record = (Record) r; - for (HSSFChartType type : HSSFChartType.values()) { - if (type == HSSFChartType.Unknown) { - continue; - } - if (record.getSid() == type.getSid()) { - lastChart.type = type; - break; - } - } - } - } - - return charts.toArray( new HSSFChart[charts.size()] ); - } - - /** Get the X offset of the chart */ - public int getChartX() { return chartRecord.getX(); } - /** Get the Y offset of the chart */ - public int getChartY() { return chartRecord.getY(); } - /** Get the width of the chart. {@link ChartRecord} */ - public int getChartWidth() { return chartRecord.getWidth(); } - /** Get the height of the chart. {@link ChartRecord} */ - public int getChartHeight() { return chartRecord.getHeight(); } - - /** Sets the X offset of the chart */ - public void setChartX(int x) { chartRecord.setX(x); } - /** Sets the Y offset of the chart */ - public void setChartY(int y) { chartRecord.setY(y); } - /** Sets the width of the chart. {@link ChartRecord} */ - public void setChartWidth(int width) { chartRecord.setWidth(width); } - /** Sets the height of the chart. {@link ChartRecord} */ - public void setChartHeight(int height) { chartRecord.setHeight(height); } - - /** - * Returns the series of the chart - */ - public HSSFSeries[] getSeries() { - return series.toArray(new HSSFSeries[series.size()]); - } - - /** - * Returns the chart's title, if there is one, - * or null if not - */ - public String getChartTitle() { - if(chartTitleText != null) { - return chartTitleText.getText(); - } - return null; - } - - /** - * Changes the chart's title, but only if there - * was one already. 
- * TODO - add in the records if not - */ - public void setChartTitle(String title) { - if(chartTitleText != null) { - chartTitleText.setText(title); - } else { - throw new IllegalStateException("No chart title found to change"); - } - } - - /** - * Set value range (basic Axis Options) - * @param axisIndex 0 - primary axis, 1 - secondary axis - * @param minimum minimum value; Double.NaN - automatic; null - no change - * @param maximum maximum value; Double.NaN - automatic; null - no change - * @param majorUnit major unit value; Double.NaN - automatic; null - no change - * @param minorUnit minor unit value; Double.NaN - automatic; null - no change - */ - public void setValueRange( int axisIndex, Double minimum, Double maximum, Double majorUnit, Double minorUnit){ - ValueRangeRecord valueRange = valueRanges.get( axisIndex ); - if( valueRange == null ) return; - if( minimum != null ){ - valueRange.setAutomaticMinimum(minimum.isNaN()); - valueRange.setMinimumAxisValue(minimum); - } - if( maximum != null ){ - valueRange.setAutomaticMaximum(maximum.isNaN()); - valueRange.setMaximumAxisValue(maximum); - } - if( majorUnit != null ){ - valueRange.setAutomaticMajor(majorUnit.isNaN()); - valueRange.setMajorIncrement(majorUnit); - } - if( minorUnit != null ){ - valueRange.setAutomaticMinor(minorUnit.isNaN()); - valueRange.setMinorIncrement(minorUnit); - } - } - - private SeriesIndexRecord createSeriesIndexRecord( int index ) - { - SeriesIndexRecord r = new SeriesIndexRecord(); - r.setIndex((short)index); - return r; - } - - private DimensionsRecord createDimensionsRecord() - { - DimensionsRecord r = new DimensionsRecord(); - r.setFirstRow(0); - r.setLastRow(31); - r.setFirstCol((short)0); - r.setLastCol((short)1); - return r; - } - - private HCenterRecord createHCenterRecord() - { - HCenterRecord r = new HCenterRecord(); - r.setHCenter(false); - return r; - } - - private VCenterRecord createVCenterRecord() - { - VCenterRecord r = new VCenterRecord(); - r.setVCenter(false); - 
return r; - } - - private PrintSetupRecord createPrintSetupRecord() - { - PrintSetupRecord r = new PrintSetupRecord(); - r.setPaperSize((short)0); - r.setScale((short)18); - r.setPageStart((short)1); - r.setFitWidth((short)1); - r.setFitHeight((short)1); - r.setLeftToRight(false); - r.setLandscape(false); - r.setValidSettings(true); - r.setNoColor(false); - r.setDraft(false); - r.setNotes(false); - r.setNoOrientation(false); - r.setUsePage(false); - r.setHResolution((short)0); - r.setVResolution((short)0); - r.setHeaderMargin(0.5); - r.setFooterMargin(0.5); - r.setCopies((short)15); // what the ?? - return r; - } - - private FontBasisRecord createFontBasisRecord1() - { - FontBasisRecord r = new FontBasisRecord(); - r.setXBasis((short)9120); - r.setYBasis((short)5640); - r.setHeightBasis((short)200); - r.setScale((short)0); - r.setIndexToFontTable((short)5); - return r; - } - - private FontBasisRecord createFontBasisRecord2() - { - FontBasisRecord r = createFontBasisRecord1(); - r.setIndexToFontTable((short)6); - return r; - } - - private BOFRecord createBOFRecord() - { - BOFRecord r = new BOFRecord(); - r.setVersion((short)600); - r.setType((short)20); - r.setBuild((short)0x1CFE); - r.setBuildYear((short)1997); - r.setHistoryBitMask(0x40C9); - r.setRequiredVersion(106); - return r; - } - - private UnknownRecord createOBJRecord() - { - byte[] data = { - (byte) 0x15, (byte) 0x00, (byte) 0x12, (byte) 0x00, (byte) 0x05, (byte) 0x00, (byte) 0x02, (byte) 0x00, (byte) 0x11, (byte) 0x60, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0xB8, (byte) 0x03, - (byte) 0x87, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, - }; - - return new UnknownRecord( (short) 0x005D, data ); - } - - private UnknownRecord createMSDrawingObjectRecord() - { - // Since we haven't created this object yet we'll just put in the raw - // form for the moment. 
- - byte[] data = { - (byte)0x0F, (byte)0x00, (byte)0x02, (byte)0xF0, (byte)0xC0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0x00, (byte)0x08, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x0F, (byte)0x00, (byte)0x03, (byte)0xF0, (byte)0xA8, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x28, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x01, (byte)0x00, (byte)0x09, (byte)0xF0, (byte)0x10, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x02, (byte)0x00, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x05, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x70, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x92, (byte)0x0C, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x0A, (byte)0x00, (byte)0x00, (byte)0x93, (byte)0x00, (byte)0x0B, (byte)0xF0, (byte)0x36, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x7F, (byte)0x00, (byte)0x04, (byte)0x01, (byte)0x04, (byte)0x01, (byte)0xBF, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x81, (byte)0x01, (byte)0x4E, (byte)0x00, - (byte)0x00, (byte)0x08, (byte)0x83, (byte)0x01, (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xBF, (byte)0x01, (byte)0x10, (byte)0x00, (byte)0x11, (byte)0x00, (byte)0xC0, (byte)0x01, - (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xFF, (byte)0x01, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x3F, (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x00, - (byte)0xBF, (byte)0x03, (byte)0x00, 
(byte)0x00, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0xF0, (byte)0x12, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, - (byte)0x04, (byte)0x00, (byte)0xC0, (byte)0x02, (byte)0x0A, (byte)0x00, (byte)0xF4, (byte)0x00, (byte)0x0E, (byte)0x00, (byte)0x66, (byte)0x01, (byte)0x20, (byte)0x00, (byte)0xE9, (byte)0x00, - (byte)0x00, (byte)0x00, (byte)0x11, (byte)0xF0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 - }; - - return new UnknownRecord((short)0x00EC, data); - } - - private void createAxisRecords( List records ) - { - records.add( createAxisParentRecord() ); - records.add( createBeginRecord() ); - records.add( createAxisRecord( AxisRecord.AXIS_TYPE_CATEGORY_OR_X_AXIS ) ); - records.add( createBeginRecord() ); - records.add( createCategorySeriesAxisRecord() ); - records.add( createAxisOptionsRecord() ); - records.add( createTickRecord1() ); - records.add( createEndRecord() ); - records.add( createAxisRecord( AxisRecord.AXIS_TYPE_VALUE_AXIS ) ); - records.add( createBeginRecord() ); - records.add( createValueRangeRecord() ); - records.add( createTickRecord2() ); - records.add( createAxisLineFormatRecord( AxisLineFormatRecord.AXIS_TYPE_MAJOR_GRID_LINE ) ); - records.add( createLineFormatRecord(false) ); - records.add( createEndRecord() ); - records.add( createPlotAreaRecord() ); - records.add( createFrameRecord2() ); - records.add( createBeginRecord() ); - records.add( createLineFormatRecord2() ); - records.add( createAreaFormatRecord2() ); - records.add( createEndRecord() ); - records.add( createChartFormatRecord() ); - records.add( createBeginRecord() ); - records.add( createBarRecord() ); - // unknown 1022 - records.add( createLegendRecord() ); - records.add( createBeginRecord() ); - // unknown 104f - records.add( createTextRecord() ); - records.add( createBeginRecord() ); - // unknown 104f - records.add( createLinkedDataRecord() ); - records.add( createEndRecord() ); - records.add( createEndRecord() ); - records.add( 
createEndRecord() ); - records.add( createEndRecord() ); - } - - private LinkedDataRecord createLinkedDataRecord() - { - LinkedDataRecord r = new LinkedDataRecord(); - r.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT); - r.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT); - r.setCustomNumberFormat(false); - r.setIndexNumberFmtRecord((short)0); - r.setFormulaOfLink(null); - return r; - } - - private TextRecord createTextRecord() - { - TextRecord r = new TextRecord(); - r.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER); - r.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER); - r.setDisplayMode((short)1); - r.setRgbColor(0x00000000); - r.setX(-37); - r.setY(-60); - r.setWidth(0); - r.setHeight(0); - r.setAutoColor(true); - r.setShowKey(false); - r.setShowValue(false); - r.setVertical(false); - r.setAutoGeneratedText(true); - r.setGenerated(true); - r.setAutoLabelDeleted(false); - r.setAutoBackground(true); - r.setRotation((short)0); - r.setShowCategoryLabelAsPercentage(false); - r.setShowValueAsPercentage(false); - r.setShowBubbleSizes(false); - r.setShowLabel(false); - r.setIndexOfColorValue((short)77); - r.setDataLabelPlacement((short)0); - r.setTextRotation((short)0); - return r; - } - - private LegendRecord createLegendRecord() - { - LegendRecord r = new LegendRecord(); - r.setXAxisUpperLeft(3542); - r.setYAxisUpperLeft(1566); - r.setXSize(437); - r.setYSize(213); - r.setType(LegendRecord.TYPE_RIGHT); - r.setSpacing(LegendRecord.SPACING_MEDIUM); - r.setAutoPosition(true); - r.setAutoSeries(true); - r.setAutoXPositioning(true); - r.setAutoYPositioning(true); - r.setVertical(true); - r.setDataTable(false); - return r; - } - - private BarRecord createBarRecord() - { - BarRecord r = new BarRecord(); - r.setBarSpace((short)0); - r.setCategorySpace((short)150); - r.setHorizontal(false); - r.setStacked(false); - r.setDisplayAsPercentage(false); - r.setShadow(false); - return r; - } - - private ChartFormatRecord 
createChartFormatRecord() - { - ChartFormatRecord r = new ChartFormatRecord(); - r.setXPosition(0); - r.setYPosition(0); - r.setWidth(0); - r.setHeight(0); - r.setVaryDisplayPattern(false); - return r; - } - - private PlotAreaRecord createPlotAreaRecord() - { - return new PlotAreaRecord( ); - } - - private AxisLineFormatRecord createAxisLineFormatRecord( short format ) - { - AxisLineFormatRecord r = new AxisLineFormatRecord(); - r.setAxisType( format ); - return r; - } - - private ValueRangeRecord createValueRangeRecord() - { - ValueRangeRecord r = new ValueRangeRecord(); - r.setMinimumAxisValue( 0.0 ); - r.setMaximumAxisValue( 0.0 ); - r.setMajorIncrement( 0 ); - r.setMinorIncrement( 0 ); - r.setCategoryAxisCross( 0 ); - r.setAutomaticMinimum( true ); - r.setAutomaticMaximum( true ); - r.setAutomaticMajor( true ); - r.setAutomaticMinor( true ); - r.setAutomaticCategoryCrossing( true ); - r.setLogarithmicScale( false ); - r.setValuesInReverse( false ); - r.setCrossCategoryAxisAtMaximum( false ); - r.setReserved( true ); // what's this do?? 
- return r; - } - - private TickRecord createTickRecord1() - { - TickRecord r = new TickRecord(); - r.setMajorTickType( (byte) 2 ); - r.setMinorTickType( (byte) 0 ); - r.setLabelPosition( (byte) 3 ); - r.setBackground( (byte) 1 ); - r.setLabelColorRgb( 0 ); - r.setZero1( (short) 0 ); - r.setZero2( (short) 0 ); - r.setZero3( (short) 45 ); - r.setAutorotate( true ); - r.setAutoTextBackground( true ); - r.setRotation( (short) 0 ); - r.setAutorotate( true ); - r.setTickColor( (short) 77 ); - return r; - } - - private TickRecord createTickRecord2() - { - TickRecord r = createTickRecord1(); - r.setZero3((short)0); - return r; - } - - private AxisOptionsRecord createAxisOptionsRecord() - { - AxisOptionsRecord r = new AxisOptionsRecord(); - r.setMinimumCategory( (short) -28644 ); - r.setMaximumCategory( (short) -28715 ); - r.setMajorUnitValue( (short) 2 ); - r.setMajorUnit( (short) 0 ); - r.setMinorUnitValue( (short) 1 ); - r.setMinorUnit( (short) 0 ); - r.setBaseUnit( (short) 0 ); - r.setCrossingPoint( (short) -28644 ); - r.setDefaultMinimum( true ); - r.setDefaultMaximum( true ); - r.setDefaultMajor( true ); - r.setDefaultMinorUnit( true ); - r.setIsDate( true ); - r.setDefaultBase( true ); - r.setDefaultCross( true ); - r.setDefaultDateSettings( true ); - return r; - } - - private CategorySeriesAxisRecord createCategorySeriesAxisRecord() - { - CategorySeriesAxisRecord r = new CategorySeriesAxisRecord(); - r.setCrossingPoint( (short) 1 ); - r.setLabelFrequency( (short) 1 ); - r.setTickMarkFrequency( (short) 1 ); - r.setValueAxisCrossing( true ); - r.setCrossesFarRight( false ); - r.setReversed( false ); - return r; - } - - private AxisRecord createAxisRecord( short axisType ) - { - AxisRecord r = new AxisRecord(); - r.setAxisType( axisType ); - return r; - } - - private AxisParentRecord createAxisParentRecord() - { - AxisParentRecord r = new AxisParentRecord(); - r.setAxisType( AxisParentRecord.AXIS_TYPE_MAIN ); - r.setX( 479 ); - r.setY( 221 ); - r.setWidth( 2995 ); - 
r.setHeight( 2902 ); - return r; - } - - private AxisUsedRecord createAxisUsedRecord( short numAxis ) - { - AxisUsedRecord r = new AxisUsedRecord(); - r.setNumAxis( numAxis ); - return r; - } - - private LinkedDataRecord createDirectLinkRecord() - { - LinkedDataRecord r = new LinkedDataRecord(); - r.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT ); - r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT ); - r.setCustomNumberFormat( false ); - r.setIndexNumberFmtRecord( (short) 0 ); - r.setFormulaOfLink(null); - return r; - } - - private FontIndexRecord createFontIndexRecord( int index ) - { - FontIndexRecord r = new FontIndexRecord(); - r.setFontIndex( (short) index ); - return r; - } - - private TextRecord createAllTextRecord() - { - TextRecord r = new TextRecord(); - r.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER ); - r.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER ); - r.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT ); - r.setRgbColor( 0 ); - r.setX( -37 ); - r.setY( -60 ); - r.setWidth( 0 ); - r.setHeight( 0 ); - r.setAutoColor( true ); - r.setShowKey( false ); - r.setShowValue( true ); - r.setVertical( false ); - r.setAutoGeneratedText( true ); - r.setGenerated( true ); - r.setAutoLabelDeleted( false ); - r.setAutoBackground( true ); - r.setRotation( (short) 0 ); - r.setShowCategoryLabelAsPercentage( false ); - r.setShowValueAsPercentage( false ); - r.setShowBubbleSizes( false ); - r.setShowLabel( false ); - r.setIndexOfColorValue( (short) 77 ); - r.setDataLabelPlacement( (short) 0 ); - r.setTextRotation( (short) 0 ); - return r; - } - - private TextRecord createUnknownTextRecord() - { - TextRecord r = new TextRecord(); - r.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER ); - r.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER ); - r.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT ); - r.setRgbColor( 0 ); - r.setX( -37 ); - r.setY( -60 ); - r.setWidth( 0 ); - r.setHeight( 0 ); - 
r.setAutoColor( true ); - r.setShowKey( false ); - r.setShowValue( false ); - r.setVertical( false ); - r.setAutoGeneratedText( true ); - r.setGenerated( true ); - r.setAutoLabelDeleted( false ); - r.setAutoBackground( true ); - r.setRotation( (short) 0 ); - r.setShowCategoryLabelAsPercentage( false ); - r.setShowValueAsPercentage( false ); - r.setShowBubbleSizes( false ); - r.setShowLabel( false ); - r.setIndexOfColorValue( (short) 77 ); - r.setDataLabelPlacement( (short) 11088 ); - r.setTextRotation( (short) 0 ); - return r; - } - - private DefaultDataLabelTextPropertiesRecord createDefaultTextRecord( short categoryDataType ) - { - DefaultDataLabelTextPropertiesRecord r = new DefaultDataLabelTextPropertiesRecord(); - r.setCategoryDataType( categoryDataType ); - return r; - } - - private SheetPropertiesRecord createSheetPropsRecord() - { - SheetPropertiesRecord r = new SheetPropertiesRecord(); - r.setChartTypeManuallyFormatted( false ); - r.setPlotVisibleOnly( true ); - r.setDoNotSizeWithWindow( false ); - r.setDefaultPlotDimensions( true ); - r.setAutoPlotArea( false ); - return r; - } - - private SeriesToChartGroupRecord createSeriesToChartGroupRecord() - { - return new SeriesToChartGroupRecord(); - } - - private DataFormatRecord createDataFormatRecord() - { - DataFormatRecord r = new DataFormatRecord(); - r.setPointNumber( (short) -1 ); - r.setSeriesIndex( (short) 0 ); - r.setSeriesNumber( (short) 0 ); - r.setUseExcel4Colors( false ); - return r; - } - - private LinkedDataRecord createCategoriesLinkedDataRecord() - { - LinkedDataRecord r = new LinkedDataRecord(); - r.setLinkType( LinkedDataRecord.LINK_TYPE_CATEGORIES ); - r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET ); - r.setCustomNumberFormat( false ); - r.setIndexNumberFmtRecord( (short) 0 ); - Area3DPtg p = new Area3DPtg(0, 31, 1, 1, - false, false, false, false, 0); - r.setFormulaOfLink(new Ptg[] { p, }); - return r; - } - - private LinkedDataRecord createValuesLinkedDataRecord() - { - 
LinkedDataRecord r = new LinkedDataRecord(); - r.setLinkType( LinkedDataRecord.LINK_TYPE_VALUES ); - r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET ); - r.setCustomNumberFormat( false ); - r.setIndexNumberFmtRecord( (short) 0 ); - Area3DPtg p = new Area3DPtg(0, 31, 0, 0, - false, false, false, false, 0); - r.setFormulaOfLink(new Ptg[] { p, }); - return r; - } - - private LinkedDataRecord createTitleLinkedDataRecord() - { - LinkedDataRecord r = new LinkedDataRecord(); - r.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT ); - r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT ); - r.setCustomNumberFormat( false ); - r.setIndexNumberFmtRecord( (short) 0 ); - r.setFormulaOfLink(null); - return r; - } - - private SeriesRecord createSeriesRecord() - { - SeriesRecord r = new SeriesRecord(); - r.setCategoryDataType( SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC ); - r.setValuesDataType( SeriesRecord.VALUES_DATA_TYPE_NUMERIC ); - r.setNumCategories( (short) 32 ); - r.setNumValues( (short) 31 ); - r.setBubbleSeriesType( SeriesRecord.BUBBLE_SERIES_TYPE_NUMERIC ); - r.setNumBubbleValues( (short) 0 ); - return r; - } - - private EndRecord createEndRecord() - { - return new EndRecord(); - } - - private AreaFormatRecord createAreaFormatRecord1() - { - AreaFormatRecord r = new AreaFormatRecord(); - r.setForegroundColor( 16777215 ); // RGB Color - r.setBackgroundColor( 0 ); // RGB Color - r.setPattern( (short) 1 ); // TODO: Add Pattern constants to record - r.setAutomatic( true ); - r.setInvert( false ); - r.setForecolorIndex( (short) 78 ); - r.setBackcolorIndex( (short) 77 ); - return r; - } - - private AreaFormatRecord createAreaFormatRecord2() - { - AreaFormatRecord r = new AreaFormatRecord(); - r.setForegroundColor(0x00c0c0c0); - r.setBackgroundColor(0x00000000); - r.setPattern((short)1); - r.setAutomatic(false); - r.setInvert(false); - r.setForecolorIndex((short)22); - r.setBackcolorIndex((short)79); - return r; - } - - private LineFormatRecord 
createLineFormatRecord( boolean drawTicks ) - { - LineFormatRecord r = new LineFormatRecord(); - r.setLineColor( 0 ); - r.setLinePattern( LineFormatRecord.LINE_PATTERN_SOLID ); - r.setWeight( (short) -1 ); - r.setAuto( true ); - r.setDrawTicks( drawTicks ); - r.setColourPaletteIndex( (short) 77 ); // what colour is this? - return r; - } - - private LineFormatRecord createLineFormatRecord2() - { - LineFormatRecord r = new LineFormatRecord(); - r.setLineColor( 0x00808080 ); - r.setLinePattern( (short) 0 ); - r.setWeight( (short) 0 ); - r.setAuto( false ); - r.setDrawTicks( false ); - r.setUnknown( false ); - r.setColourPaletteIndex( (short) 23 ); - return r; - } - - private FrameRecord createFrameRecord1() - { - FrameRecord r = new FrameRecord(); - r.setBorderType( FrameRecord.BORDER_TYPE_REGULAR ); - r.setAutoSize( false ); - r.setAutoPosition( true ); - return r; - } - - private FrameRecord createFrameRecord2() - { - FrameRecord r = new FrameRecord(); - r.setBorderType( FrameRecord.BORDER_TYPE_REGULAR ); - r.setAutoSize( true ); - r.setAutoPosition( true ); - return r; - } - - private PlotGrowthRecord createPlotGrowthRecord( int horizScale, int vertScale ) - { - PlotGrowthRecord r = new PlotGrowthRecord(); - r.setHorizontalScale( horizScale ); - r.setVerticalScale( vertScale ); - return r; - } - - private SCLRecord createSCLRecord( short numerator, short denominator ) - { - SCLRecord r = new SCLRecord(); - r.setDenominator( denominator ); - r.setNumerator( numerator ); - return r; - } - - private BeginRecord createBeginRecord() - { - return new BeginRecord(); - } - - private ChartRecord createChartRecord( int x, int y, int width, int height ) - { - ChartRecord r = new ChartRecord(); - r.setX( x ); - r.setY( y ); - r.setWidth( width ); - r.setHeight( height ); - return r; - } - - private UnitsRecord createUnitsRecord() - { - UnitsRecord r = new UnitsRecord(); - r.setUnits( (short) 0 ); - return r; - } - - - /** - * A series in a chart - */ - public static class 
HSSFSeries { - private SeriesRecord series; - private SeriesTextRecord seriesTitleText; - private LinkedDataRecord dataName; - private LinkedDataRecord dataValues; - private LinkedDataRecord dataCategoryLabels; - private LinkedDataRecord dataSecondaryCategoryLabels; - - /* package */ HSSFSeries(SeriesRecord series) { - this.series = series; - } - - /* package */ void insertData(LinkedDataRecord data){ - switch(data.getLinkType()){ - - case LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT: - dataName = data; - break; - case LinkedDataRecord.LINK_TYPE_VALUES: - dataValues = data; - break; - case LinkedDataRecord.LINK_TYPE_CATEGORIES: - dataCategoryLabels = data; - break; - case LinkedDataRecord.LINK_TYPE_SECONDARY_CATEGORIES: - dataSecondaryCategoryLabels = data; - break; - default: - throw new IllegalStateException("Invalid link type: " + data.getLinkType()); - } - } - - /* package */ void setSeriesTitleText(SeriesTextRecord seriesTitleText) - { - this.seriesTitleText = seriesTitleText; - } - - public short getNumValues() { - return series.getNumValues(); - } - /** - * See {@link SeriesRecord} - */ - public short getValueType() { - return series.getValuesDataType(); - } - - /** - * Returns the series' title, if there is one, - * or null if not - */ - public String getSeriesTitle() { - if(seriesTitleText != null) { - return seriesTitleText.getText(); - } - return null; - } - - /** - * Changes the series' title, but only if there - * was one already. 
- * TODO - add in the records if not - */ - public void setSeriesTitle(String title) { - if(seriesTitleText != null) { - seriesTitleText.setText(title); - } else { - throw new IllegalStateException("No series title found to change"); - } - } - - /** - * @return record with data names - */ - public LinkedDataRecord getDataName(){ - return dataName; - } - - /** - * @return record with data values - */ - public LinkedDataRecord getDataValues(){ - return dataValues; - } - - /** - * @return record with data category labels - */ - public LinkedDataRecord getDataCategoryLabels(){ - return dataCategoryLabels; - } - - /** - * @return record with data secondary category labels - */ - public LinkedDataRecord getDataSecondaryCategoryLabels() { - return dataSecondaryCategoryLabels; - } - - /** - * @return record with series - */ - public SeriesRecord getSeries() { - return series; - } - - private CellRangeAddressBase getCellRange(LinkedDataRecord linkedDataRecord) { - if (linkedDataRecord == null) - { - return null ; - } - - int firstRow = 0; - int lastRow = 0; - int firstCol = 0; - int lastCol = 0; - - for (Ptg ptg : linkedDataRecord.getFormulaOfLink()) { - if (ptg instanceof AreaPtgBase) { - AreaPtgBase areaPtg = (AreaPtgBase) ptg; - - firstRow = areaPtg.getFirstRow(); - lastRow = areaPtg.getLastRow(); - - firstCol = areaPtg.getFirstColumn(); - lastCol = areaPtg.getLastColumn(); - } - } - - return new CellRangeAddress(firstRow, lastRow, firstCol, lastCol); - } - - public CellRangeAddressBase getValuesCellRange() { - return getCellRange(dataValues); - } - - public CellRangeAddressBase getCategoryLabelsCellRange() { - return getCellRange(dataCategoryLabels); - } - - private Integer setVerticalCellRange(LinkedDataRecord linkedDataRecord, - CellRangeAddressBase range) { - if (linkedDataRecord == null) - { - return null; - } - - List ptgList = new ArrayList<>(); - - int rowCount = (range.getLastRow() - range.getFirstRow()) + 1; - int colCount = (range.getLastColumn() - 
range.getFirstColumn()) + 1; - - for (Ptg ptg : linkedDataRecord.getFormulaOfLink()) { - if (ptg instanceof AreaPtgBase) { - AreaPtgBase areaPtg = (AreaPtgBase) ptg; - - areaPtg.setFirstRow(range.getFirstRow()); - areaPtg.setLastRow(range.getLastRow()); - - areaPtg.setFirstColumn(range.getFirstColumn()); - areaPtg.setLastColumn(range.getLastColumn()); - ptgList.add(areaPtg); - } - } - - linkedDataRecord.setFormulaOfLink(ptgList.toArray(new Ptg[ptgList.size()])); - - return rowCount * colCount; - } - - public void setValuesCellRange(CellRangeAddressBase range) { - Integer count = setVerticalCellRange(dataValues, range); - if (count == null) - { - return; - } - - series.setNumValues((short)(int)count); - } - - public void setCategoryLabelsCellRange(CellRangeAddressBase range) { - Integer count = setVerticalCellRange(dataCategoryLabels, range); - if (count == null) - { - return; - } - - series.setNumCategories((short)(int)count); - } - } - - public HSSFSeries createSeries() throws Exception { - ArrayList seriesTemplate = new ArrayList<>(); - boolean seriesTemplateFilled = false; - - int idx = 0; - int deep = 0; - int chartRecordIdx = -1; - int chartDeep = -1; - int lastSeriesDeep = -1; - int endSeriesRecordIdx = -1; - int seriesIdx = 0; - final List records = sheet.getSheet().getRecords(); - - /* store first series as template and find last series index */ - for(final RecordBase record : records) { - - idx++; - - if (record instanceof BeginRecord) { - deep++; - } else if (record instanceof EndRecord) { - deep--; - - if (lastSeriesDeep == deep) { - lastSeriesDeep = -1; - endSeriesRecordIdx = idx; - if (!seriesTemplateFilled) { - seriesTemplate.add(record); - seriesTemplateFilled = true; - } - } - - if (chartDeep == deep) { - break; - } - } - - if (record instanceof ChartRecord) { - if (record == chartRecord) { - chartRecordIdx = idx; - chartDeep = deep; - } - } else if (record instanceof SeriesRecord) { - if (chartRecordIdx != -1) { - seriesIdx++; - lastSeriesDeep = 
deep; - } - } - - if (lastSeriesDeep != -1 && !seriesTemplateFilled) { - seriesTemplate.add(record) ; - } - } - - /* check if a series was found */ - if (endSeriesRecordIdx == -1) { - return null; - } - - /* next index in the records list where the new series can be inserted */ - idx = endSeriesRecordIdx + 1; - - HSSFSeries newSeries = null; - - /* duplicate record of the template series */ - ArrayList clonedRecords = new ArrayList<>(); - for(final RecordBase record : seriesTemplate) { - - Record newRecord = null; - - if (record instanceof BeginRecord) { - newRecord = new BeginRecord(); - } else if (record instanceof EndRecord) { - newRecord = new EndRecord(); - } else if (record instanceof SeriesRecord) { - SeriesRecord seriesRecord = (SeriesRecord) ((SeriesRecord)record).clone(); - newSeries = new HSSFSeries(seriesRecord); - newRecord = seriesRecord; - } else if (record instanceof LinkedDataRecord) { - LinkedDataRecord linkedDataRecord = ((LinkedDataRecord)record).clone(); - if (newSeries != null) { - newSeries.insertData(linkedDataRecord); - } - newRecord = linkedDataRecord; - } else if (record instanceof DataFormatRecord) { - DataFormatRecord dataFormatRecord = ((DataFormatRecord)record).clone(); - - dataFormatRecord.setSeriesIndex((short)seriesIdx) ; - dataFormatRecord.setSeriesNumber((short)seriesIdx) ; - - newRecord = dataFormatRecord; - } else if (record instanceof SeriesTextRecord) { - SeriesTextRecord seriesTextRecord = (SeriesTextRecord) ((SeriesTextRecord)record).clone(); - if (newSeries != null) { - newSeries.setSeriesTitleText(seriesTextRecord); - } - newRecord = seriesTextRecord; - } else if (record instanceof Record) { - newRecord = (Record) ((Record)record).clone(); - } - - if (newRecord != null) - { - clonedRecords.add(newRecord); - } - } - - /* check if a user model series object was created */ - if (newSeries == null) - { - return null; - } - - /* transfer series to record list */ - for(final RecordBase record : clonedRecords) { - 
records.add(idx++, record); - } - - return newSeries; - } - - public boolean removeSeries(HSSFSeries remSeries) { - int deep = 0; - int chartDeep = -1; - int lastSeriesDeep = -1; - int seriesIdx = -1; - boolean removeSeries = false; - boolean chartEntered = false; - boolean result = false; - final List records = sheet.getSheet().getRecords(); - - /* store first series as template and find last series index */ - Iterator iter = records.iterator(); - while (iter.hasNext()) { - RecordBase record = iter.next(); - - if (record instanceof BeginRecord) { - deep++; - } else if (record instanceof EndRecord) { - deep--; - - if (lastSeriesDeep == deep) { - lastSeriesDeep = -1; - - if (removeSeries) { - removeSeries = false; - result = true; - iter.remove(); - } - } - - if (chartDeep == deep) { - break; - } - } - - if (record instanceof ChartRecord) { - if (record == chartRecord) { - chartDeep = deep; - chartEntered = true; - } - } else if (record instanceof SeriesRecord) { - if (chartEntered) { - if (remSeries.series == record) { - lastSeriesDeep = deep; - removeSeries = true; - } else { - seriesIdx++; - } - } - } else if (record instanceof DataFormatRecord) { - if (chartEntered && !removeSeries) { - DataFormatRecord dataFormatRecord = (DataFormatRecord) record; - dataFormatRecord.setSeriesIndex((short) seriesIdx); - dataFormatRecord.setSeriesNumber((short) seriesIdx); - } - } - - if (removeSeries) { - iter.remove(); - } - } - - return result; - } - - public HSSFChartType getType() { - return type; - } -} diff --git a/src/testcases/org/apache/poi/dev/RecordGenerator.java b/src/testcases/org/apache/poi/dev/RecordGenerator.java new file mode 100644 index 0000000000..585003c526 --- /dev/null +++ b/src/testcases/org/apache/poi/dev/RecordGenerator.java @@ -0,0 +1,160 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.dev; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Locale; +import java.util.Properties; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.poi.util.XMLHelper; +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +/** + * Command line tool that generates record source files, and matching Test source files, + * by running XSL style sheets over XML record definition files. + * + *@author andy + *@since May 10, 2002 + */ +public class RecordGenerator { + /** + * Command line entry point. Records are only generated when exactly four arguments are given + * (record definitions directory, record styles directory, destination source path, + * test source path); otherwise a usage message is printed. + * + *@param args The command line arguments + *@exception Exception if the FieldIterator class cannot be loaded or record generation fails + */ + public static void main(String[] args) + throws Exception { + // Force load so that we don't start generating records and realise this hasn't compiled yet. 
+ Class.forName("org.apache.poi.generator.FieldIterator"); + + if (args.length != 4) { + System.out.println("Usage:"); + /* NOTE(review): the usage text below names org.apache.poi.hssf.util.RecordGenerator although this class is declared in package org.apache.poi.dev, and RECORD_DEFINTIONS looks like a misspelling of RECORD_DEFINITIONS - left untouched here because it is runtime output */ System.out.println(" java org.apache.poi.hssf.util.RecordGenerator RECORD_DEFINTIONS RECORD_STYLES DEST_SRC_PATH TEST_SRC_PATH"); + } else { + generateRecords(args[0], args[1], args[2], args[3]); + } + } + + + /** Generates one record source file, and one Test source file if none exists yet, for every regular file in defintionsDir whose name ends with _record.xml or _type.xml. The style sheet is looked up in recordStyleDir under the lower-cased text of the extends element (with _test appended for the test); output is written below destSrcPathDir and testSrcPathDir in a directory derived from the package attribute of the root element. Prints a message to System.err and returns when defintionsDir cannot be listed. Throws IOException when a destination directory is not created, and passes on parser and transformer failures. */ private static void generateRecords(String defintionsDir, String recordStyleDir, String destSrcPathDir, String testSrcPathDir) + throws Exception { + File definitionsFiles[] = new File(defintionsDir).listFiles(); + if (definitionsFiles == null) { + System.err.println(defintionsDir+" is not a directory."); + return; + } + + for (File file : definitionsFiles) { + if (file.isFile() && + (file.getName().endsWith("_record.xml") || + file.getName().endsWith("_type.xml") + ) + ) { + // Get record name and package + DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document document = builder.parse(file); + Element record = document.getDocumentElement(); + /* NOTE(review): assumes every definition has extends and suffix child elements with text content, and name and package attributes; nothing below checks for a missing one - confirm against the definition files */ String extendstg = record.getElementsByTagName("extends").item(0).getFirstChild().getNodeValue(); + String suffix = record.getElementsByTagName("suffix").item(0).getFirstChild().getNodeValue(); + String recordName = record.getAttributes().getNamedItem("name").getNodeValue(); + String packageName = record.getAttributes().getNamedItem("package").getNodeValue(); + packageName = packageName.replace('.', '/'); + + // Generate record + String destinationPath = destSrcPathDir + "/" + packageName; + File destinationPathFile = new File(destinationPath); + /* NOTE(review): File.mkdirs() also returns false when the directory already exists, so this branch throws for a destination directory that is already present, e.g. for the second definition file of the same package - confirm whether that is intended */ if(!destinationPathFile.mkdirs()) { + throw new IOException("Could not create directory " + destinationPathFile); + } else { + System.out.println("Created destination directory: " + destinationPath); + } + String destinationFilepath = destinationPath + "/" + recordName + suffix + ".java"; + transform(file, new File(destinationFilepath), + new File(recordStyleDir + "/" + 
extendstg.toLowerCase(Locale.ROOT) + ".xsl")); + System.out.println("Generated " + suffix + ": " + destinationFilepath); + + // Generate test (if not already generated) + destinationPath = testSrcPathDir + "/" + packageName; + destinationPathFile = new File(destinationPath); + /* NOTE(review): same mkdirs() caveat as for the record destination directory - an existing test directory makes this throw */ if(!destinationPathFile.mkdirs()) { + throw new IOException("Could not create directory " + destinationPathFile); + } else { + System.out.println("Created destination directory: " + destinationPath); + } + destinationFilepath = destinationPath + "/Test" + recordName + suffix + ".java"; + if (!new File(destinationFilepath).exists()) { + String temp = (recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT) + "_test.xsl"); + transform(file, new File(destinationFilepath), new File(temp)); + System.out.println("Generated test: " + destinationFilepath); + } else { + System.out.println("Skipped test generation: " + destinationFilepath); + } + } + } + } + + + + /** + *

Executes an XSL transformation. This process transforms an XML input + * file into a text output file controlled by an XSLT specification.

+ * + * @param in the XML input file + * @param out the text output file + * @param xslt the XSLT specification, i.e. an XSL style sheet + * @throws FileNotFoundException + * @throws TransformerException + */ + private static void transform(final File in, final File out, final File xslt) + throws FileNotFoundException, TransformerException + { + final StreamSource ss = new StreamSource(xslt); + final TransformerFactory tf = TransformerFactory.newInstance(); + final Transformer t; + try + { + t = tf.newTransformer(ss); + } + catch (TransformerException ex) + { + /* report which style sheet failed to compile, then pass the exception on unchanged */ System.err.println("Error compiling XSL style sheet " + xslt); + throw ex; + } + /* request the text output method; properties set through setOutputProperties override the xsl:output settings of the style sheet */ final Properties p = new Properties(); + p.setProperty(OutputKeys.METHOD, "text"); + t.setOutputProperties(p); + final Result result = new StreamResult(out); + t.transform(new StreamSource(in), result); + } + +}