You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

XSSFReader.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.eventusermodel;
  16. import javax.xml.parsers.ParserConfigurationException;
  17. import java.io.IOException;
  18. import java.io.InputStream;
  19. import java.util.ArrayList;
  20. import java.util.Collections;
  21. import java.util.HashMap;
  22. import java.util.Iterator;
  23. import java.util.LinkedList;
  24. import java.util.List;
  25. import java.util.Locale;
  26. import java.util.Map;
  27. import org.apache.poi.POIXMLException;
  28. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  29. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  30. import org.apache.poi.openxml4j.opc.OPCPackage;
  31. import org.apache.poi.openxml4j.opc.PackagePart;
  32. import org.apache.poi.openxml4j.opc.PackagePartName;
  33. import org.apache.poi.openxml4j.opc.PackageRelationship;
  34. import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
  35. import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
  36. import org.apache.poi.openxml4j.opc.PackagingURIHelper;
  37. import org.apache.poi.util.POILogFactory;
  38. import org.apache.poi.util.POILogger;
  39. import org.apache.poi.util.SAXHelper;
  40. import org.apache.poi.xssf.model.CommentsTable;
  41. import org.apache.poi.xssf.model.SharedStringsTable;
  42. import org.apache.poi.xssf.model.StylesTable;
  43. import org.apache.poi.xssf.model.ThemesTable;
  44. import org.apache.poi.xssf.usermodel.XSSFDrawing;
  45. import org.apache.poi.xssf.usermodel.XSSFRelation;
  46. import org.apache.poi.xssf.usermodel.XSSFShape;
  47. import org.apache.xmlbeans.XmlException;
  48. import org.xml.sax.Attributes;
  49. import org.xml.sax.InputSource;
  50. import org.xml.sax.SAXException;
  51. import org.xml.sax.XMLReader;
  52. import org.xml.sax.helpers.DefaultHandler;
  53. /**
  54. * This class makes it easy to get at individual parts
  55. * of an OOXML .xlsx file, suitable for low memory sax
  56. * parsing or similar.
  57. * It makes up the core part of the EventUserModel support
  58. * for XSSF.
  59. */
  60. public class XSSFReader {
  61. private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
  62. protected OPCPackage pkg;
  63. protected PackagePart workbookPart;
  64. /**
  65. * Creates a new XSSFReader, for the given package
  66. */
  67. public XSSFReader(OPCPackage pkg) throws IOException, OpenXML4JException {
  68. this.pkg = pkg;
  69. PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
  70. PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
  71. // strict OOXML likely not fully supported, see #57699
  72. // this code is similar to POIXMLDocumentPart.getPartFromOPCPackage(), but I could not combine it
  73. // easily due to different return values
  74. if(coreDocRelationship == null) {
  75. if (this.pkg.getRelationshipsByType(
  76. PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0) != null) {
  77. throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
  78. }
  79. throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
  80. }
  81. // Get the part that holds the workbook
  82. workbookPart = this.pkg.getPart(coreDocRelationship);
  83. }
  84. /**
  85. * Opens up the Shared Strings Table, parses it, and
  86. * returns a handy object for working with
  87. * shared strings.
  88. */
  89. public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
  90. ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.SHARED_STRINGS.getContentType());
  91. return parts.size() == 0 ? null : new SharedStringsTable(parts.get(0));
  92. }
  93. /**
  94. * Opens up the Styles Table, parses it, and
  95. * returns a handy object for working with cell styles
  96. */
  97. public StylesTable getStylesTable() throws IOException, InvalidFormatException {
  98. ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.STYLES.getContentType());
  99. if(parts.size() == 0) return null;
  100. // Create the Styles Table, and associate the Themes if present
  101. StylesTable styles = new StylesTable(parts.get(0));
  102. parts = pkg.getPartsByContentType( XSSFRelation.THEME.getContentType());
  103. if(parts.size() != 0) {
  104. styles.setTheme(new ThemesTable(parts.get(0)));
  105. }
  106. return styles;
  107. }
  108. /**
  109. * Returns an InputStream to read the contents of the
  110. * shared strings table.
  111. */
  112. public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
  113. return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
  114. }
  115. /**
  116. * Returns an InputStream to read the contents of the
  117. * styles table.
  118. */
  119. public InputStream getStylesData() throws IOException, InvalidFormatException {
  120. return XSSFRelation.STYLES.getContents(workbookPart);
  121. }
  122. /**
  123. * Returns an InputStream to read the contents of the
  124. * themes table.
  125. */
  126. public InputStream getThemesData() throws IOException, InvalidFormatException {
  127. return XSSFRelation.THEME.getContents(workbookPart);
  128. }
  129. /**
  130. * Returns an InputStream to read the contents of the
  131. * main Workbook, which contains key overall data for
  132. * the file, including sheet definitions.
  133. */
  134. public InputStream getWorkbookData() throws IOException, InvalidFormatException {
  135. return workbookPart.getInputStream();
  136. }
  137. /**
  138. * Returns an InputStream to read the contents of the
  139. * specified Sheet.
  140. * @param relId The relationId of the sheet, from a r:id on the workbook
  141. */
  142. public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
  143. PackageRelationship rel = workbookPart.getRelationship(relId);
  144. if(rel == null) {
  145. throw new IllegalArgumentException("No Sheet found with r:id " + relId);
  146. }
  147. PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
  148. PackagePart sheet = pkg.getPart(relName);
  149. if(sheet == null) {
  150. throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
  151. }
  152. return sheet.getInputStream();
  153. }
  154. /**
  155. * Returns an Iterator which will let you get at all the
  156. * different Sheets in turn.
  157. * Each sheet's InputStream is only opened when fetched
  158. * from the Iterator. It's up to you to close the
  159. * InputStreams when done with each one.
  160. */
  161. public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
  162. return new SheetIterator(workbookPart);
  163. }
  164. /**
  165. * Iterator over sheet data.
  166. */
  167. public static class SheetIterator implements Iterator<InputStream> {
  168. /**
  169. * Maps relId and the corresponding PackagePart
  170. */
  171. private final Map<String, PackagePart> sheetMap;
  172. /**
  173. * Current sheet reference
  174. */
  175. XSSFSheetRef xssfSheetRef;
  176. /**
  177. * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
  178. * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
  179. * i.e. as they are stored in the underlying package
  180. */
  181. final Iterator<XSSFSheetRef> sheetIterator;
  182. /**
  183. * Construct a new SheetIterator
  184. *
  185. * @param wb package part holding workbook.xml
  186. */
  187. SheetIterator(PackagePart wb) throws IOException {
  188. /**
  189. * The order of sheets is defined by the order of CTSheet elements in workbook.xml
  190. */
  191. try {
  192. //step 1. Map sheet's relationship Id and the corresponding PackagePart
  193. sheetMap = new HashMap<String, PackagePart>();
  194. OPCPackage pkg = wb.getPackage();
  195. String REL_WORKSHEET = XSSFRelation.WORKSHEET.getRelation();
  196. String REL_CHARTSHEET = XSSFRelation.CHARTSHEET.getRelation();
  197. for(PackageRelationship rel : wb.getRelationships()){
  198. String relType = rel.getRelationshipType();
  199. if (relType.equals(REL_WORKSHEET) || relType.equals(REL_CHARTSHEET)) {
  200. PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
  201. sheetMap.put(rel.getId(), pkg.getPart(relName));
  202. }
  203. }
  204. //step 2. Read array of CTSheet elements, wrap it in a LinkedList
  205. //and construct an iterator
  206. sheetIterator = createSheetIteratorFromWB(wb);
  207. } catch (InvalidFormatException e){
  208. throw new POIXMLException(e);
  209. }
  210. }
  211. Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
  212. XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
  213. XMLReader xmlReader = null;
  214. try {
  215. xmlReader = SAXHelper.newXMLReader();
  216. } catch (ParserConfigurationException e) {
  217. throw new POIXMLException(e);
  218. } catch (SAXException e) {
  219. throw new POIXMLException(e);
  220. }
  221. xmlReader.setContentHandler(xmlSheetRefReader);
  222. try {
  223. xmlReader.parse(new InputSource(wb.getInputStream()));
  224. } catch (SAXException e) {
  225. throw new POIXMLException(e);
  226. }
  227. List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
  228. for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
  229. //if there's no relationship id, silently skip the sheet
  230. String sheetId = xssfSheetRef.getId();
  231. if (sheetId != null && sheetId.length() > 0) {
  232. validSheets.add(xssfSheetRef);
  233. }
  234. }
  235. return validSheets.iterator();
  236. }
  237. /**
  238. * Returns <tt>true</tt> if the iteration has more elements.
  239. *
  240. * @return <tt>true</tt> if the iterator has more elements.
  241. */
  242. @Override
  243. public boolean hasNext() {
  244. return sheetIterator.hasNext();
  245. }
  246. /**
  247. * Returns input stream of the next sheet in the iteration
  248. *
  249. * @return input stream of the next sheet in the iteration
  250. */
  251. @Override
  252. public InputStream next() {
  253. xssfSheetRef = sheetIterator.next();
  254. String sheetId = xssfSheetRef.getId();
  255. try {
  256. PackagePart sheetPkg = sheetMap.get(sheetId);
  257. return sheetPkg.getInputStream();
  258. } catch(IOException e) {
  259. throw new POIXMLException(e);
  260. }
  261. }
  262. /**
  263. * Returns name of the current sheet
  264. *
  265. * @return name of the current sheet
  266. */
  267. public String getSheetName() {
  268. return xssfSheetRef.getName();
  269. }
  270. /**
  271. * Returns the comments associated with this sheet,
  272. * or null if there aren't any
  273. */
  274. public CommentsTable getSheetComments() {
  275. PackagePart sheetPkg = getSheetPart();
  276. // Do we have a comments relationship? (Only ever one if so)
  277. try {
  278. PackageRelationshipCollection commentsList =
  279. sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
  280. if(commentsList.size() > 0) {
  281. PackageRelationship comments = commentsList.getRelationship(0);
  282. PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
  283. PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
  284. return new CommentsTable(commentsPart);
  285. }
  286. } catch (InvalidFormatException e) {
  287. return null;
  288. } catch (IOException e) {
  289. return null;
  290. }
  291. return null;
  292. }
  293. /**
  294. * Returns the shapes associated with this sheet,
  295. * an empty list or null if there is an exception
  296. */
  297. public List<XSSFShape> getShapes() {
  298. PackagePart sheetPkg = getSheetPart();
  299. List<XSSFShape> shapes= new LinkedList<XSSFShape>();
  300. // Do we have a comments relationship? (Only ever one if so)
  301. try {
  302. PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
  303. for (int i = 0; i < drawingsList.size(); i++){
  304. PackageRelationship drawings = drawingsList.getRelationship(i);
  305. PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
  306. PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
  307. if (drawingsPart == null) {
  308. //parts can go missing; Excel ignores them silently -- TIKA-2134
  309. LOGGER.log(POILogger.WARN, "Missing drawing: "+drawingsName +". Skipping it.");
  310. continue;
  311. }
  312. XSSFDrawing drawing = new XSSFDrawing(drawingsPart);
  313. for (XSSFShape shape : drawing.getShapes()){
  314. shapes.add(shape);
  315. }
  316. }
  317. } catch (XmlException e){
  318. return null;
  319. } catch (InvalidFormatException e) {
  320. return null;
  321. } catch (IOException e) {
  322. return null;
  323. }
  324. return shapes;
  325. }
  326. public PackagePart getSheetPart() {
  327. String sheetId = xssfSheetRef.getId();
  328. return sheetMap.get(sheetId);
  329. }
  330. /**
  331. * We're read only, so remove isn't supported
  332. */
  333. @Override
  334. public void remove() {
  335. throw new IllegalStateException("Not supported");
  336. }
  337. }
  338. protected final static class XSSFSheetRef {
  339. //do we need to store sheetId, too?
  340. private final String id;
  341. private final String name;
  342. public XSSFSheetRef(String id, String name) {
  343. this.id = id;
  344. this.name = name;
  345. }
  346. public String getId() {
  347. return id;
  348. }
  349. public String getName() {
  350. return name;
  351. }
  352. }
  353. //scrapes sheet reference info and order from workbook.xml
  354. private static class XMLSheetRefReader extends DefaultHandler {
  355. private final static String SHEET = "sheet";
  356. private final static String ID = "id";
  357. private final static String NAME = "name";
  358. private final List<XSSFSheetRef> sheetRefs = new LinkedList();
  359. @Override
  360. public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
  361. if (localName.toLowerCase(Locale.US).equals(SHEET)) {
  362. String name = null;
  363. String id = null;
  364. for (int i = 0; i < attrs.getLength(); i++) {
  365. if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) {
  366. name = attrs.getValue(i);
  367. } else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) {
  368. id = attrs.getValue(i);
  369. }
  370. sheetRefs.add(new XSSFSheetRef(id, name));
  371. }
  372. }
  373. }
  374. List<XSSFSheetRef> getSheetRefs() {
  375. return Collections.unmodifiableList(sheetRefs);
  376. }
  377. }
  378. }