You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

XWPFDocument.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xwpf.usermodel;
  16. import java.io.IOException;
  17. import java.io.OutputStream;
  18. import java.io.InputStream;
  19. import java.util.*;
  20. import org.apache.poi.POIXMLDocument;
  21. import org.apache.poi.POIXMLException;
  22. import org.apache.poi.POIXMLDocumentPart;
  23. import org.apache.poi.POIXMLProperties;
  24. import org.apache.poi.util.PackageHelper;
  25. import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
  26. import org.apache.xmlbeans.XmlException;
  27. import org.apache.xmlbeans.XmlOptions;
  28. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  29. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  30. import org.apache.poi.openxml4j.opc.*;
  31. import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
  32. import javax.xml.namespace.QName;
  33. /**
  34. * Experimental class to do low level processing
  35. * of docx files.
  36. *
  37. * If you are using these low level classes, then you
  38. * will almost certainly need to refer to the OOXML
  39. * specifications from
  40. * http://www.ecma-international.org/publications/standards/Ecma-376.htm
  41. *
  42. * WARNING - APIs expected to change rapidly
  43. */
  44. public class XWPFDocument extends POIXMLDocument {
  45. private CTDocument1 ctDocument;
  46. private XWPFSettings settings;
  47. protected List<XWPFComment> comments;
  48. protected List<XWPFHyperlink> hyperlinks;
  49. protected List<XWPFParagraph> paragraphs;
  50. protected List<XWPFTable> tables;
  51. protected Map<Integer, XWPFFootnote> footnotes;
  52. protected Map<Integer, XWPFFootnote> endnotes;
  53. /** Handles the joy of different headers/footers for different pages */
  54. private XWPFHeaderFooterPolicy headerFooterPolicy;
  /**
   * Opens a document from an already opened OPC package.
   *
   * @param pkg the package to read; it is passed through {@code ensureWriteAccess} first
   * @throws IOException declared by the underlying open/load calls
   */
  public XWPFDocument(OPCPackage pkg) throws IOException {
    super(ensureWriteAccess(pkg));

    // This document is the root of the POIXMLDocumentPart tree built here.
    this.load(XWPFFactory.getInstance());
  }
  /**
   * Opens a document by reading a package from the given stream.
   *
   * @param is the stream handed to {@code PackageHelper.open}
   * @throws IOException declared by the underlying open/load calls
   */
  public XWPFDocument(InputStream is) throws IOException {
    super(PackageHelper.open(is));

    // This document is the root of the POIXMLDocumentPart tree built here.
    this.load(XWPFFactory.getInstance());
  }
  /**
   * Creates a new, empty document on top of a freshly created package
   * (see {@code newPackage()}) and initialises its default content.
   */
  public XWPFDocument() {
    super(newPackage());
    this.onDocumentCreate();
  }
  69. @Override
  70. protected void onDocumentRead() throws IOException {
  71. hyperlinks = new ArrayList<XWPFHyperlink>();
  72. comments = new ArrayList<XWPFComment>();
  73. paragraphs = new ArrayList<XWPFParagraph>();
  74. tables= new ArrayList<XWPFTable>();
  75. footnotes = new HashMap<Integer, XWPFFootnote>();
  76. endnotes = new HashMap<Integer, XWPFFootnote>();
  77. try {
  78. DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
  79. ctDocument = doc.getDocument();
  80. CTBody body = ctDocument.getBody();
  81. initFootnotes();
  82. // filling paragraph list
  83. for (CTP p : body.getPArray()) {
  84. paragraphs.add(new XWPFParagraph(p, this));
  85. }
  86. // Get any tables
  87. for(CTTbl table : body.getTblArray()) {
  88. tables.add(new XWPFTable(this, table));
  89. }
  90. // Sort out headers and footers
  91. if (doc.getDocument().getBody().getSectPr() != null)
  92. headerFooterPolicy = new XWPFHeaderFooterPolicy(this);
  93. for(POIXMLDocumentPart p : getRelations()){
  94. String relation = p.getPackageRelationship().getRelationshipType();
  95. if(relation.equals(XWPFRelation.COMMENT.getRelation())){
  96. CommentsDocument cmntdoc = CommentsDocument.Factory.parse(p.getPackagePart().getInputStream());
  97. for(CTComment ctcomment : cmntdoc.getComments().getCommentArray()) {
  98. comments.add(new XWPFComment(ctcomment));
  99. }
  100. }
  101. else if(relation.equals(XWPFRelation.SETTINGS.getRelation())){
  102. settings = (XWPFSettings)p;
  103. }
  104. }
  105. initHyperlinks();
  106. } catch (XmlException e) {
  107. throw new POIXMLException(e);
  108. }
  109. }
  110. private void initHyperlinks(){
  111. // Get the hyperlinks
  112. // TODO: make me optional/separated in private function
  113. try {
  114. Iterator <PackageRelationship> relIter =
  115. getPackagePart().getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation()).iterator();
  116. while(relIter.hasNext()) {
  117. PackageRelationship rel = relIter.next();
  118. hyperlinks.add(new XWPFHyperlink(rel.getId(), rel.getTargetURI().toString()));
  119. }
  120. } catch (InvalidFormatException e){
  121. throw new POIXMLException(e);
  122. }
  123. }
  124. private void initFootnotes() throws XmlException, IOException {
  125. for(POIXMLDocumentPart p : getRelations()){
  126. String relation = p.getPackageRelationship().getRelationshipType();
  127. if(relation.equals(XWPFRelation.FOOTNOTE.getRelation())){
  128. FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
  129. for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) {
  130. footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
  131. }
  132. } else if (relation.equals(XWPFRelation.ENDNOTE.getRelation())){
  133. EndnotesDocument endnotesDocument = EndnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
  134. for(CTFtnEdn ctFtnEdn : endnotesDocument.getEndnotes().getEndnoteArray()) {
  135. endnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
  136. }
  137. }
  138. }
  139. }
  140. /**
  141. * Create a new SpreadsheetML package and setup the default minimal content
  142. */
  143. protected static OPCPackage newPackage() {
  144. try {
  145. OPCPackage pkg = OPCPackage.create(PackageHelper.createTempFile());
  146. // Main part
  147. PackagePartName corePartName = PackagingURIHelper.createPartName(XWPFRelation.DOCUMENT.getDefaultFileName());
  148. // Create main part relationship
  149. pkg.addRelationship(corePartName, TargetMode.INTERNAL, PackageRelationshipTypes.CORE_DOCUMENT);
  150. // Create main document part
  151. pkg.createPart(corePartName, XWPFRelation.DOCUMENT.getContentType());
  152. pkg.getPackageProperties().setCreatorProperty(DOCUMENT_CREATOR);
  153. return pkg;
  154. } catch (Exception e){
  155. throw new POIXMLException(e);
  156. }
  157. }
  158. /**
  159. * Create a new CTWorkbook with all values set to default
  160. */
  161. protected void onDocumentCreate() {
  162. hyperlinks = new ArrayList<XWPFHyperlink>();
  163. comments = new ArrayList<XWPFComment>();
  164. paragraphs = new ArrayList<XWPFParagraph>();
  165. tables= new ArrayList<XWPFTable>();
  166. ctDocument = CTDocument1.Factory.newInstance();
  167. ctDocument.addNewBody();
  168. settings = (XWPFSettings) createRelationship(XWPFRelation.SETTINGS, XWPFFactory.getInstance());
  169. POIXMLProperties.ExtendedProperties expProps = getProperties().getExtendedProperties();
  170. expProps.getUnderlyingProperties().setApplication(DOCUMENT_CREATOR);
  171. }
  172. /**
  173. * Returns the low level document base object
  174. */
  175. public CTDocument1 getDocument() {
  176. return ctDocument;
  177. }
  178. public Iterator<XWPFParagraph> getParagraphsIterator() {
  179. return paragraphs.iterator();
  180. }
  181. public XWPFParagraph[] getParagraphs() {
  182. return paragraphs.toArray(
  183. new XWPFParagraph[paragraphs.size()]
  184. );
  185. }
  186. public Iterator<XWPFTable> getTablesIterator()
  187. {
  188. return tables.iterator();
  189. }
  190. public XWPFHyperlink getHyperlinkByID(String id) {
  191. Iterator<XWPFHyperlink> iter = hyperlinks.iterator();
  192. while(iter.hasNext())
  193. {
  194. XWPFHyperlink link = iter.next();
  195. if(link.getId().equals(id))
  196. return link;
  197. }
  198. return null;
  199. }
  200. public XWPFFootnote getFootnoteByID(int id) {
  201. return footnotes.get(id);
  202. }
  203. public XWPFFootnote getEndnoteByID(int id) {
  204. return endnotes.get(id);
  205. }
  206. public Collection<XWPFFootnote> getFootnotes() {
  207. return footnotes == null ? new ArrayList<XWPFFootnote>() : footnotes.values();
  208. }
  209. public XWPFHyperlink[] getHyperlinks() {
  210. return hyperlinks.toArray(
  211. new XWPFHyperlink[hyperlinks.size()]
  212. );
  213. }
  214. public XWPFComment getCommentByID(String id) {
  215. Iterator<XWPFComment> iter = comments.iterator();
  216. while(iter.hasNext())
  217. {
  218. XWPFComment comment = iter.next();
  219. if(comment.getId().equals(id))
  220. return comment;
  221. }
  222. return null;
  223. }
  224. public XWPFComment[] getComments() {
  225. return comments.toArray(
  226. new XWPFComment[comments.size()]
  227. );
  228. }
  229. /**
  230. * Get the document part that's defined as the
  231. * given relationship of the core document.
  232. */
  233. public PackagePart getPartById(String id) {
  234. try {
  235. return getTargetPart(
  236. getCorePart().getRelationship(id)
  237. );
  238. } catch(InvalidFormatException e) {
  239. throw new IllegalArgumentException(e);
  240. }
  241. }
  242. /**
  243. * Returns the policy on headers and footers, which
  244. * also provides a way to get at them.
  245. */
  246. public XWPFHeaderFooterPolicy getHeaderFooterPolicy() {
  247. return headerFooterPolicy;
  248. }
  249. /**
  250. * Returns the styles object used
  251. */
  252. public CTStyles getStyle() throws XmlException, IOException {
  253. PackagePart[] parts;
  254. try {
  255. parts = getRelatedByType(XWPFRelation.STYLES.getRelation());
  256. } catch(InvalidFormatException e) {
  257. throw new IllegalStateException(e);
  258. }
  259. if(parts.length != 1) {
  260. throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
  261. }
  262. StylesDocument sd =
  263. StylesDocument.Factory.parse(parts[0].getInputStream());
  264. return sd.getStyles();
  265. }
  266. /**
  267. * Get the document's embedded files.
  268. */
  269. public List<PackagePart> getAllEmbedds() throws OpenXML4JException {
  270. List<PackagePart> embedds = new LinkedList<PackagePart>();
  271. // Get the embeddings for the workbook
  272. for(PackageRelationship rel : getPackagePart().getRelationshipsByType(OLE_OBJECT_REL_TYPE))
  273. embedds.add(getTargetPart(rel));
  274. for(PackageRelationship rel : getPackagePart().getRelationshipsByType(PACK_OBJECT_REL_TYPE))
  275. embedds.add(getTargetPart(rel));
  276. return embedds;
  277. }
  278. @Override
  279. protected void commit() throws IOException {
  280. XmlOptions xmlOptions = new XmlOptions(DEFAULT_XML_OPTIONS);
  281. xmlOptions.setSaveSyntheticDocumentElement(new QName(CTDocument1.type.getName().getNamespaceURI(), "document"));
  282. Map<String, String> map = new HashMap<String, String>();
  283. map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m");
  284. map.put("urn:schemas-microsoft-com:office:office", "o");
  285. map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r");
  286. map.put("urn:schemas-microsoft-com:vml", "v");
  287. map.put("http://schemas.openxmlformats.org/markup-compatibility/2006", "ve");
  288. map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w");
  289. map.put("urn:schemas-microsoft-com:office:word", "w10");
  290. map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne");
  291. map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp");
  292. xmlOptions.setSaveSuggestedPrefixes(map);
  293. PackagePart part = getPackagePart();
  294. OutputStream out = part.getOutputStream();
  295. ctDocument.save(out, xmlOptions);
  296. out.close();
  297. }
  298. /**
  299. * Appends a new paragraph to this document
  300. *
  301. * @return a new paragraph
  302. */
  303. public XWPFParagraph createParagraph(){
  304. return new XWPFParagraph(ctDocument.getBody().addNewP(), this);
  305. }
  306. /**
  307. * Create an empty table with one row and one column as default.
  308. *
  309. * @return a new table
  310. */
  311. public XWPFTable createTable(){
  312. return new XWPFTable(this, ctDocument.getBody().addNewTbl());
  313. }
  314. /**
  315. * Create an empty table with a number of rows and cols specified
  316. * @param rows
  317. * @param cols
  318. * @return table
  319. */
  320. public XWPFTable createTable(int rows, int cols) {
  321. return new XWPFTable(this, ctDocument.getBody().addNewTbl(), rows, cols);
  322. }
  323. public void createTOC() {
  324. CTSdtBlock block = this.getDocument().getBody().addNewSdt();
  325. TOC toc = new TOC(block);
  326. int i = 1;
  327. for (Iterator<XWPFParagraph> iterator = getParagraphsIterator() ; iterator.hasNext() ; ) {
  328. XWPFParagraph par = iterator.next();
  329. String parStyle = par.getStyle();
  330. if (parStyle != null && parStyle.substring(0, 7).equals("Heading")) {
  331. try {
  332. int level = Integer.valueOf(parStyle.substring("Heading".length()));
  333. toc.addRow(level, par.getText(), 1, "112723803");
  334. }
  335. catch (NumberFormatException e) {
  336. e.printStackTrace();
  337. }
  338. }
  339. }
  340. }
  341. /**
  342. * Verifies that the documentProtection tag in settings.xml file <br/>
  343. * specifies that the protection is enforced (w:enforcement="1") <br/>
  344. * and that the kind of protection is readOnly (w:edit="readOnly")<br/>
  345. * <br/>
  346. * sample snippet from settings.xml
  347. * <pre>
  348. * &lt;w:settings ... &gt;
  349. * &lt;w:documentProtection w:edit=&quot;readOnly&quot; w:enforcement=&quot;1&quot;/&gt;
  350. * </pre>
  351. *
  352. * @return true if documentProtection is enforced with option readOnly
  353. */
  354. public boolean isEnforcedReadonlyProtection() {
  355. return settings.isEnforcedWith(STDocProtect.READ_ONLY);
  356. }
  357. /**
  358. * Verifies that the documentProtection tag in settings.xml file <br/>
  359. * specifies that the protection is enforced (w:enforcement="1") <br/>
  360. * and that the kind of protection is forms (w:edit="forms")<br/>
  361. * <br/>
  362. * sample snippet from settings.xml
  363. * <pre>
  364. * &lt;w:settings ... &gt;
  365. * &lt;w:documentProtection w:edit=&quot;forms&quot; w:enforcement=&quot;1&quot;/&gt;
  366. * </pre>
  367. *
  368. * @return true if documentProtection is enforced with option forms
  369. */
  370. public boolean isEnforcedFillingFormsProtection() {
  371. return settings.isEnforcedWith(STDocProtect.FORMS);
  372. }
  373. /**
  374. * Verifies that the documentProtection tag in settings.xml file <br/>
  375. * specifies that the protection is enforced (w:enforcement="1") <br/>
  376. * and that the kind of protection is comments (w:edit="comments")<br/>
  377. * <br/>
  378. * sample snippet from settings.xml
  379. * <pre>
  380. * &lt;w:settings ... &gt;
  381. * &lt;w:documentProtection w:edit=&quot;comments&quot; w:enforcement=&quot;1&quot;/&gt;
  382. * </pre>
  383. *
  384. * @return true if documentProtection is enforced with option comments
  385. */
  386. public boolean isEnforcedCommentsProtection() {
  387. return settings.isEnforcedWith(STDocProtect.COMMENTS);
  388. }
  389. /**
  390. * Verifies that the documentProtection tag in settings.xml file <br/>
  391. * specifies that the protection is enforced (w:enforcement="1") <br/>
  392. * and that the kind of protection is trackedChanges (w:edit="trackedChanges")<br/>
  393. * <br/>
  394. * sample snippet from settings.xml
  395. * <pre>
  396. * &lt;w:settings ... &gt;
  397. * &lt;w:documentProtection w:edit=&quot;trackedChanges&quot; w:enforcement=&quot;1&quot;/&gt;
  398. * </pre>
  399. *
  400. * @return true if documentProtection is enforced with option trackedChanges
  401. */
  402. public boolean isEnforcedTrackedChangesProtection() {
  403. return settings.isEnforcedWith(STDocProtect.TRACKED_CHANGES);
  404. }
  405. /**
  406. * Enforces the readOnly protection.<br/>
  407. * In the documentProtection tag inside settings.xml file, <br/>
  408. * it sets the value of enforcement to "1" (w:enforcement="1") <br/>
  409. * and the value of edit to readOnly (w:edit="readOnly")<br/>
  410. * <br/>
  411. * sample snippet from settings.xml
  412. * <pre>
  413. * &lt;w:settings ... &gt;
  414. * &lt;w:documentProtection w:edit=&quot;readOnly&quot; w:enforcement=&quot;1&quot;/&gt;
  415. * </pre>
  416. */
  417. public void enforceReadonlyProtection() {
  418. settings.setEnforcementEditValue(STDocProtect.READ_ONLY);
  419. }
  420. /**
  421. * Enforce the Filling Forms protection.<br/>
  422. * In the documentProtection tag inside settings.xml file, <br/>
  423. * it sets the value of enforcement to "1" (w:enforcement="1") <br/>
  424. * and the value of edit to forms (w:edit="forms")<br/>
  425. * <br/>
  426. * sample snippet from settings.xml
  427. * <pre>
  428. * &lt;w:settings ... &gt;
  429. * &lt;w:documentProtection w:edit=&quot;forms&quot; w:enforcement=&quot;1&quot;/&gt;
  430. * </pre>
  431. */
  432. public void enforceFillingFormsProtection() {
  433. settings.setEnforcementEditValue(STDocProtect.FORMS);
  434. }
  435. /**
  436. * Enforce the Comments protection.<br/>
  437. * In the documentProtection tag inside settings.xml file,<br/>
  438. * it sets the value of enforcement to "1" (w:enforcement="1") <br/>
  439. * and the value of edit to comments (w:edit="comments")<br/>
  440. * <br/>
  441. * sample snippet from settings.xml
  442. * <pre>
  443. * &lt;w:settings ... &gt;
  444. * &lt;w:documentProtection w:edit=&quot;comments&quot; w:enforcement=&quot;1&quot;/&gt;
  445. * </pre>
  446. */
  447. public void enforceCommentsProtection() {
  448. settings.setEnforcementEditValue(STDocProtect.COMMENTS);
  449. }
  450. /**
  451. * Enforce the Tracked Changes protection.<br/>
  452. * In the documentProtection tag inside settings.xml file, <br/>
  453. * it sets the value of enforcement to "1" (w:enforcement="1") <br/>
  454. * and the value of edit to trackedChanges (w:edit="trackedChanges")<br/>
  455. * <br/>
  456. * sample snippet from settings.xml
  457. * <pre>
  458. * &lt;w:settings ... &gt;
  459. * &lt;w:documentProtection w:edit=&quot;trackedChanges&quot; w:enforcement=&quot;1&quot;/&gt;
  460. * </pre>
  461. */
  462. public void enforceTrackedChangesProtection() {
  463. settings.setEnforcementEditValue(STDocProtect.TRACKED_CHANGES);
  464. }
  465. /**
  466. * Remove protection enforcement.<br/>
  467. * In the documentProtection tag inside settings.xml file <br/>
  468. * it sets the value of enforcement to "0" (w:enforcement="0") <br/>
  469. */
  470. public void removeProtectionEnforcement() {
  471. settings.removeEnforcement();
  472. }
  473. }