You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

PictureRunMapper.java 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.usermodel;
  16. import java.util.ArrayList;
  17. import java.util.HashMap;
  18. import java.util.HashSet;
  19. import java.util.List;
  20. import java.util.Map;
  21. import java.util.Set;
  22. import org.apache.poi.hwpf.HWPFDocument;
  23. import org.apache.poi.hwpf.model.PicturesTable;
  24. /**
  25. * Helper class for mapping Pictures to Runs within
  26. * a document.
  27. *
  28. * This allows for easy access to Pictures by Run,
  29. * as well as a way to find "Escher Floating"
  30. * Pictures which don't have the regular \u0001
  31. * references in the main text.
  32. *
  33. * Provides access to the pictures by offset, iteration
  34. * over the un-claimed, and peeking forward.
  35. */
  36. public class PictureRunMapper {
  37. private PicturesTable picturesTable;
  38. private Set<Picture> claimed = new HashSet<>();
  39. private Map<Integer, Picture> lookup;
  40. private List<Picture> nonU1based;
  41. private List<Picture> all;
  42. private int pn;
  43. public PictureRunMapper(HWPFDocument doc) {
  44. picturesTable = doc.getPicturesTable();
  45. all = picturesTable.getAllPictures();
  46. // Build the Offset-Picture lookup map
  47. lookup = new HashMap<>();
  48. for (Picture p : all) {
  49. lookup.put(p.getStartOffset(), p);
  50. }
  51. // Work out which Pictures aren't referenced by
  52. // a \u0001 in the main text
  53. // These are \u0008 escher floating ones, ones
  54. // found outside the normal text, and who
  55. // knows what else...
  56. nonU1based = new ArrayList<>();
  57. nonU1based.addAll(all);
  58. Range r = doc.getRange();
  59. for (int i = 0; i < r.numCharacterRuns(); i++) {
  60. CharacterRun cr = r.getCharacterRun(i);
  61. if (picturesTable.hasPicture(cr)) {
  62. Picture p = getFor(cr);
  63. int at = nonU1based.indexOf(p);
  64. nonU1based.set(at, null);
  65. }
  66. }
  67. }
  68. /**
  69. * Does this run have a Picture in it?
  70. *
  71. * @see #getFor(CharacterRun)
  72. */
  73. public boolean hasPicture(CharacterRun cr) {
  74. return picturesTable.hasPicture(cr);
  75. }
  76. /**
  77. * Get the Picture for this run, if any
  78. */
  79. public Picture getFor(CharacterRun cr) {
  80. return lookup.get(cr.getPicOffset());
  81. }
  82. /**
  83. * Mark a Picture as claimed.
  84. * Used when trying to match up non-Run based pictures
  85. */
  86. public void markAsClaimed(Picture picture) {
  87. claimed.add(picture);
  88. }
  89. /**
  90. * Has the given Picture been claimed by a non-Run yet?
  91. */
  92. public boolean hasBeenClaimed(Picture picture) {
  93. return claimed.contains(picture);
  94. }
  95. /**
  96. * Which Picture is this one of all the Pictures in
  97. * the Document?
  98. *
  99. * Useful when trying to extract all Pictures with
  100. * unique numbers or references
  101. */
  102. public int pictureNumber(Picture picture) {
  103. return all.indexOf(picture) + 1;
  104. }
  105. /**
  106. * Return the next unclaimed one, used towards
  107. * the end
  108. */
  109. public Picture nextUnclaimed() {
  110. Picture p = null;
  111. while (pn < nonU1based.size()) {
  112. p = nonU1based.get(pn);
  113. pn++;
  114. if (p != null) {
  115. claimed.add(p);
  116. return p;
  117. }
  118. }
  119. return null;
  120. }
  121. }