You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ReplacingInputStream.java 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.util;
  16. import static java.nio.charset.StandardCharsets.UTF_8;
  17. import java.io.FilterInputStream;
  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.util.Arrays;
  21. /**
  22. * Simple FilterInputStream that can replace occurrences of bytes with something else.
  23. *
  24. * This has been taken from inbot-utils. (MIT licensed)
  25. *
  26. * @see <a href="https://github.com/Inbot/inbot-utils">inbot-utils</a>
  27. */
  28. @Internal
  public class ReplacingInputStream extends FilterInputStream {

      // NOTE: only the read() family is overridden here. skip(), available(), mark() and
      // reset() are inherited unchanged from FilterInputStream, so they act on the wrapped
      // stream directly and know nothing about bytes buffered or replaced by this class.

      // Raw input values (0-255, or -1 for end of stream) that were consumed by a failed
      // partial match and still have to be scanned again. The live entries are
      // buf[unbufferIndex] .. buf[bufferedEnd - 1]. Its capacity is pattern.length, which is
      // enough because a failed attempt has matched at most pattern.length - 1 bytes.
      final int[] buf;

      // number of leading pattern bytes matched by the attempt in progress (0 when idle)
      private int matchedIndex;

      // index in buf of the next value to hand out for re-scanning
      private int unbufferIndex;

      // exclusive end of the live region of buf
      private int bufferedEnd;

      // index of the next replacement byte to emit while in State.REPLACING
      private int replacedIndex;

      private final byte[] pattern;

      // never null; an empty array means "delete every occurrence of the pattern"
      private final byte[] replacement;

      private State state = State.NOT_MATCHED;

      // simple state machine for keeping track of what we are doing
      private enum State {
          /** No pattern byte is currently matched. */
          NOT_MATCHED,
          /** A proper, non-empty prefix of the pattern has been matched. */
          MATCHING,
          /** The pattern was fully matched; replacement bytes are being emitted. */
          REPLACING
      }

      /**
       * Replace occurrences of pattern in the input. Both strings are converted to bytes
       * using UTF-8, so the input is assumed to be UTF-8 encoded. If that is not the case,
       * use the byte[] based constructor.
       *
       * @param in input
       * @param pattern pattern to replace; must not be null or empty
       * @param replacement the replacement, or null to remove every occurrence
       * @throws IllegalArgumentException if pattern is empty
       */
      public ReplacingInputStream(InputStream in, String pattern, String replacement) {
          this(in, pattern.getBytes(UTF_8), replacement == null ? null : replacement.getBytes(UTF_8));
      }

      /**
       * Replace occurrences of pattern in the input.<p>
       *
       * Matches are found left to right and do not overlap; bytes produced by a
       * replacement are not scanned again.<p>
       *
       * If you want to normalize DOS (\r\n) and old MAC (\r) line endings to UNIX (\n),
       * you can call the following:<br>
       * {@code new ReplacingInputStream(new ReplacingInputStream(is, "\r\n", "\n"), "\r", "\n")}
       *
       * @param in input
       * @param pattern pattern to replace; must not be null or empty
       * @param replacement the replacement, or null/empty to remove every occurrence
       * @throws IllegalArgumentException if pattern is null or empty
       */
      public ReplacingInputStream(InputStream in, byte[] pattern, byte[] replacement) {
          super(in);
          if (pattern == null || pattern.length == 0) {
              throw new IllegalArgumentException("pattern length should be > 0");
          }
          // defensive copies: later changes to the caller's arrays must not affect matching
          this.pattern = pattern.clone();
          this.replacement = replacement == null ? new byte[0] : replacement.clone();
          // a failed attempt never has to give back more than pattern.length values
          buf = new int[pattern.length];
      }

      /**
       * Reads up to len replaced bytes into b, starting at offset off.
       *
       * @return the number of bytes stored, or -1 if the stream is already exhausted
       * @throws NullPointerException if b is null
       * @throws IndexOutOfBoundsException if off/len do not describe a range inside b
       */
      @Override
      public int read(byte[] b, int off, int len) throws IOException {
          // copy of parent logic; we need to call our own read() instead of super.read(),
          // which delegates to the wrapped stream and would bypass the replacement
          if (b == null) {
              throw new NullPointerException();
          } else if (off < 0 || len < 0 || len > b.length - off) {
              throw new IndexOutOfBoundsException();
          } else if (len == 0) {
              return 0;
          }

          int c = read();
          if (c == -1) {
              return -1;
          }
          b[off] = (byte) c;

          int i = 1;
          for (; i < len; i++) {
              c = read();
              if (c == -1) {
                  break;
              }
              b[off + i] = (byte) c;
          }
          return i;
      }

      @Override
      public int read(byte[] b) throws IOException {
          // call our own read
          return read(b, 0, b.length);
      }

      /**
       * Returns the next byte of the replaced stream as a value in 0-255, or -1 at the
       * end of the stream.
       */
      @Override
      public int read() throws IOException {
          // Iterative on purpose: a long run of matches with an empty replacement must not
          // grow the call stack.
          while (true) {
              if (state == State.REPLACING) {
                  // mask to 0-255 so that bytes >= 0x80 are not returned as negative values
                  // (0xFF would otherwise look like end of stream)
                  int out = replacement[replacedIndex++] & 0xFF;
                  if (replacedIndex == replacement.length) {
                      state = State.NOT_MATCHED;
                      replacedIndex = 0;
                  }
                  return out;
              }

              int next = nextRaw();
              // compare unsigned: next is 0-255 or -1, so -1 can never equal a pattern byte
              if (next == (pattern[matchedIndex] & 0xFF)) {
                  matchedIndex++;
                  if (matchedIndex == pattern.length) {
                      completeMatch();
                  } else {
                      state = State.MATCHING;
                  }
                  continue;
              }

              if (matchedIndex == 0) {
                  // nothing pending: pass the value (or the end-of-stream marker) through
                  return next;
              }
              return abandonPartialMatch(next);
          }
      }

      // Returns the next raw input value: queued re-scan values first, then the wrapped stream.
      private int nextRaw() throws IOException {
          return unbufferIndex < bufferedEnd ? buf[unbufferIndex++] : super.read();
      }

      // Called once the whole pattern was seen: either start emitting the replacement or,
      // if there is nothing to emit, go straight back to scanning.
      private void completeMatch() {
          matchedIndex = 0;
          replacedIndex = 0;
          state = replacement.length == 0 ? State.NOT_MATCHED : State.REPLACING;
      }

      // Called when 'mismatched' broke a partial match. The first matched byte is known not
      // to start a match, so it is returned to the caller; the remaining matched bytes and
      // the mismatching value are queued so that they are scanned again.
      private int abandonPartialMatch(int mismatched) {
          // (matchedIndex - 1) pattern bytes plus the mismatching value
          int requeued = matchedIndex;
          if (unbufferIndex == bufferedEnd) {
              // queue is empty: start over at the front of buf
              unbufferIndex = 0;
              bufferedEnd = requeued;
          } else {
              // queue still has entries, so every value of this attempt came out of the
              // queue; step back over the slots just consumed (minus the emitted one)
              unbufferIndex -= requeued;
          }

          int w = unbufferIndex;
          for (int i = 1; i < matchedIndex; i++) {
              // the matched bytes are by definition equal to the pattern bytes
              buf[w++] = pattern[i] & 0xFF;
          }
          buf[w] = mismatched;

          matchedIndex = 0;
          state = State.NOT_MATCHED;
          return pattern[0] & 0xFF;
      }

      @Override
      public String toString() {
          return state.name() + " " + matchedIndex + " " + replacedIndex + " " + unbufferIndex;
      }
  }