You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ReplacingInputStream.java 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.util;
  16. import java.io.FilterInputStream;
  17. import java.io.IOException;
  18. import java.io.InputStream;
  19. import java.nio.charset.Charset;
  20. import java.util.Arrays;
  21. /**
  22. * Simple FilterInputStream that can replace occurrences of bytes with something else.
  23. *
  24. * This has been taken from inbot-utils. (MIT licensed)
  25. *
  26. * @see <a href="https://github.com/Inbot/inbot-utils">inbot-utils</a>
  27. */
  28. @Internal
  public class ReplacingInputStream extends FilterInputStream {

      /** Converts a signed {@code byte} into the 0..255 value used by {@link InputStream#read()}. */
      private static final int BYTE_MASK = 0xFF;

      private static final Charset UTF8 = Charset.forName("UTF-8");

      /** Simple state machine for keeping track of what {@link #read()} is doing. */
      private enum State {
          /** No candidate match is in progress. */
          NOT_MATCHED,
          /** The most recent bytes equal a proper prefix of the pattern. */
          MATCHING,
          /** A full match was found and the replacement is being handed out. */
          REPLACING
      }

      /**
       * Read-ahead store: bytes taken from the underlying stream during a partial match
       * that failed. They are examined again before the underlying stream is read,
       * because a real match may start inside them. Live entries are
       * {@code buf[unbufferIndex .. bufferedEnd-1]}; an entry may be {@code -1} (end of stream).
       */
      final int[] buf;
      /** Number of pattern bytes matched by the candidate currently in progress. */
      private int matchedIndex = 0;
      /** Index of the next entry of {@link #buf} to examine. */
      private int unbufferIndex = 0;
      /** One past the last live entry of {@link #buf}. */
      private int bufferedEnd = 0;
      /** Index of the next replacement byte to hand out. */
      private int replacedIndex = 0;
      private final byte[] pattern;
      /** Never {@code null}; an empty array means "delete the pattern". */
      private final byte[] replacement;
      private State state = State.NOT_MATCHED;

      /**
       * Replace occurrences of pattern in the input. Note: input is assumed to be UTF-8 encoded.
       * If not the case use byte[] based pattern and replacement.
       *
       * @param in input
       * @param pattern pattern to replace, must not be {@code null} or empty
       * @param replacement the replacement, or {@code null} to remove the pattern
       * @throws NullPointerException if {@code pattern} is {@code null}
       * @throws IllegalArgumentException if {@code pattern} is empty
       */
      public ReplacingInputStream(InputStream in, String pattern, String replacement) {
          this(in, pattern.getBytes(UTF8), replacement == null ? null : replacement.getBytes(UTF8));
      }

      /**
       * Replace occurrences of pattern in the input.<p>
       *
       * If you want to normalize line endings DOS/MAC (\r\n | \r) to UNIX (\n), you can call the following:<br>
       * {@code new ReplacingInputStream(new ReplacingInputStream(is, "\r\n", "\n"), "\r", "\n")}
       *
       * @param in input
       * @param pattern pattern to replace, must not be {@code null} or empty
       * @param replacement the replacement, or {@code null} / empty to remove the pattern
       * @throws IllegalArgumentException if {@code pattern} is {@code null} or empty
       */
      public ReplacingInputStream(InputStream in, byte[] pattern, byte[] replacement) {
          super(in);
          if (pattern == null || pattern.length == 0) {
              throw new IllegalArgumentException("pattern length should be > 0");
          }
          // defensive copies: later changes to the caller's arrays must not affect matching
          this.pattern = pattern.clone();
          this.replacement = (replacement == null) ? new byte[0] : replacement.clone();
          // a failed candidate never leaves more than pattern.length - 1 bytes to re-examine
          buf = new int[pattern.length];
      }

      /**
       * Reads up to {@code len} filtered bytes into {@code b}.
       *
       * This is a copy of the parent logic; it has to call this class' {@link #read()}
       * instead of {@code super.read(b, off, len)}, which would bypass the replacing.
       *
       * @return the number of bytes stored, or {@code -1} if the stream is already at its end
       * @throws NullPointerException if {@code b} is {@code null}
       * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range of {@code b}
       */
      @Override
      public int read(byte[] b, int off, int len) throws IOException {
          if (b == null) {
              throw new NullPointerException();
          } else if (off < 0 || len < 0 || len > b.length - off) {
              throw new IndexOutOfBoundsException();
          } else if (len == 0) {
              return 0;
          }

          int c = read();
          if (c == -1) {
              return -1;
          }
          b[off] = (byte) c;

          int i = 1;
          for (; i < len; i++) {
              c = read();
              if (c == -1) {
                  break;
              }
              b[off + i] = (byte) c;
          }
          return i;
      }

      /**
       * Fills {@code b} with filtered bytes, see {@link #read(byte[], int, int)}.
       */
      @Override
      public int read(byte[] b) throws IOException {
          // call our own read
          return read(b, 0, b.length);
      }

      /**
       * Returns the next byte of the filtered stream.
       *
       * Occurrences of the pattern are found left to right and do not overlap. The method
       * is a loop rather than a recursion so that long runs of matches with an empty
       * replacement cannot exhaust the call stack.
       *
       * @return the next byte as a value in 0..255, or {@code -1} at the end of the stream
       */
      @Override
      public int read() throws IOException {
          while (true) {
              if (state == State.REPLACING) {
                  // mask so that bytes >= 0x80 are not returned as negative values (0xFF would look like EOF)
                  final int out = replacement[replacedIndex++] & BYTE_MASK;
                  if (replacedIndex == replacement.length) {
                      state = State.NOT_MATCHED;
                      replacedIndex = 0;
                  }
                  return out;
              }

              final int next = nextCandidate();

              // compare unsigned; -1 (EOF) can never equal a masked pattern byte
              if (next == (pattern[matchedIndex] & BYTE_MASK)) {
                  matchedIndex++;
                  if (matchedIndex < pattern.length) {
                      state = State.MATCHING;
                  } else {
                      // we've found a full match!
                      matchedIndex = 0;
                      if (replacement.length == 0) {
                          // nothing to emit, keep scanning
                          state = State.NOT_MATCHED;
                      } else {
                          state = State.REPLACING;
                          replacedIndex = 0;
                      }
                  }
                  continue;
              }

              if (matchedIndex == 0) {
                  // ordinary byte (or EOF) outside of any candidate
                  return next;
              }

              // The candidate failed after matchedIndex bytes. No match starts at its first
              // byte, so that byte is emitted; everything after it (including the byte that
              // caused the mismatch) may begin a new match and is examined again.
              final int first = pattern[0] & BYTE_MASK;
              pushBack(matchedIndex, next);
              matchedIndex = 0;
              state = State.NOT_MATCHED;
              return first;
          }
      }

      /**
       * Supplies the next byte to examine: read-ahead bytes first, then the underlying stream.
       */
      private int nextCandidate() throws IOException {
          if (unbufferIndex < bufferedEnd) {
              return buf[unbufferIndex++];
          }
          return super.read();
      }

      /**
       * Queues the tail of a failed candidate for re-examination.
       *
       * @param matched number of pattern bytes the candidate had matched (at least 1)
       * @param mismatch the byte (or {@code -1}) that ended the candidate
       */
      private void pushBack(int matched, int mismatch) {
          if (unbufferIndex < bufferedEnd) {
              // The read-ahead store is not exhausted, so every byte of the candidate was
              // taken from it and is still in place: rewinding re-queues all but the first.
              unbufferIndex -= matched;
              return;
          }
          // The store is empty; the matched bytes are by definition equal to the pattern prefix.
          unbufferIndex = 0;
          bufferedEnd = 0;
          for (int i = 1; i < matched; i++) {
              buf[bufferedEnd++] = pattern[i] & BYTE_MASK;
          }
          buf[bufferedEnd++] = mismatch;
      }

      /**
       * Returns the internal state and counters, for debugging.
       */
      @Override
      public String toString() {
          return state.name() + " " + matchedIndex + " " + replacedIndex + " " + unbufferIndex;
      }
  }