123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.util;
-
- import java.io.FilterInputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.nio.charset.Charset;
- import java.util.Arrays;
-
- /**
- * Simple FilterInputStream that can replace occurrences of bytes with something else.
- *
- * This has been taken from inbot-utils. (MIT licensed)
- *
- * @see <a href="https://github.com/Inbot/inbot-utils">inbot-utils</a>
- */
- @Internal
- public class ReplacingInputStream extends FilterInputStream {
-
- // while matching, this is where the bytes go.
- final int[] buf;
- private int matchedIndex=0;
- private int unbufferIndex=0;
- private int replacedIndex=0;
-
- private final byte[] pattern;
- private final byte[] replacement;
- private State state=State.NOT_MATCHED;
-
- // simple state machine for keeping track of what we are doing
- private enum State {
- NOT_MATCHED,
- MATCHING,
- REPLACING,
- UNBUFFER
- }
-
- private static final Charset UTF8 = Charset.forName("UTF-8");
-
- /**
- * Replace occurrences of pattern in the input. Note: input is assumed to be UTF-8 encoded. If not the case use byte[] based pattern and replacement.
- * @param in input
- * @param pattern pattern to replace.
- * @param replacement the replacement or null
- */
- public ReplacingInputStream(InputStream in, String pattern, String replacement) {
- this(in, pattern.getBytes(UTF8), replacement==null ? null : replacement.getBytes(UTF8));
- }
-
- /**
- * Replace occurrences of pattern in the input.<p>
- *
- * If you want to normalize line endings DOS/MAC (\n\r | \r) to UNIX (\n), you can call the following:<br/>
- * {@code new ReplacingInputStream(new ReplacingInputStream(is, "\n\r", "\n"), "\r", "\n")}
- *
- * @param in input
- * @param pattern pattern to replace
- * @param replacement the replacement or null
- */
- public ReplacingInputStream(InputStream in, byte[] pattern, byte[] replacement) {
- super(in);
- if (pattern == null || pattern.length == 0) {
- throw new IllegalArgumentException("pattern length should be > 0");
- }
- this.pattern = pattern;
- this.replacement = replacement;
- // we will never match more than the pattern length
- buf = new int[pattern.length];
- }
-
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- // copy of parent logic; we need to call our own read() instead of super.read(), which delegates instead of calling our read
- if (b == null) {
- throw new NullPointerException();
- } else if (off < 0 || len < 0 || len > b.length - off) {
- throw new IndexOutOfBoundsException();
- } else if (len == 0) {
- return 0;
- }
-
- int c = read();
- if (c == -1) {
- return -1;
- }
- b[off] = (byte)c;
-
- int i = 1;
- for (; i < len ; i++) {
- c = read();
- if (c == -1) {
- break;
- }
- b[off + i] = (byte)c;
- }
- return i;
-
- }
-
- @Override
- public int read(byte[] b) throws IOException {
- // call our own read
- return read(b, 0, b.length);
- }
-
- @Override
- public int read() throws IOException {
- // use a simple state machine to figure out what we are doing
- int next;
- switch (state) {
- default:
- case NOT_MATCHED:
- // we are not currently matching, replacing, or unbuffering
- next=super.read();
- if (pattern[0] != next) {
- return next;
- }
-
- // clear whatever was there
- Arrays.fill(buf, 0);
- // make sure we start at 0
- matchedIndex=0;
-
- buf[matchedIndex++]=next;
- if (pattern.length == 1) {
- // edge-case when the pattern length is 1 we go straight to replacing
- state=State.REPLACING;
- // reset replace counter
- replacedIndex=0;
- } else {
- // pattern of length 1
- state=State.MATCHING;
- }
- // recurse to continue matching
- return read();
-
- case MATCHING:
- // the previous bytes matched part of the pattern
- next=super.read();
- if (pattern[matchedIndex]==next) {
- buf[matchedIndex++]=next;
- if (matchedIndex==pattern.length) {
- // we've found a full match!
- if (replacement==null || replacement.length==0) {
- // the replacement is empty, go straight to NOT_MATCHED
- state=State.NOT_MATCHED;
- matchedIndex=0;
- } else {
- // start replacing
- state=State.REPLACING;
- replacedIndex=0;
- }
- }
- } else {
- // mismatch -> unbuffer
- buf[matchedIndex++]=next;
- state=State.UNBUFFER;
- unbufferIndex=0;
- }
- return read();
-
- case REPLACING:
- // we've fully matched the pattern and are returning bytes from the replacement
- next=replacement[replacedIndex++];
- if (replacedIndex==replacement.length) {
- state=State.NOT_MATCHED;
- replacedIndex=0;
- }
- return next;
-
- case UNBUFFER:
- // we partially matched the pattern before encountering a non matching byte
- // we need to serve up the buffered bytes before we go back to NOT_MATCHED
- next=buf[unbufferIndex++];
- if (unbufferIndex==matchedIndex) {
- state=State.NOT_MATCHED;
- matchedIndex=0;
- }
- return next;
- }
- }
-
- @Override
- public String toString() {
- return state.name() + " " + matchedIndex + " " + replacedIndex + " " + unbufferIndex;
- }
-
- }
|