123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- /*
- * Copyright (C) 2008, Google Inc.
- * and other copyright owners as documented in the project's IP log.
- *
- * This program and the accompanying materials are made available
- * under the terms of the Eclipse Distribution License v1.0 which
- * accompanies this distribution, is reproduced below, and is
- * available at http://www.eclipse.org/org/documents/edl-v10.php
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * - Neither the name of the Eclipse Foundation, Inc. nor the
- * names of its contributors may be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
- package org.eclipse.jgit.util;
-
- import java.util.Arrays;
-
- import org.eclipse.jgit.lib.Constants;
-
- /** Utility functions related to quoted string handling. */
- public abstract class QuotedString {
- /** Quoting style that obeys the rules Git applies to file names */
- public static final GitPathStyle GIT_PATH = new GitPathStyle();
-
- /**
- * Quoting style used by the Bourne shell.
- * <p>
- * Quotes are unconditionally inserted during {@link #quote(String)}. This
- * protects shell meta-characters like <code>$</code> or <code>~</code> from
- * being recognized as special.
- */
- public static final BourneStyle BOURNE = new BourneStyle();
-
- /** Bourne style, but permits <code>~user</code> at the start of the string. */
- public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
-
- /**
- * Quote an input string by the quoting rules.
- * <p>
- * If the input string does not require any quoting, the same String
- * reference is returned to the caller.
- * <p>
- * Otherwise a quoted string is returned, including the opening and closing
- * quotation marks at the start and end of the string. If the style does not
- * permit raw Unicode characters then the string will first be encoded in
- * UTF-8, with unprintable sequences possibly escaped by the rules.
- *
- * @param in
- * any non-null Unicode string.
- * @return a quoted string. See above for details.
- */
- public abstract String quote(String in);
-
- /**
- * Clean a previously quoted input, decoding the result via UTF-8.
- * <p>
- * This method must match quote such that:
- *
- * <pre>
- * a.equals(dequote(quote(a)));
- * </pre>
- *
- * is true for any <code>a</code>.
- *
- * @param in
- * a Unicode string to remove quoting from.
- * @return the cleaned string.
- * @see #dequote(byte[], int, int)
- */
- public String dequote(final String in) {
- final byte[] b = Constants.encode(in);
- return dequote(b, 0, b.length);
- }
-
- /**
- * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
- * <p>
- * This method must match quote such that:
- *
- * <pre>
- * a.equals(dequote(Constants.encode(quote(a))));
- * </pre>
- *
- * is true for any <code>a</code>.
- * <p>
- * This method removes any opening/closing quotation marks added by
- * {@link #quote(String)}.
- *
- * @param in
- * the input buffer to parse.
- * @param offset
- * first position within <code>in</code> to scan.
- * @param end
- * one position past the last byte in <code>in</code> to scan.
- * @return the cleaned string.
- */
- public abstract String dequote(byte[] in, int offset, int end);
-
- /**
- * Quoting style used by the Bourne shell.
- * <p>
- * Quotes are unconditionally inserted during {@link #quote(String)}. This
- * protects shell meta-characters like <code>$</code> or <code>~</code> from
- * being recognized as special.
- */
- public static class BourneStyle extends QuotedString {
- @Override
- public String quote(final String in) {
- final StringBuilder r = new StringBuilder();
- r.append('\'');
- int start = 0, i = 0;
- for (; i < in.length(); i++) {
- switch (in.charAt(i)) {
- case '\'':
- case '!':
- r.append(in, start, i);
- r.append('\'');
- r.append('\\');
- r.append(in.charAt(i));
- r.append('\'');
- start = i + 1;
- break;
- }
- }
- r.append(in, start, i);
- r.append('\'');
- return r.toString();
- }
-
- @Override
- public String dequote(final byte[] in, int ip, final int ie) {
- boolean inquote = false;
- final byte[] r = new byte[ie - ip];
- int rPtr = 0;
- while (ip < ie) {
- final byte b = in[ip++];
- switch (b) {
- case '\'':
- inquote = !inquote;
- continue;
- case '\\':
- if (inquote || ip == ie)
- r[rPtr++] = b; // literal within a quote
- else
- r[rPtr++] = in[ip++];
- continue;
- default:
- r[rPtr++] = b;
- continue;
- }
- }
- return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
- }
- }
-
- /** Bourne style, but permits <code>~user</code> at the start of the string. */
- public static class BourneUserPathStyle extends BourneStyle {
- @Override
- public String quote(final String in) {
- if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
- // If the string is just "~user" we can assume they
- // mean "~user/".
- //
- return in + "/"; //$NON-NLS-1$
- }
-
- if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
- // If the string is of "~/path" or "~user/path"
- // we must not escape ~/ or ~user/ from the shell.
- //
- final int i = in.indexOf('/') + 1;
- if (i == in.length())
- return in;
- return in.substring(0, i) + super.quote(in.substring(i));
- }
-
- return super.quote(in);
- }
- }
-
- /** Quoting style that obeys the rules Git applies to file names */
- public static final class GitPathStyle extends QuotedString {
- private static final byte[] quote;
- static {
- quote = new byte[128];
- Arrays.fill(quote, (byte) -1);
-
- for (int i = '0'; i <= '9'; i++)
- quote[i] = 0;
- for (int i = 'a'; i <= 'z'; i++)
- quote[i] = 0;
- for (int i = 'A'; i <= 'Z'; i++)
- quote[i] = 0;
- quote[' '] = 0;
- quote['$'] = 0;
- quote['%'] = 0;
- quote['&'] = 0;
- quote['*'] = 0;
- quote['+'] = 0;
- quote[','] = 0;
- quote['-'] = 0;
- quote['.'] = 0;
- quote['/'] = 0;
- quote[':'] = 0;
- quote[';'] = 0;
- quote['='] = 0;
- quote['?'] = 0;
- quote['@'] = 0;
- quote['_'] = 0;
- quote['^'] = 0;
- quote['|'] = 0;
- quote['~'] = 0;
-
- quote['\u0007'] = 'a';
- quote['\b'] = 'b';
- quote['\f'] = 'f';
- quote['\n'] = 'n';
- quote['\r'] = 'r';
- quote['\t'] = 't';
- quote['\u000B'] = 'v';
- quote['\\'] = '\\';
- quote['"'] = '"';
- }
-
- @Override
- public String quote(final String instr) {
- if (instr.length() == 0)
- return "\"\""; //$NON-NLS-1$
- boolean reuse = true;
- final byte[] in = Constants.encode(instr);
- final StringBuilder r = new StringBuilder(2 + in.length);
- r.append('"');
- for (int i = 0; i < in.length; i++) {
- final int c = in[i] & 0xff;
- if (c < quote.length) {
- final byte style = quote[c];
- if (style == 0) {
- r.append((char) c);
- continue;
- }
- if (style > 0) {
- reuse = false;
- r.append('\\');
- r.append((char) style);
- continue;
- }
- }
-
- reuse = false;
- r.append('\\');
- r.append((char) (((c >> 6) & 03) + '0'));
- r.append((char) (((c >> 3) & 07) + '0'));
- r.append((char) (((c >> 0) & 07) + '0'));
- }
- if (reuse)
- return instr;
- r.append('"');
- return r.toString();
- }
-
- @Override
- public String dequote(final byte[] in, final int inPtr, final int inEnd) {
- if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
- return dq(in, inPtr + 1, inEnd - 1);
- return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
- }
-
- private static String dq(final byte[] in, int inPtr, final int inEnd) {
- final byte[] r = new byte[inEnd - inPtr];
- int rPtr = 0;
- while (inPtr < inEnd) {
- final byte b = in[inPtr++];
- if (b != '\\') {
- r[rPtr++] = b;
- continue;
- }
-
- if (inPtr == inEnd) {
- // Lone trailing backslash. Treat it as a literal.
- //
- r[rPtr++] = '\\';
- break;
- }
-
- switch (in[inPtr++]) {
- case 'a':
- r[rPtr++] = 0x07 /* \a = BEL */;
- continue;
- case 'b':
- r[rPtr++] = '\b';
- continue;
- case 'f':
- r[rPtr++] = '\f';
- continue;
- case 'n':
- r[rPtr++] = '\n';
- continue;
- case 'r':
- r[rPtr++] = '\r';
- continue;
- case 't':
- r[rPtr++] = '\t';
- continue;
- case 'v':
- r[rPtr++] = 0x0B/* \v = VT */;
- continue;
-
- case '\\':
- case '"':
- r[rPtr++] = in[inPtr - 1];
- continue;
-
- case '0':
- case '1':
- case '2':
- case '3': {
- int cp = in[inPtr - 1] - '0';
- for (int n = 1; n < 3 && inPtr < inEnd; n++) {
- final byte c = in[inPtr];
- if ('0' <= c && c <= '7') {
- cp <<= 3;
- cp |= c - '0';
- inPtr++;
- } else {
- break;
- }
- }
- r[rPtr++] = (byte) cp;
- continue;
- }
-
- default:
- // Any other code is taken literally.
- //
- r[rPtr++] = '\\';
- r[rPtr++] = in[inPtr - 1];
- continue;
- }
- }
-
- return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
- }
-
- private GitPathStyle() {
- // Singleton
- }
- }
- }
|