/* * Copyright (C) 2008, Google Inc. * and other copyright owners as documented in the project's IP log. * * This program and the accompanying materials are made available * under the terms of the Eclipse Distribution License v1.0 which * accompanies this distribution, is reproduced below, and is * available at http://www.eclipse.org/org/documents/edl-v10.php * * All rights reserved. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * - Neither the name of the Eclipse Foundation, Inc. nor the * names of its contributors may be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.eclipse.jgit.util; import java.util.Arrays; import org.eclipse.jgit.lib.Constants; /** Utility functions related to quoted string handling. */ public abstract class QuotedString { /** Quoting style that obeys the rules Git applies to file names */ public static final GitPathStyle GIT_PATH = new GitPathStyle(); /** * Quoting style used by the Bourne shell. *
* Quotes are unconditionally inserted during {@link #quote(String)}. This
* protects shell meta-characters like <code>$</code> or <code>~</code> from
* being recognized as special.
*/
public static final BourneStyle BOURNE = new BourneStyle();

/** Bourne style, but permits <code>~user</code> at the start of the string. */
public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
/**
 * Quote an input string by the quoting rules.
 * <p>
 * If the input string does not require any quoting, the same String
 * reference is returned to the caller.
 * <p>
 * Otherwise a quoted string is returned, including the opening and closing
 * quotation marks at the start and end of the string. If the style does not
 * permit raw Unicode characters then the string will first be encoded in
 * UTF-8, with unprintable sequences possibly escaped by the rules.
 *
 * @param in
 *            any non-null Unicode string.
 * @return a quoted string. See above for details.
 */
public abstract String quote(String in);

/**
 * Clean a previously quoted input, decoding the result via UTF-8.
 * <p>
 * This method must match quote such that:
 *
 * <pre>
 * a.equals(dequote(quote(a)));
 * </pre>
 *
 * is true for any <code>a</code>.
 *
 * @param in
 *            a Unicode string to remove quoting from.
 * @return the cleaned string.
 * @see #dequote(byte[], int, int)
 */
public String dequote(final String in) {
	// Encode once, then hand the complete buffer to the byte-oriented
	// variant implemented by the concrete quoting style.
	final byte[] encoded = Constants.encode(in);
	final int length = encoded.length;
	return dequote(encoded, 0, length);
}
/**
 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
 * <p>
 * This method must match quote such that:
 *
 * <pre>
 * a.equals(dequote(Constants.encode(quote(a))));
 * </pre>
 *
 * is true for any <code>a</code>.
 * <p>
 * This method removes any opening/closing quotation marks added by
 * {@link #quote(String)}.
 *
 * @param in
 *            the input buffer to parse.
 * @param offset
 *            first position within <code>in</code> to scan.
 * @param end
 *            one position past the last byte within <code>in</code> to
 *            scan.
 * @return the cleaned string.
 */
public abstract String dequote(byte[] in, int offset, int end);
/**
 * Quoting style used by the Bourne shell.
 * <p>
 * Quotes are unconditionally inserted during {@link #quote(String)}. This
 * protects shell meta-characters like <code>$</code> or <code>~</code> from
 * being recognized as special.
 */
public static class BourneStyle extends QuotedString {
	/**
	 * Wrap the input in single quotes.
	 * <p>
	 * Each <code>'</code> or <code>!</code> closes the current quoted run,
	 * is emitted as a backslash escape, and then a new quoted run is opened.
	 *
	 * @param in
	 *            the string to quote.
	 * @return the input surrounded by single quotes.
	 */
	@Override
	public String quote(final String in) {
		final StringBuilder out = new StringBuilder();
		out.append('\'');

		final int len = in.length();
		int pending = 0; // first character not yet copied to out
		for (int pos = 0; pos < len; pos++) {
			final char ch = in.charAt(pos);
			if (ch != '\'' && ch != '!')
				continue;

			// Flush the plain run, step outside the quotes, escape the
			// special character, and step back in.
			out.append(in, pending, pos);
			out.append('\'');
			out.append('\\');
			out.append(ch);
			out.append('\'');
			pending = pos + 1;
		}

		out.append(in, pending, len);
		out.append('\'');
		return out.toString();
	}

	/**
	 * Strip single quotes and resolve backslash escapes that appear outside
	 * of a quoted run.
	 *
	 * @param in
	 *            the input buffer to parse.
	 * @param ip
	 *            first position within <code>in</code> to scan.
	 * @param ie
	 *            one position past the last byte to scan.
	 * @return the cleaned string.
	 */
	@Override
	public String dequote(final byte[] in, int ip, final int ie) {
		final byte[] out = new byte[ie - ip];
		int used = 0;
		boolean quoted = false;

		while (ip < ie) {
			final byte cur = in[ip++];
			if (cur == '\'') {
				// Quote characters only toggle state; they are never copied.
				quoted = !quoted;
			} else if (cur == '\\' && !quoted && ip != ie) {
				// Outside of quotes a backslash yields the following byte.
				out[used++] = in[ip++];
			} else {
				// Ordinary byte, a backslash inside quotes, or a trailing
				// backslash: copy as-is.
				out[used++] = cur;
			}
		}

		return RawParseUtils.decode(Constants.CHARSET, out, 0, used);
	}
}
/** Bourne style, but permits <code>~user</code> at the start of the string. */
public static class BourneUserPathStyle extends BourneStyle {
	/**
	 * Quote the input, leaving a leading <code>~/</code> or
	 * <code>~user/</code> outside of the quotes.
	 *
	 * @param in
	 *            the string to quote.
	 * @return the quoted string; a bare <code>~user</code> is returned with
	 *         a trailing slash appended and no quoting.
	 */
	@Override
	public String quote(final String in) {
		// A bare "~user" is assumed to mean "~user/".
		final boolean bareUser = in.matches("^~[A-Za-z0-9_-]+$"); //$NON-NLS-1$
		if (bareUser)
			return in + "/"; //$NON-NLS-1$

		final boolean userPath = in.matches("^~[A-Za-z0-9_-]*/.*$"); //$NON-NLS-1$
		if (!userPath)
			return super.quote(in);

		// "~/path" or "~user/path": the ~/ or ~user/ prefix must not be
		// escaped from the shell, so only the remainder is quoted.
		final int split = in.indexOf('/') + 1;
		if (split == in.length())
			return in;

		final String prefix = in.substring(0, split);
		final String rest = in.substring(split);
		return prefix + super.quote(rest);
	}
}
/** Quoting style that obeys the rules Git applies to file names */
public static final class GitPathStyle extends QuotedString {
	/**
	 * Per-byte quoting rule for the values 0..127: 0 means the byte is
	 * copied unchanged, a positive value is the character written after a
	 * backslash, and a negative value requests a three digit octal escape.
	 */
	private static final byte[] quote;

	static {
		quote = new byte[128];
		Arrays.fill(quote, (byte) -1);

		// Letters and digits never need quoting.
		for (int ch = 'A'; ch <= 'Z'; ch++)
			quote[ch] = 0;
		for (int ch = 'a'; ch <= 'z'; ch++)
			quote[ch] = 0;
		for (int ch = '0'; ch <= '9'; ch++)
			quote[ch] = 0;

		// Punctuation that is passed through unchanged.
		final char[] plain = { ' ', '$', '%', '&', '*', '+', ',', '-', '.',
				'/', ':', ';', '=', '?', '@', '_', '^', '|', '~' };
		for (final char ch : plain)
			quote[ch] = 0;

		// Bytes that have a single character backslash escape.
		quote[0x07] = 'a';
		quote['\b'] = 'b';
		quote['\t'] = 't';
		quote['\n'] = 'n';
		quote[0x0B] = 'v';
		quote['\f'] = 'f';
		quote['\r'] = 'r';
		quote['"'] = '"';
		quote['\\'] = '\\';
	}

	/**
	 * Quote a string using double quotes and backslash escapes.
	 *
	 * @param instr
	 *            the string to quote.
	 * @return <code>""</code> for the empty string; the same reference when
	 *         no byte of the encoded input needed escaping; otherwise the
	 *         escaped form surrounded by double quotes.
	 */
	@Override
	public String quote(final String instr) {
		if (instr.length() == 0)
			return "\"\""; //$NON-NLS-1$

		final byte[] raw = Constants.encode(instr);
		final StringBuilder out = new StringBuilder(2 + raw.length);
		out.append('"');

		boolean unchanged = true;
		for (final byte rawByte : raw) {
			final int c = rawByte & 0xff;
			// Bytes beyond the table are handled like negative entries.
			final byte style = c < quote.length ? quote[c] : (byte) -1;

			if (style == 0) {
				out.append((char) c);
			} else if (style > 0) {
				unchanged = false;
				out.append('\\');
				out.append((char) style);
			} else {
				// Three digit octal escape of the byte value.
				unchanged = false;
				out.append('\\');
				out.append((char) ('0' + ((c >> 6) & 03)));
				out.append((char) ('0' + ((c >> 3) & 07)));
				out.append((char) ('0' + (c & 07)));
			}
		}

		if (unchanged)
			return instr;
		out.append('"');
		return out.toString();
	}

	/**
	 * Remove surrounding double quotes, if present, and resolve escapes.
	 * Input that is not wrapped in double quotes is decoded unchanged.
	 *
	 * @param in
	 *            the input buffer to parse.
	 * @param inPtr
	 *            first position within <code>in</code> to scan.
	 * @param inEnd
	 *            one position past the last byte to scan.
	 * @return the cleaned string.
	 */
	@Override
	public String dequote(final byte[] in, final int inPtr, final int inEnd) {
		final boolean wrapped = inEnd - inPtr >= 2 && in[inPtr] == '"'
				&& in[inEnd - 1] == '"';
		if (!wrapped)
			return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
		return dq(in, inPtr + 1, inEnd - 1);
	}

	/**
	 * Resolve backslash escapes in the region between the quotes.
	 *
	 * @param in
	 *            the input buffer.
	 * @param inPtr
	 *            first position to scan.
	 * @param inEnd
	 *            one position past the last byte to scan.
	 * @return the decoded string.
	 */
	private static String dq(final byte[] in, int inPtr, final int inEnd) {
		final byte[] out = new byte[inEnd - inPtr];
		int used = 0;

		while (inPtr < inEnd) {
			final byte cur = in[inPtr++];
			if (cur != '\\') {
				out[used++] = cur;
				continue;
			}

			if (inPtr == inEnd) {
				// Lone trailing backslash. Treat it as a literal.
				out[used++] = '\\';
				break;
			}

			final byte code = in[inPtr++];
			if ('0' <= code && code <= '3') {
				// Octal escape: the leading digit plus up to two more.
				int value = code - '0';
				int digits = 1;
				while (digits < 3 && inPtr < inEnd) {
					final byte d = in[inPtr];
					if (d < '0' || '7' < d)
						break;
					value = (value << 3) | (d - '0');
					inPtr++;
					digits++;
				}
				out[used++] = (byte) value;
				continue;
			}

			final int simple = unescape(code);
			if (simple >= 0) {
				out[used++] = (byte) simple;
				continue;
			}

			// Any other code is taken literally.
			out[used++] = '\\';
			out[used++] = code;
		}

		return RawParseUtils.decode(Constants.CHARSET, out, 0, used);
	}

	/**
	 * Map the character following a backslash to the byte it stands for.
	 *
	 * @param code
	 *            the byte following the backslash.
	 * @return the decoded byte value, or -1 if <code>code</code> is not a
	 *         single character escape.
	 */
	private static int unescape(final byte code) {
		switch (code) {
		case 'a':
			return 0x07; // BEL
		case 'b':
			return '\b';
		case 'f':
			return '\f';
		case 'n':
			return '\n';
		case 'r':
			return '\r';
		case 't':
			return '\t';
		case 'v':
			return 0x0B; // VT
		case '\\':
		case '"':
			return code;
		default:
			return -1;
		}
	}

	private GitPathStyle() {
		// Singleton
	}
}
}