diff options
author | Thomas Wolf <thomas.wolf@paranor.ch> | 2017-02-23 22:49:43 +0100 |
---|---|---|
committer | Matthias Sohn <matthias.sohn@sap.com> | 2017-03-04 00:23:42 +0100 |
commit | 1f3e74ed9f19a6fade29a8c94dc6e3c1ac3963d5 (patch) | |
tree | 9a0d1c692f8f6b6a02ceffaa542b7e70c5162c8d | |
parent | 45f62576dec19570835e8679f74bc9a6ad8a4990 (diff) | |
download | jgit-1f3e74ed9f19a6fade29a8c94dc6e3c1ac3963d5.tar.gz jgit-1f3e74ed9f19a6fade29a8c94dc6e3c1ac3963d5.zip |
Make Repository.normalizeBranchName less strict
This operation was added recently with the goal to provide some
way to auto-correct invalid user input, or to provide a correction
suggestion to the user -- EGit uses it now that way. But the initial
implementation was very restrictive; it removed all non-ASCII
characters and even slashes.
Understandably end users were not happy with that. Git has no such
restriction to ASCII-only; nor does JGit. Branch names should be
meaningful to the end user, and if a user-supplied branch name is
invalid for technical reasons, a "normalized" name should still
be meaningful to the user.
Rewrite to attempt a minimal fix such that the result will pass
isValidRefName.
* Replace all Unicode whitespace by underscore.
* Replace troublesome special characters by dash.
* Collapse sequences of underscores, dots, and dashes.
* Remove underscores, dots, and dashes following slashes, and
collapse sequences of slashes.
* Strip leading and trailing sequences of slashes, dots, dashes,
and underscores.
* Avoid the ".lock" extension.
* Avoid the Windows reserved device names.
* If input name is null return an empty String so callers don't need to
check for null.
This still allows branch names with single slashes as separators
between components, avoids some pitfalls that isValidRefName() tests
for, and leaves other character untouched and thus allows non-ASCII
branch names.
Also move the function from the bottom of the file up to where
isValidRefName is implemented.
Bug: 512508
Change-Id: Ia0576d9b2489162208c05e51c6d54e9f0c88c3a7
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
-rw-r--r-- | org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ValidRefNameTest.java | 143 | ||||
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java | 157 |
2 files changed, 171 insertions, 129 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ValidRefNameTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ValidRefNameTest.java index f069b65f66..d431a8910b 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ValidRefNameTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ValidRefNameTest.java @@ -44,6 +44,7 @@ package org.eclipse.jgit.lib; import static org.eclipse.jgit.junit.Assert.assertEquals; +import static org.junit.Assert.assertEquals; import org.eclipse.jgit.junit.MockSystemReader; import org.eclipse.jgit.util.SystemReader; @@ -94,6 +95,25 @@ public class ValidRefNameTest { } } + private static void assertNormalized(final String name, + final String expected) { + SystemReader instance = SystemReader.getInstance(); + try { + setUnixSystemReader(); + String normalized = Repository.normalizeBranchName(name); + assertEquals("Normalization of " + name, expected, normalized); + assertEquals("\"" + normalized + "\"", true, + Repository.isValidRefName(Constants.R_HEADS + normalized)); + setWindowsSystemReader(); + normalized = Repository.normalizeBranchName(name); + assertEquals("Normalization of " + name, expected, normalized); + assertEquals("\"" + normalized + "\"", true, + Repository.isValidRefName(Constants.R_HEADS + normalized)); + } finally { + SystemReader.setInstance(instance); + } + } + @Test public void testEmptyString() { assertValid(false, ""); @@ -250,97 +270,56 @@ public class ValidRefNameTest { @Test public void testNormalizeBranchName() { - + assertEquals(true, Repository.normalizeBranchName(null) == ""); assertEquals(true, Repository.normalizeBranchName("").equals("")); + assertNormalized("Bug 12345::::Hello World", "Bug_12345-Hello_World"); + assertNormalized("Bug 12345 :::: Hello World", "Bug_12345_Hello_World"); + assertNormalized("Bug 12345 :::: Hello::: World", + "Bug_12345_Hello-World"); + assertNormalized(":::Bug 12345 - Hello World", "Bug_12345_Hello_World"); + assertNormalized("---Bug 12345 - Hello World", "Bug_12345_Hello_World"); + assertNormalized("Bug 12345 ---- Hello --- World", + "Bug_12345_Hello_World"); + assertNormalized("Bug 12345 - Hello World!", "Bug_12345_Hello_World!"); + assertNormalized("Bug 12345 : Hello World!", "Bug_12345_Hello_World!"); + assertNormalized("Bug 12345 _ Hello World!", "Bug_12345_Hello_World!"); + assertNormalized("Bug 12345 - Hello World!", + "Bug_12345_Hello_World!"); + assertNormalized(" Bug 12345 - Hello World! ", + "Bug_12345_Hello_World!"); + assertNormalized(" Bug 12345 - Hello World! ", + "Bug_12345_Hello_World!"); + assertNormalized("Bug 12345 - Hello______ World!", + "Bug_12345_Hello_World!"); + assertNormalized("_Bug 12345 - Hello World!", "Bug_12345_Hello_World!"); + } - assertEquals(true, - Repository.normalizeBranchName("__@#$@#$@$____ _") - .equals("")); - - assertEquals(true, - Repository.normalizeBranchName("~`!@#$%^&*()_+}]{[|\\\";?>.<,/") - .equals("")); - - assertEquals(true, - Repository.normalizeBranchName("Bug 12345 :::: Hello World") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName("Bug 12345 :::: Hello::: World") - .equals("Bug_12345-Hello-_World")); - - assertEquals(true, - Repository.normalizeBranchName(":::Bug 12345 - Hello World") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName("---Bug 12345 - Hello World") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName("Bug 12345 ---- Hello --- World") - .equals("Bug_12345-Hello-World")); - - assertEquals(true, Repository.normalizeBranchName(null) == null); - - assertEquals(true, - Repository.normalizeBranchName("Bug 12345 - Hello World!") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName("Bug 12345 : Hello World!") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName("Bug 12345 _ Hello World!") - .equals("Bug_12345_Hello_World")); - - assertEquals(true, - Repository - .normalizeBranchName("Bug 12345 - Hello World!") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName(" Bug 12345 - Hello World! ") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository - .normalizeBranchName(" Bug 12345 - Hello World! ") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository - .normalizeBranchName( - "Bug 12345 - Hello______ World!") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository.normalizeBranchName("_Bug 12345 - Hello World!") - .equals("Bug_12345-Hello_World")); - - assertEquals(true, - Repository - .normalizeBranchName( - "Bug 12345 - Hello Wo!@#$%^&*(rld {@") - .equals("Bug_12345-Hello_World_")); - - assertEquals(true, - Repository.normalizeBranchName("Bug 1#$ 2345 - Hello World") - .equals("Bug_12345-Hello_World")); + @Test + public void testNormalizeWithSlashes() { + assertNormalized("foo/bar/baz", "foo/bar/baz"); + assertNormalized("foo/bar.lock/baz.lock", "foo/bar_lock/baz_lock"); + assertNormalized("foo/.git/.git~1/bar", "foo/git/git-1/bar"); + assertNormalized(".foo/aux/con/com3.txt/com0/prn/lpt1", + "foo/+aux/+con/+com3.txt/com0/+prn/+lpt1"); + assertNormalized("foo/../bar///.--ba...z", "foo/bar/ba.z"); + } + + @Test + public void testNormalizeWithUnicode() { + assertNormalized("f\u00f6\u00f6/.b\u00e0r/*[<>|^~/b\u00e9\\z", + "f\u00f6\u00f6/b\u00e0r/b\u00e9-z"); + assertNormalized("\u5165\u53e3 entrance;/.\u51fa\u53e3_*ex*it*/", + "\u5165\u53e3_entrance;/\u51fa\u53e3_ex-it"); } @Test public void testNormalizeAlreadyValidRefName() { - assertEquals(true, - Repository.normalizeBranchName("refs/heads/m.a.s.t.e.r") - .equals("refs/heads/m.a.s.t.e.r")); + assertNormalized("refs/heads/m.a.s.t.e.r", "refs/heads/m.a.s.t.e.r"); + assertNormalized("refs/tags/v1.0-20170223", "refs/tags/v1.0-20170223"); } @Test public void testNormalizeTrimmedUnicodeAlreadyValidRefName() { - assertEquals(true, - Repository.normalizeBranchName(" \u00e5ngstr\u00f6m\t") - .equals("\u00e5ngstr\u00f6m")); + assertNormalized(" \u00e5ngstr\u00f6m\t", "\u00e5ngstr\u00f6m"); } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java index e2f6c3c711..9b1c2eac75 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java @@ -66,6 +66,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Pattern; import org.eclipse.jgit.annotations.NonNull; import org.eclipse.jgit.annotations.Nullable; @@ -111,6 +112,17 @@ public abstract class Repository implements AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(Repository.class); private static final ListenerList globalListeners = new ListenerList(); + /** + * Branch names containing slashes should not have a name component that is + * one of the reserved device names on Windows. + * + * @see #normalizeBranchName(String) + */ + private static final Pattern FORBIDDEN_BRANCH_NAME_COMPONENTS = Pattern + .compile( + "(^|/)(aux|com[1-9]|con|lpt[1-9]|nul|prn)(\\.[^/]*)?", //$NON-NLS-1$ + Pattern.CASE_INSENSITIVE); + /** @return the global listener list observing all events in this JVM. */ public static ListenerList getGlobalListenerList() { return globalListeners; @@ -923,7 +935,7 @@ public abstract class Repository implements AutoCloseable { else desc = getClass().getSimpleName() + "-" //$NON-NLS-1$ + System.identityHashCode(this); - return "Repository[" + desc + "]"; //$NON-NLS-1$ + return "Repository[" + desc + "]"; //$NON-NLS-1$ //$NON-NLS-2$ } /** @@ -1333,12 +1345,105 @@ public abstract class Repository implements AutoCloseable { } /** + * Normalizes the passed branch name into a possible valid branch name. The + * validity of the returned name should be checked by a subsequent call to + * {@link #isValidRefName(String)}. + * <p/> + * Future implementations of this method could be more restrictive or more + * lenient about the validity of specific characters in the returned name. + * <p/> + * The current implementation returns the trimmed input string if this is + * already a valid branch name. Otherwise it returns a trimmed string with + * special characters not allowed by {@link #isValidRefName(String)} + * replaced by hyphens ('-') and blanks replaced by underscores ('_'). + * Leading and trailing slashes, dots, hyphens, and underscores are removed. + * + * @param name + * to normalize + * + * @return The normalized name or an empty String if it is {@code null} or + * empty. + * @since 4.7 + * @see #isValidRefName(String) + */ + public static String normalizeBranchName(String name) { + if (name == null || name.isEmpty()) { + return ""; //$NON-NLS-1$ + } + String result = name.trim(); + String fullName = result.startsWith(Constants.R_HEADS) ? result + : Constants.R_HEADS + result; + if (isValidRefName(fullName)) { + return result; + } + + // All Unicode blanks to underscore + result = result.replaceAll("(?:\\h|\\v)+", "_"); //$NON-NLS-1$ //$NON-NLS-2$ + StringBuilder b = new StringBuilder(); + char p = '/'; + for (int i = 0, len = result.length(); i < len; i++) { + char c = result.charAt(i); + if (c < ' ' || c == 127) { + continue; + } + // Substitute a dash for problematic characters + switch (c) { + case '\\': + case '^': + case '~': + case ':': + case '?': + case '*': + case '[': + case '@': + case '<': + case '>': + case '|': + case '"': + c = '-'; + break; + default: + break; + } + // Collapse multiple slashes, dashes, dots, underscores, and omit + // dashes, dots, and underscores following a slash. + switch (c) { + case '/': + if (p == '/') { + continue; + } + p = '/'; + break; + case '.': + case '_': + case '-': + if (p == '/' || p == '-') { + continue; + } + p = '-'; + break; + default: + p = c; + break; + } + b.append(c); + } + // Strip trailing special characters, and avoid the .lock extension + result = b.toString().replaceFirst("[/_.-]+$", "") //$NON-NLS-1$ //$NON-NLS-2$ + .replaceAll("\\.lock($|/)", "_lock$1"); //$NON-NLS-1$ //$NON-NLS-2$ + return FORBIDDEN_BRANCH_NAME_COMPONENTS.matcher(result) + .replaceAll("$1+$2$3"); //$NON-NLS-1$ + } + + /** * Strip work dir and return normalized repository path. * - * @param workDir Work dir - * @param file File whose path shall be stripped of its workdir - * @return normalized repository relative path or the empty - * string if the file is not relative to the work directory. + * @param workDir + * Work dir + * @param file + * File whose path shall be stripped of its workdir + * @return normalized repository relative path or the empty string if the + * file is not relative to the work directory. */ @NonNull public static String stripWorkDir(File workDir, File file) { @@ -1889,46 +1994,4 @@ public abstract class Repository implements AutoCloseable { public void autoGC(ProgressMonitor monitor) { // default does nothing } - - /** - * Normalizes the passed branch name into a possible valid branch name. The - * validity of the returned name should be checked by a subsequent call to - * {@link #isValidRefName(String)}. - * <p/> - * Future implementations of this method could be more restrictive or more - * lenient about the validity of specific characters in the returned name. - * <p/> - * The current implementation returns the trimmed input string if this is - * already a valid branch name. Otherwise it returns a trimmed string only - * containing word characters ([a-zA-Z_0-9]) and hyphens ('-'). Colons are - * replaced by hyphens. Repeating underscores and hyphens are replaced by a - * single occurrence. Underscores and hyphens at the beginning of the string - * are removed. - * - * @param name - * The name to normalize. - * - * @return The normalized String or null if null was passed. - * @since 4.7 - * @see #isValidRefName(String) - */ - public static String normalizeBranchName(String name) { - if (name == null || name.length() == 0) { - return name; - } - String result = name.trim(); - String fullName = result.startsWith(Constants.R_HEADS) ? result - : Constants.R_HEADS + result; - if (isValidRefName(fullName)) { - return result; - } - return result.replaceAll("\\s+([_:-])*?\\s+", "$1") //$NON-NLS-1$//$NON-NLS-2$ - .replaceAll(":", "-") //$NON-NLS-1$//$NON-NLS-2$ - .replaceAll("\\s+", "_") //$NON-NLS-1$//$NON-NLS-2$ - .replaceAll("_{2,}", "_") //$NON-NLS-1$//$NON-NLS-2$ - .replaceAll("-{2,}", "-") //$NON-NLS-1$//$NON-NLS-2$ - .replaceAll("[^\\w-]", "") //$NON-NLS-1$ //$NON-NLS-2$ - .replaceAll("^_+", "") //$NON-NLS-1$//$NON-NLS-2$ - .replaceAll("^-+", ""); //$NON-NLS-1$//$NON-NLS-2$ - } } |