]> source.dussan.org Git - jgit.git/commitdiff
Make Repository.normalizeBranchName less strict 88/91488/7
authorThomas Wolf <thomas.wolf@paranor.ch>
Thu, 23 Feb 2017 21:49:43 +0000 (22:49 +0100)
committerMatthias Sohn <matthias.sohn@sap.com>
Fri, 3 Mar 2017 23:23:42 +0000 (00:23 +0100)
This operation was added recently with the goal to provide some
way to auto-correct invalid user input, or to provide a correction
suggestion to the user -- EGit uses it now that way. But the initial
implementation was very restrictive; it removed all non-ASCII
characters and even slashes.

Understandably end users were not happy with that. Git has no such
restriction to ASCII-only; nor does JGit. Branch names should be
meaningful to the end user, and if a user-supplied branch name is
invalid for technical reasons, a "normalized" name should still
be meaningful to the user.

Rewrite to attempt a minimal fix such that the result will pass
isValidRefName.

* Replace all Unicode whitespace by underscore.
* Replace troublesome special characters by dash.
* Collapse sequences of underscores, dots, and dashes.
* Remove underscores, dots, and dashes following slashes, and
  collapse sequences of slashes.
* Strip leading and trailing sequences of slashes, dots, dashes,
  and underscores.
* Avoid the ".lock" extension.
* Avoid the Windows reserved device names.
* If input name is null return an empty String so callers don't need to
check for null.

This still allows branch names with single slashes as separators
between components, avoids some pitfalls that isValidRefName() tests
for, and leaves other character untouched and thus allows non-ASCII
branch names.

Also move the function from the bottom of the file up to where
isValidRefName is implemented.

Bug: 512508
Change-Id: Ia0576d9b2489162208c05e51c6d54e9f0c88c3a7
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ValidRefNameTest.java
org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java

index f069b65f66ccc89d51debc981d758454bda9b65b..d431a8910b2926f0c05d9330e464efa26cf2f862 100644 (file)
@@ -44,6 +44,7 @@
 package org.eclipse.jgit.lib;
 
 import static org.eclipse.jgit.junit.Assert.assertEquals;
+import static org.junit.Assert.assertEquals;
 
 import org.eclipse.jgit.junit.MockSystemReader;
 import org.eclipse.jgit.util.SystemReader;
@@ -94,6 +95,25 @@ public class ValidRefNameTest {
                }
        }
 
+       private static void assertNormalized(final String name,
+                       final String expected) {
+               SystemReader instance = SystemReader.getInstance();
+               try {
+                       setUnixSystemReader();
+                       String normalized = Repository.normalizeBranchName(name);
+                       assertEquals("Normalization of " + name, expected, normalized);
+                       assertEquals("\"" + normalized + "\"", true,
+                                       Repository.isValidRefName(Constants.R_HEADS + normalized));
+                       setWindowsSystemReader();
+                       normalized = Repository.normalizeBranchName(name);
+                       assertEquals("Normalization of " + name, expected, normalized);
+                       assertEquals("\"" + normalized + "\"", true,
+                                       Repository.isValidRefName(Constants.R_HEADS + normalized));
+               } finally {
+                       SystemReader.setInstance(instance);
+               }
+       }
+
        @Test
        public void testEmptyString() {
                assertValid(false, "");
@@ -250,97 +270,56 @@ public class ValidRefNameTest {
 
        @Test
        public void testNormalizeBranchName() {
-
+               assertEquals(true, Repository.normalizeBranchName(null) == "");
                assertEquals(true, Repository.normalizeBranchName("").equals(""));
+               assertNormalized("Bug 12345::::Hello World", "Bug_12345-Hello_World");
+               assertNormalized("Bug 12345 :::: Hello World", "Bug_12345_Hello_World");
+               assertNormalized("Bug 12345 :::: Hello::: World",
+                               "Bug_12345_Hello-World");
+               assertNormalized(":::Bug 12345 - Hello World", "Bug_12345_Hello_World");
+               assertNormalized("---Bug 12345 - Hello World", "Bug_12345_Hello_World");
+               assertNormalized("Bug 12345 ---- Hello --- World",
+                               "Bug_12345_Hello_World");
+               assertNormalized("Bug 12345 - Hello World!", "Bug_12345_Hello_World!");
+               assertNormalized("Bug 12345 : Hello World!", "Bug_12345_Hello_World!");
+               assertNormalized("Bug 12345 _ Hello World!", "Bug_12345_Hello_World!");
+               assertNormalized("Bug 12345   -       Hello World!",
+                               "Bug_12345_Hello_World!");
+               assertNormalized(" Bug 12345   -   Hello World! ",
+                               "Bug_12345_Hello_World!");
+               assertNormalized(" Bug 12345   -   Hello World!   ",
+                               "Bug_12345_Hello_World!");
+               assertNormalized("Bug 12345   -   Hello______ World!",
+                               "Bug_12345_Hello_World!");
+               assertNormalized("_Bug 12345 - Hello World!", "Bug_12345_Hello_World!");
+       }
 
-               assertEquals(true,
-                               Repository.normalizeBranchName("__@#$@#$@$____   _")
-                                               .equals(""));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("~`!@#$%^&*()_+}]{[|\\\";?>.<,/")
-                                               .equals(""));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 12345 :::: Hello World")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 12345 :::: Hello::: World")
-                                               .equals("Bug_12345-Hello-_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName(":::Bug 12345 - Hello World")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("---Bug 12345 - Hello World")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 12345 ---- Hello --- World")
-                                               .equals("Bug_12345-Hello-World"));
-
-               assertEquals(true, Repository.normalizeBranchName(null) == null);
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 12345 - Hello World!")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 12345 : Hello World!")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 12345 _ Hello World!")
-                                               .equals("Bug_12345_Hello_World"));
-
-               assertEquals(true,
-                               Repository
-                                               .normalizeBranchName("Bug 12345   -       Hello World!")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName(" Bug 12345   -   Hello World! ")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository
-                                               .normalizeBranchName(" Bug 12345   -   Hello World!   ")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository
-                                               .normalizeBranchName(
-                                                               "Bug 12345   -   Hello______ World!")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("_Bug 12345 - Hello World!")
-                                               .equals("Bug_12345-Hello_World"));
-
-               assertEquals(true,
-                               Repository
-                                               .normalizeBranchName(
-                                                               "Bug 12345 - Hello Wo!@#$%^&*(rld {@")
-                                               .equals("Bug_12345-Hello_World_"));
-
-               assertEquals(true,
-                               Repository.normalizeBranchName("Bug 1#$  2345 - Hello World")
-                                               .equals("Bug_12345-Hello_World"));
+       @Test
+       public void testNormalizeWithSlashes() {
+               assertNormalized("foo/bar/baz", "foo/bar/baz");
+               assertNormalized("foo/bar.lock/baz.lock", "foo/bar_lock/baz_lock");
+               assertNormalized("foo/.git/.git~1/bar", "foo/git/git-1/bar");
+               assertNormalized(".foo/aux/con/com3.txt/com0/prn/lpt1",
+                               "foo/+aux/+con/+com3.txt/com0/+prn/+lpt1");
+               assertNormalized("foo/../bar///.--ba...z", "foo/bar/ba.z");
+       }
+
+       @Test
+       public void testNormalizeWithUnicode() {
+               assertNormalized("f\u00f6\u00f6/.b\u00e0r/*[<>|^~/b\u00e9\\z",
+                               "f\u00f6\u00f6/b\u00e0r/b\u00e9-z");
+               assertNormalized("\u5165\u53e3 entrance;/.\u51fa\u53e3_*ex*it*/",
+                               "\u5165\u53e3_entrance;/\u51fa\u53e3_ex-it");
        }
 
        @Test
        public void testNormalizeAlreadyValidRefName() {
-               assertEquals(true,
-                               Repository.normalizeBranchName("refs/heads/m.a.s.t.e.r")
-                                               .equals("refs/heads/m.a.s.t.e.r"));
+               assertNormalized("refs/heads/m.a.s.t.e.r", "refs/heads/m.a.s.t.e.r");
+               assertNormalized("refs/tags/v1.0-20170223", "refs/tags/v1.0-20170223");
        }
 
        @Test
        public void testNormalizeTrimmedUnicodeAlreadyValidRefName() {
-               assertEquals(true,
-                               Repository.normalizeBranchName(" \u00e5ngstr\u00f6m\t")
-                                               .equals("\u00e5ngstr\u00f6m"));
+               assertNormalized(" \u00e5ngstr\u00f6m\t", "\u00e5ngstr\u00f6m");
        }
 }
index e2f6c3c711e7b3f22277e86cbb35fb14c281c4e2..9b1c2eac75dfe7bd864ddd201a40985a96c90296 100644 (file)
@@ -66,6 +66,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Pattern;
 
 import org.eclipse.jgit.annotations.NonNull;
 import org.eclipse.jgit.annotations.Nullable;
@@ -111,6 +112,17 @@ public abstract class Repository implements AutoCloseable {
        private static final Logger LOG = LoggerFactory.getLogger(Repository.class);
        private static final ListenerList globalListeners = new ListenerList();
 
+       /**
+        * Branch names containing slashes should not have a name component that is
+        * one of the reserved device names on Windows.
+        *
+        * @see #normalizeBranchName(String)
+        */
+       private static final Pattern FORBIDDEN_BRANCH_NAME_COMPONENTS = Pattern
+                       .compile(
+                                       "(^|/)(aux|com[1-9]|con|lpt[1-9]|nul|prn)(\\.[^/]*)?", //$NON-NLS-1$
+                                       Pattern.CASE_INSENSITIVE);
+
        /** @return the global listener list observing all events in this JVM. */
        public static ListenerList getGlobalListenerList() {
                return globalListeners;
@@ -923,7 +935,7 @@ public abstract class Repository implements AutoCloseable {
                else
                        desc = getClass().getSimpleName() + "-" //$NON-NLS-1$
                                        + System.identityHashCode(this);
-               return "Repository[" + desc + "]"; //$NON-NLS-1$
+               return "Repository[" + desc + "]"; //$NON-NLS-1$ //$NON-NLS-2$
        }
 
        /**
@@ -1332,13 +1344,106 @@ public abstract class Repository implements AutoCloseable {
                return components > 1;
        }
 
+       /**
+        * Normalizes the passed branch name into a possible valid branch name. The
+        * validity of the returned name should be checked by a subsequent call to
+        * {@link #isValidRefName(String)}.
+        * <p/>
+        * Future implementations of this method could be more restrictive or more
+        * lenient about the validity of specific characters in the returned name.
+        * <p/>
+        * The current implementation returns the trimmed input string if this is
+        * already a valid branch name. Otherwise it returns a trimmed string with
+        * special characters not allowed by {@link #isValidRefName(String)}
+        * replaced by hyphens ('-') and blanks replaced by underscores ('_').
+        * Leading and trailing slashes, dots, hyphens, and underscores are removed.
+        *
+        * @param name
+        *            to normalize
+        *
+        * @return The normalized name or an empty String if it is {@code null} or
+        *         empty.
+        * @since 4.7
+        * @see #isValidRefName(String)
+        */
+       public static String normalizeBranchName(String name) {
+               if (name == null || name.isEmpty()) {
+                       return ""; //$NON-NLS-1$
+               }
+               String result = name.trim();
+               String fullName = result.startsWith(Constants.R_HEADS) ? result
+                               : Constants.R_HEADS + result;
+               if (isValidRefName(fullName)) {
+                       return result;
+               }
+
+               // All Unicode blanks to underscore
+               result = result.replaceAll("(?:\\h|\\v)+", "_"); //$NON-NLS-1$ //$NON-NLS-2$
+               StringBuilder b = new StringBuilder();
+               char p = '/';
+               for (int i = 0, len = result.length(); i < len; i++) {
+                       char c = result.charAt(i);
+                       if (c < ' ' || c == 127) {
+                               continue;
+                       }
+                       // Substitute a dash for problematic characters
+                       switch (c) {
+                       case '\\':
+                       case '^':
+                       case '~':
+                       case ':':
+                       case '?':
+                       case '*':
+                       case '[':
+                       case '@':
+                       case '<':
+                       case '>':
+                       case '|':
+                       case '"':
+                               c = '-';
+                               break;
+                       default:
+                               break;
+                       }
+                       // Collapse multiple slashes, dashes, dots, underscores, and omit
+                       // dashes, dots, and underscores following a slash.
+                       switch (c) {
+                       case '/':
+                               if (p == '/') {
+                                       continue;
+                               }
+                               p = '/';
+                               break;
+                       case '.':
+                       case '_':
+                       case '-':
+                               if (p == '/' || p == '-') {
+                                       continue;
+                               }
+                               p = '-';
+                               break;
+                       default:
+                               p = c;
+                               break;
+                       }
+                       b.append(c);
+               }
+               // Strip trailing special characters, and avoid the .lock extension
+               result = b.toString().replaceFirst("[/_.-]+$", "") //$NON-NLS-1$ //$NON-NLS-2$
+                               .replaceAll("\\.lock($|/)", "_lock$1"); //$NON-NLS-1$ //$NON-NLS-2$
+               return FORBIDDEN_BRANCH_NAME_COMPONENTS.matcher(result)
+                               .replaceAll("$1+$2$3"); //$NON-NLS-1$
+       }
+
        /**
         * Strip work dir and return normalized repository path.
         *
-        * @param workDir Work dir
-        * @param file File whose path shall be stripped of its workdir
-        * @return normalized repository relative path or the empty
-        *         string if the file is not relative to the work directory.
+        * @param workDir
+        *            Work dir
+        * @param file
+        *            File whose path shall be stripped of its workdir
+        * @return normalized repository relative path or the empty string if the
+        *         file is not relative to the work directory.
         */
        @NonNull
        public static String stripWorkDir(File workDir, File file) {
@@ -1889,46 +1994,4 @@ public abstract class Repository implements AutoCloseable {
        public void autoGC(ProgressMonitor monitor) {
                // default does nothing
        }
-
-       /**
-        * Normalizes the passed branch name into a possible valid branch name. The
-        * validity of the returned name should be checked by a subsequent call to
-        * {@link #isValidRefName(String)}.
-        * <p/>
-        * Future implementations of this method could be more restrictive or more
-        * lenient about the validity of specific characters in the returned name.
-        * <p/>
-        * The current implementation returns the trimmed input string if this is
-        * already a valid branch name. Otherwise it returns a trimmed string only
-        * containing word characters ([a-zA-Z_0-9]) and hyphens ('-'). Colons are
-        * replaced by hyphens. Repeating underscores and hyphens are replaced by a
-        * single occurrence. Underscores and hyphens at the beginning of the string
-        * are removed.
-        *
-        * @param name
-        *            The name to normalize.
-        *
-        * @return The normalized String or null if null was passed.
-        * @since 4.7
-        * @see #isValidRefName(String)
-        */
-       public static String normalizeBranchName(String name) {
-               if (name == null || name.length() == 0) {
-                       return name;
-               }
-               String result = name.trim();
-               String fullName = result.startsWith(Constants.R_HEADS) ? result
-                               : Constants.R_HEADS + result;
-               if (isValidRefName(fullName)) {
-                       return result;
-               }
-               return result.replaceAll("\\s+([_:-])*?\\s+", "$1") //$NON-NLS-1$//$NON-NLS-2$
-                               .replaceAll(":", "-") //$NON-NLS-1$//$NON-NLS-2$
-                               .replaceAll("\\s+", "_") //$NON-NLS-1$//$NON-NLS-2$
-                               .replaceAll("_{2,}", "_") //$NON-NLS-1$//$NON-NLS-2$
-                               .replaceAll("-{2,}", "-") //$NON-NLS-1$//$NON-NLS-2$
-                               .replaceAll("[^\\w-]", "") //$NON-NLS-1$ //$NON-NLS-2$
-                               .replaceAll("^_+", "") //$NON-NLS-1$//$NON-NLS-2$
-                               .replaceAll("^-+", ""); //$NON-NLS-1$//$NON-NLS-2$
-       }
 }