diff options
author | Matthias Sohn <matthias.sohn@sap.com> | 2014-12-15 14:42:04 +0100 |
---|---|---|
committer | Matthias Sohn <matthias.sohn@sap.com> | 2014-12-18 14:49:20 +0100 |
commit | d476d2f7296792508e02a1c44030a8151dcf4e00 (patch) | |
tree | 33d3748ce0593e5f135a23c761266872b67014c4 /org.eclipse.jgit | |
parent | a09b1b6c3d90713ab5e3473bd7aa32387dc294c3 (diff) | |
download | jgit-d476d2f7296792508e02a1c44030a8151dcf4e00.tar.gz jgit-d476d2f7296792508e02a1c44030a8151dcf4e00.zip |
ObjectChecker: Disallow names potentially mapping to ".git" on HFS+
Mac's HFS+ folds concatenations of ".git" and ignorable Unicode
characters [1] to ".git" [2]. Hence we need to disallow all names which
could potentially be a shortname for ".git". Example: in an empty
directory create a folder ".g\U+200Cit". Now you can't create another
folder ".git".
The following characters are ignorable Unicode which are ignored on
HFS+:
unicode hex name
-------------------------------------------------
U+200C 0xe2808c ZERO WIDTH NON-JOINER
U+200D 0xe2808d ZERO WIDTH JOINER
U+200E 0xe2808e LEFT-TO-RIGHT MARK
U+200F 0xe2808f RIGHT-TO-LEFT MARK
U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
U+202C 0xe280ac POP DIRECTIONAL FORMATTING
U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
U+206E 0xe281ae NATIONAL DIGIT SHAPES
U+206F 0xe281af NOMINAL DIGIT SHAPES
U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
[1] http://www.unicode.org/versions/Unicode7.0.0/ch05.pdf#G40025
http://www.unicode.org/reports/tr31/#Layout_and_Format_Control_Characters
[2] http://dubeiko.com/development/FileSystems/HFSPLUS/tn1150.html#UnicodeSubtleties
Change-Id: Ib6a1dd090b2649bdd8ec16387c994ed29de2860d
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java index 4913c4437a..281bccde65 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java @@ -469,6 +469,11 @@ public class ObjectChecker { RawParseUtils.decode(raw, ptr, end))); } + if (macosx && isMacHFSGit(raw, ptr, end)) + throw new CorruptObjectException(String.format( + "invalid name '%s' contains ignorable Unicode characters", + RawParseUtils.decode(raw, ptr, end))); + if (windows) { // Windows ignores space and dot at end of file name. if (raw[end - 1] == ' ' || raw[end - 1] == '.') @@ -479,6 +484,88 @@ public class ObjectChecker { } } + // Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters + // to ".git" therefore we should prevent such names + private static boolean isMacHFSGit(byte[] raw, int ptr, int end) + throws CorruptObjectException { + boolean ignorable = false; + byte[] git = new byte[] { '.', 'g', 'i', 't' }; + int g = 0; + while (ptr < end) { + switch (raw[ptr]) { + case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192 + checkTruncatedIgnorableUTF8(raw, ptr, end); + switch (raw[ptr + 1]) { + case (byte) 0x80: + switch (raw[ptr + 2]) { + case (byte) 0x8c: // U+200C 0xe2808c ZERO WIDTH NON-JOINER + case (byte) 0x8d: // U+200D 0xe2808d ZERO WIDTH JOINER + case (byte) 0x8e: // U+200E 0xe2808e LEFT-TO-RIGHT MARK + case (byte) 0x8f: // U+200F 0xe2808f RIGHT-TO-LEFT MARK + case (byte) 0xaa: // U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING + case (byte) 0xab: // U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING + case (byte) 0xac: // U+202C 0xe280ac POP DIRECTIONAL FORMATTING + case (byte) 0xad: // U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE + case (byte) 0xae: // U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE + ignorable = true; + ptr += 3; + continue; + default: + return false; + } + case (byte) 0x81: + switch 
(raw[ptr + 2]) { + case (byte) 0xaa: // U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING + case (byte) 0xab: // U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING + case (byte) 0xac: // U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING + case (byte) 0xad: // U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING + case (byte) 0xae: // U+206E 0xe281ae NATIONAL DIGIT SHAPES + case (byte) 0xaf: // U+206F 0xe281af NOMINAL DIGIT SHAPES + ignorable = true; + ptr += 3; + continue; + default: + return false; + } + } + break; + case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024 + checkTruncatedIgnorableUTF8(raw, ptr, end); + // U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE + if ((raw[ptr + 1] == (byte) 0xbb) + && (raw[ptr + 2] == (byte) 0xbf)) { + ignorable = true; + ptr += 3; + continue; + } + return false; + default: + if (g == 4) + return false; + if (raw[ptr++] != git[g++]) + return false; + } + } + if (g == 4 && ignorable) + return true; + return false; + } + + private static void checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end) + throws CorruptObjectException { + if ((ptr + 2) >= end) + throw new CorruptObjectException(MessageFormat.format( + "invalid name contains byte sequence ''{0}'' which is not a valid UTF-8 character", + toHexString(raw, ptr, end))); + } + + private static String toHexString(byte[] raw, int ptr, int end) { + StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$ + for (int i = ptr; i < end; i++) + b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$ + return b.toString(); + } + private static void checkNotWindowsDevice(byte[] raw, int ptr, int end) throws CorruptObjectException { switch (toLower(raw[ptr])) { |