diff options
author | James Moger <james.moger@gitblit.com> | 2012-06-07 17:30:18 -0400 |
---|---|---|
committer | James Moger <james.moger@gitblit.com> | 2012-06-07 17:30:18 -0400 |
commit | ae9e157ef4e6a3708489725d4436cc15d273308f (patch) | |
tree | 5d93ddebf97927205789d702ae8eebc5e613ff08 | |
parent | 47867891efc2aa996fa78f7c224e46d65dc04457 (diff) | |
download | gitblit-ae9e157ef4e6a3708489725d4436cc15d273308f.tar.gz gitblit-ae9e157ef4e6a3708489725d4436cc15d273308f.zip |
Try multiple encodings when working with string blobs (issue 97)
-rw-r--r-- | distrib/gitblit.properties | 11 | ||||
-rw-r--r-- | docs/04_releases.mkd | 2 | ||||
-rw-r--r-- | src/com/gitblit/GitBlit.java | 9 | ||||
-rw-r--r-- | src/com/gitblit/LuceneExecutor.java | 3 | ||||
-rw-r--r-- | src/com/gitblit/PagesServlet.java | 8 | ||||
-rw-r--r-- | src/com/gitblit/utils/JGitUtils.java | 11 | ||||
-rw-r--r-- | src/com/gitblit/utils/StringUtils.java | 42 | ||||
-rw-r--r-- | src/com/gitblit/wicket/pages/BlobPage.java | 11 | ||||
-rw-r--r-- | src/com/gitblit/wicket/pages/MarkdownPage.java | 6 | ||||
-rw-r--r-- | src/com/gitblit/wicket/pages/RawPage.java | 9 | ||||
-rw-r--r-- | src/com/gitblit/wicket/pages/SummaryPage.java | 3 | ||||
-rw-r--r-- | tests/com/gitblit/tests/JGitUtilsTest.java | 3 |
12 files changed, 96 insertions, 22 deletions
diff --git a/distrib/gitblit.properties b/distrib/gitblit.properties index 5292a91e..8e768262 100644 --- a/distrib/gitblit.properties +++ b/distrib/gitblit.properties @@ -362,6 +362,16 @@ web.loginMessage = gitblit # SINCE 0.5.0
web.repositoriesMessage = gitblit
+# Ordered list of charsets/encodings to use when trying to display a blob.
+# If empty, UTF-8 and ISO-8859-1 are used. The server's default charset
+# is always appended to the encoding list. If all encodings fail to cleanly
+# decode the blob content, UTF-8 will be used with the standard malformed
+# input/unmappable character replacement strings.
+#
+# SPACE-DELIMITED
+# SINCE 1.0.0
+web.blobEncodings = UTF-8 ISO-8859-1
+
# Manually set the default timezone to be used by Gitblit for display in the
# web ui. This value is independent of the JVM timezone. Specifying a blank
# value will default to the JVM timezone.
@@ -432,6 +442,7 @@ web.forwardSlashCharacter = / # e.g.
# web.otherUrls = ssh://localhost/git/{0} git://localhost/git/{0}
#
+# SPACE-DELIMITED
# SINCE 0.5.0
web.otherUrls =
diff --git a/docs/04_releases.mkd b/docs/04_releases.mkd index d20000bf..00981978 100644 --- a/docs/04_releases.mkd +++ b/docs/04_releases.mkd @@ -16,6 +16,8 @@ #### additions
+- Added setting to control charsets for blob string decoding. Default encodings are UTF-8, ISO-8859-1, and server's default charset. (issue 97)
+ **New:** *web.blobEncodings = UTF-8 ISO-8859-1*
- Exposed JGit's internal configuration settings in gitblit.properties/web.xml (issue 93)
**New:** *git.packedGitWindowSize = 8k*
**New:** *git.packedGitLimit = 10m*
diff --git a/src/com/gitblit/GitBlit.java b/src/com/gitblit/GitBlit.java index f96340ae..dc53540e 100644 --- a/src/com/gitblit/GitBlit.java +++ b/src/com/gitblit/GitBlit.java @@ -189,6 +189,15 @@ public class GitBlit implements ServletContextListener { return self().timezone;
}
+ /**
+ * Returns the user-defined blob encodings.
+ *
+ * @return an array of encodings, may be empty
+ */
+ public static String [] getEncodings() {
+ return getStrings(Keys.web.blobEncodings).toArray(new String[0]);
+ }
+
/**
* Returns the boolean value for the specified key. If the key does not
diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java index afd1cc5a..b3165434 100644 --- a/src/com/gitblit/LuceneExecutor.java +++ b/src/com/gitblit/LuceneExecutor.java @@ -642,6 +642,7 @@ public class LuceneExecutor implements Runnable { String branch, RevCommit commit) {
IndexResult result = new IndexResult();
try {
+ String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
Resolution.MINUTE);
@@ -674,7 +675,7 @@ public class LuceneExecutor implements Runnable { if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
// read the blob content
String str = JGitUtils.getStringContent(repository, commit.getTree(),
- path.path);
+ path.path, encodings);
doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
writer.addDocument(doc);
}
diff --git a/src/com/gitblit/PagesServlet.java b/src/com/gitblit/PagesServlet.java index d6304f70..ad9276b4 100644 --- a/src/com/gitblit/PagesServlet.java +++ b/src/com/gitblit/PagesServlet.java @@ -141,13 +141,15 @@ public class PagesServlet extends HttpServlet { }
response.setDateHeader("Last-Modified", JGitUtils.getCommitDate(commit).getTime());
+ String [] encodings = GitBlit.getEncodings();
+
RevTree tree = commit.getTree();
byte[] content = null;
if (StringUtils.isEmpty(resource)) {
// find resource
String[] files = { "index.html", "index.htm", "index.mkd" };
for (String file : files) {
- content = JGitUtils.getStringContent(r, tree, file)
+ content = JGitUtils.getStringContent(r, tree, file, encodings)
.getBytes(Constants.ENCODING);
if (content != null) {
resource = file;
@@ -165,7 +167,7 @@ public class PagesServlet extends HttpServlet { contentType = "text/plain";
}
if (contentType.startsWith("text")) {
- content = JGitUtils.getStringContent(r, tree, resource).getBytes(
+ content = JGitUtils.getStringContent(r, tree, resource, encodings).getBytes(
Constants.ENCODING);
} else {
content = JGitUtils.getByteContent(r, tree, resource);
@@ -177,7 +179,7 @@ public class PagesServlet extends HttpServlet { // no content, try custom 404 page
if (ArrayUtils.isEmpty(content)) {
- String custom404 = JGitUtils.getStringContent(r, tree, "404.html");
+ String custom404 = JGitUtils.getStringContent(r, tree, "404.html", encodings);
if (!StringUtils.isEmpty(custom404)) {
content = custom404.getBytes(Constants.ENCODING);
}
diff --git a/src/com/gitblit/utils/JGitUtils.java b/src/com/gitblit/utils/JGitUtils.java index f5ca5efd..72a8ab3c 100644 --- a/src/com/gitblit/utils/JGitUtils.java +++ b/src/com/gitblit/utils/JGitUtils.java @@ -20,7 +20,6 @@ import java.io.File; import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
@@ -543,14 +542,15 @@ public class JGitUtils { * @param tree
* if null, the RevTree from HEAD is assumed.
* @param blobPath
+ * @param charsets optional
* @return UTF-8 string content
*/
- public static String getStringContent(Repository repository, RevTree tree, String blobPath) {
+ public static String getStringContent(Repository repository, RevTree tree, String blobPath, String... charsets) {
byte[] content = getByteContent(repository, tree, blobPath);
if (content == null) {
return null;
}
- return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));
+ return StringUtils.decodeString(content, charsets);
}
/**
@@ -589,14 +589,15 @@ public class JGitUtils { *
* @param repository
* @param objectId
+ * @param charsets optional
* @return UTF-8 string content
*/
- public static String getStringContent(Repository repository, String objectId) {
+ public static String getStringContent(Repository repository, String objectId, String... charsets) {
byte[] content = getByteContent(repository, objectId);
if (content == null) {
return null;
}
- return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));
+ return StringUtils.decodeString(content, charsets);
}
/**
diff --git a/src/com/gitblit/utils/StringUtils.java b/src/com/gitblit/utils/StringUtils.java index 2c357241..baed5f0c 100644 --- a/src/com/gitblit/utils/StringUtils.java +++ b/src/com/gitblit/utils/StringUtils.java @@ -16,13 +16,23 @@ package com.gitblit.utils;
import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Set;
import java.util.regex.PatternSyntaxException;
/**
@@ -550,4 +560,36 @@ public class StringUtils { // remember to append any characters to the right of a match
return sb.toString();
}
+
+ /**
+ * Decodes a string by trying several charsets until one does not throw a
+ * coding exception. Last resort is to interpret as UTF-8 with illegal
+ * character substitution.
+ *
+ * @param content
+ * @param charsets optional
+ * @return a string
+ */
+ public static String decodeString(byte [] content, String... charsets) {
+ Set<String> sets = new LinkedHashSet<String>();
+ if (!ArrayUtils.isEmpty(charsets)) {
+ sets.addAll(Arrays.asList(charsets));
+ }
+ sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));
+ for (String charset : sets) {
+ try {
+ Charset cs = Charset.forName(charset);
+ CharsetDecoder decoder = cs.newDecoder();
+ CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));
+ return buffer.toString();
+ } catch (CharacterCodingException e) {
+ // ignore and advance to the next charset
+ } catch (IllegalCharsetNameException e) {
+ // ignore illegal charset names
+ } catch (UnsupportedCharsetException e) {
+ // ignore unsupported charsets
+ }
+ }
+ return new String(content, Charset.forName("UTF-8"));
+ }
}
\ No newline at end of file diff --git a/src/com/gitblit/wicket/pages/BlobPage.java b/src/com/gitblit/wicket/pages/BlobPage.java index 1c438370..fb5a962b 100644 --- a/src/com/gitblit/wicket/pages/BlobPage.java +++ b/src/com/gitblit/wicket/pages/BlobPage.java @@ -41,7 +41,8 @@ public class BlobPage extends RepositoryPage { Repository r = getRepository();
final String blobPath = WicketUtils.getPath(params);
-
+ String [] encodings = GitBlit.getEncodings();
+
if (StringUtils.isEmpty(blobPath)) {
// blob by objectid
@@ -54,7 +55,7 @@ public class BlobPage extends RepositoryPage { add(new BookmarkablePageLink<Void>("headLink", BlobPage.class).setEnabled(false));
add(new CommitHeaderPanel("commitHeader", objectId));
add(new PathBreadcrumbsPanel("breadcrumbs", repositoryName, blobPath, objectId));
- Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId));
+ Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId, encodings));
WicketUtils.setCssClass(c, "plainprint");
add(c);
} else {
@@ -111,7 +112,7 @@ public class BlobPage extends RepositoryPage { case 1:
// PrettyPrint blob text
c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),
- blobPath));
+ blobPath, encodings));
WicketUtils.setCssClass(c, "prettyprint linenums");
break;
case 2:
@@ -125,14 +126,14 @@ public class BlobPage extends RepositoryPage { default:
// plain text
c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),
- blobPath));
+ blobPath, encodings));
WicketUtils.setCssClass(c, "plainprint");
}
add(c);
} else {
// plain text
Label blobLabel = new Label("blobText", JGitUtils.getStringContent(r,
- commit.getTree(), blobPath));
+ commit.getTree(), blobPath, encodings));
WicketUtils.setCssClass(blobLabel, "plainprint");
add(blobLabel);
}
diff --git a/src/com/gitblit/wicket/pages/MarkdownPage.java b/src/com/gitblit/wicket/pages/MarkdownPage.java index aaf12bad..5764235a 100644 --- a/src/com/gitblit/wicket/pages/MarkdownPage.java +++ b/src/com/gitblit/wicket/pages/MarkdownPage.java @@ -24,6 +24,7 @@ import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
+import com.gitblit.GitBlit;
import com.gitblit.utils.JGitUtils;
import com.gitblit.utils.MarkdownUtils;
import com.gitblit.wicket.WicketUtils;
@@ -37,7 +38,8 @@ public class MarkdownPage extends RepositoryPage { Repository r = getRepository();
RevCommit commit = JGitUtils.getCommit(r, objectId);
-
+ String [] encodings = GitBlit.getEncodings();
+
// markdown page links
add(new BookmarkablePageLink<Void>("blameLink", BlamePage.class,
WicketUtils.newPathParameter(repositoryName, objectId, markdownPath)));
@@ -49,7 +51,7 @@ public class MarkdownPage extends RepositoryPage { WicketUtils.newPathParameter(repositoryName, Constants.HEAD, markdownPath)));
// Read raw markdown content and transform it to html
- String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath);
+ String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath, encodings);
String htmlText;
try {
htmlText = MarkdownUtils.transformMarkdown(markdownText);
diff --git a/src/com/gitblit/wicket/pages/RawPage.java b/src/com/gitblit/wicket/pages/RawPage.java index f71d986f..00cc5bf7 100644 --- a/src/com/gitblit/wicket/pages/RawPage.java +++ b/src/com/gitblit/wicket/pages/RawPage.java @@ -43,7 +43,8 @@ public class RawPage extends WebPage { final String repositoryName = WicketUtils.getRepositoryName(params);
final String objectId = WicketUtils.getObject(params);
final String blobPath = WicketUtils.getPath(params);
-
+ String [] encodings = GitBlit.getEncodings();
+
Repository r = GitBlit.self().getRepository(repositoryName);
if (r == null) {
error(getString("gb.canNotLoadRepository") + " " + repositoryName);
@@ -53,7 +54,7 @@ public class RawPage extends WebPage { if (StringUtils.isEmpty(blobPath)) {
// objectid referenced raw view
- Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId));
+ Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId, encodings));
WicketUtils.setCssClass(blobLabel, "plainprint");
add(blobLabel);
} else {
@@ -92,14 +93,14 @@ public class RawPage extends WebPage { default:
// plain text
c = new Label("rawText", JGitUtils.getStringContent(r, commit.getTree(),
- blobPath));
+ blobPath, encodings));
WicketUtils.setCssClass(c, "plainprint");
}
add(c);
} else {
// plain text
Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r,
- commit.getTree(), blobPath));
+ commit.getTree(), blobPath, encodings));
WicketUtils.setCssClass(blobLabel, "plainprint");
add(blobLabel);
}
diff --git a/src/com/gitblit/wicket/pages/SummaryPage.java b/src/com/gitblit/wicket/pages/SummaryPage.java index 2996b662..8e145c8d 100644 --- a/src/com/gitblit/wicket/pages/SummaryPage.java +++ b/src/com/gitblit/wicket/pages/SummaryPage.java @@ -158,7 +158,8 @@ public class SummaryPage extends RepositoryPage { }
}
if (!StringUtils.isEmpty(readme)) {
- String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme);
+ String [] encodings = GitBlit.getEncodings();
+ String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme, encodings);
htmlText = MarkdownUtils.transformMarkdown(markdownText);
}
} catch (ParseException p) {
diff --git a/tests/com/gitblit/tests/JGitUtilsTest.java b/tests/com/gitblit/tests/JGitUtilsTest.java index 616ea837..dc4d3c50 100644 --- a/tests/com/gitblit/tests/JGitUtilsTest.java +++ b/tests/com/gitblit/tests/JGitUtilsTest.java @@ -37,6 +37,7 @@ import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.RepositoryCache.FileKey;
import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.util.FS;
import org.eclipse.jgit.util.FileUtils;
import org.junit.Test;
@@ -265,7 +266,7 @@ public class JGitUtilsTest { @Test
public void testStringContent() throws Exception {
Repository repository = GitBlitSuite.getHelloworldRepository();
- String contentA = JGitUtils.getStringContent(repository, null, "java.java");
+ String contentA = JGitUtils.getStringContent(repository, (RevTree) null, "java.java");
RevCommit commit = JGitUtils.getCommit(repository, Constants.HEAD);
String contentB = JGitUtils.getStringContent(repository, commit.getTree(), "java.java");
String contentC = JGitUtils.getStringContent(repository, commit.getTree(), "missing.txt");
|