Browse Source

Try multiple encodings when working with string blobs (issue 97)

tags/v1.0.0
James Moger 12 years ago
parent
commit
ae9e157ef4

+ 11
- 0
distrib/gitblit.properties View File

@@ -362,6 +362,16 @@ web.loginMessage = gitblit
# SINCE 0.5.0
web.repositoriesMessage = gitblit
# Ordered list of charsets/encodings to try, in order, when decoding a blob
# for display. UTF-8, ISO-8859-1, and the server's default charset are always
# appended to this list (duplicates are ignored), so those are the encodings
# used when this setting is empty. If all encodings fail to cleanly decode the
# blob content, UTF-8 will be used with the standard malformed
# input/unmappable character replacement strings.
#
# SPACE-DELIMITED
# SINCE 1.0.0
web.blobEncodings = UTF-8 ISO-8859-1
# Manually set the default timezone to be used by Gitblit for display in the
# web ui. This value is independent of the JVM timezone. Specifying a blank
# value will default to the JVM timezone.
@@ -432,6 +442,7 @@ web.forwardSlashCharacter = /
# e.g.
# web.otherUrls = ssh://localhost/git/{0} git://localhost/git/{0}
#
# SPACE-DELIMITED
# SINCE 0.5.0
web.otherUrls =

+ 2
- 0
docs/04_releases.mkd View File

@@ -16,6 +16,8 @@
#### additions
- Added setting to control charsets for blob string decoding. Default encodings are UTF-8, ISO-8859-1, and server's default charset. (issue 97)
**New:** *web.blobEncodings = UTF-8 ISO-8859-1*
- Exposed JGit's internal configuration settings in gitblit.properties/web.xml (issue 93)
**New:** *git.packedGitWindowSize = 8k*
**New:** *git.packedGitLimit = 10m*

+ 9
- 0
src/com/gitblit/GitBlit.java View File

@@ -189,6 +189,15 @@ public class GitBlit implements ServletContextListener {
return self().timezone;
}
/**
 * Returns the blob encodings configured under the
 * <code>web.blobEncodings</code> setting.
 *
 * The configured values are copied into a freshly allocated array on every
 * call.
 *
 * @return an array of charset names, may be empty
 */
public static String [] getEncodings() {
    // zero-length seed array; toArray allocates one of the right size
    final String [] seed = new String[0];
    return getStrings(Keys.web.blobEncodings).toArray(seed);
}
/**
* Returns the boolean value for the specified key. If the key does not

+ 2
- 1
src/com/gitblit/LuceneExecutor.java View File

@@ -642,6 +642,7 @@ public class LuceneExecutor implements Runnable {
String branch, RevCommit commit) {
IndexResult result = new IndexResult();
try {
String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
Resolution.MINUTE);
@@ -674,7 +675,7 @@ public class LuceneExecutor implements Runnable {
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
// read the blob content
String str = JGitUtils.getStringContent(repository, commit.getTree(),
path.path);
path.path, encodings);
doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
writer.addDocument(doc);
}

+ 5
- 3
src/com/gitblit/PagesServlet.java View File

@@ -141,13 +141,15 @@ public class PagesServlet extends HttpServlet {
}
response.setDateHeader("Last-Modified", JGitUtils.getCommitDate(commit).getTime());
String [] encodings = GitBlit.getEncodings();
RevTree tree = commit.getTree();
byte[] content = null;
if (StringUtils.isEmpty(resource)) {
// find resource
String[] files = { "index.html", "index.htm", "index.mkd" };
for (String file : files) {
content = JGitUtils.getStringContent(r, tree, file)
content = JGitUtils.getStringContent(r, tree, file, encodings)
.getBytes(Constants.ENCODING);
if (content != null) {
resource = file;
@@ -165,7 +167,7 @@ public class PagesServlet extends HttpServlet {
contentType = "text/plain";
}
if (contentType.startsWith("text")) {
content = JGitUtils.getStringContent(r, tree, resource).getBytes(
content = JGitUtils.getStringContent(r, tree, resource, encodings).getBytes(
Constants.ENCODING);
} else {
content = JGitUtils.getByteContent(r, tree, resource);
@@ -177,7 +179,7 @@ public class PagesServlet extends HttpServlet {
// no content, try custom 404 page
if (ArrayUtils.isEmpty(content)) {
String custom404 = JGitUtils.getStringContent(r, tree, "404.html");
String custom404 = JGitUtils.getStringContent(r, tree, "404.html", encodings);
if (!StringUtils.isEmpty(custom404)) {
content = custom404.getBytes(Constants.ENCODING);
}

+ 6
- 5
src/com/gitblit/utils/JGitUtils.java View File

@@ -20,7 +20,6 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
@@ -543,14 +542,15 @@ public class JGitUtils {
* @param tree
* if null, the RevTree from HEAD is assumed.
* @param blobPath
* @param charsets optional
* @return UTF-8 string content
*/
public static String getStringContent(Repository repository, RevTree tree, String blobPath) {
public static String getStringContent(Repository repository, RevTree tree, String blobPath, String... charsets) {
byte[] content = getByteContent(repository, tree, blobPath);
if (content == null) {
return null;
}
return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));
return StringUtils.decodeString(content, charsets);
}
/**
@@ -589,14 +589,15 @@ public class JGitUtils {
*
* @param repository
* @param objectId
* @param charsets optional
* @return UTF-8 string content
*/
public static String getStringContent(Repository repository, String objectId) {
public static String getStringContent(Repository repository, String objectId, String... charsets) {
byte[] content = getByteContent(repository, objectId);
if (content == null) {
return null;
}
return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));
return StringUtils.decodeString(content, charsets);
}
/**

+ 42
- 0
src/com/gitblit/utils/StringUtils.java View File

@@ -16,13 +16,23 @@
package com.gitblit.utils;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.PatternSyntaxException;
/**
@@ -550,4 +560,36 @@ public class StringUtils {
// remember to append any characters to the right of a match
return sb.toString();
}
/**
 * Decodes a byte array into a string by trying several charsets, in order,
 * until one decodes the content without a coding error.
 *
 * The caller-supplied charsets are tried first, followed by UTF-8,
 * ISO-8859-1, and the JVM default charset (duplicates are tried only once).
 * Null or blank charset names are skipped, and illegal or unsupported names
 * are ignored. The last resort is to interpret the content as UTF-8 with
 * the standard replacement of malformed input/unmappable characters.
 *
 * @param content
 *            the raw bytes to decode, may be null
 * @param charsets
 *            optional ordered charset names to try first
 * @return the decoded string, or null if content is null
 */
public static String decodeString(byte [] content, String... charsets) {
    if (content == null) {
        // mirror the "null in, null out" contract of the blob readers
        return null;
    }

    // LinkedHashSet keeps the caller's ordering and drops duplicate names
    Set<String> sets = new LinkedHashSet<String>();
    if (charsets != null) {
        for (String name : charsets) {
            // Charset.forName(null) throws a plain IllegalArgumentException
            // which is not one of the exceptions handled below, so filter
            // out unusable names up front
            if (name == null) {
                continue;
            }
            String trimmed = name.trim();
            if (trimmed.length() > 0) {
                sets.add(trimmed);
            }
        }
    }
    sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));

    for (String charset : sets) {
        try {
            Charset cs = Charset.forName(charset);
            // a fresh decoder reports (throws on) malformed/unmappable input,
            // which is what lets us detect a wrong charset
            CharsetDecoder decoder = cs.newDecoder();
            CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));
            return buffer.toString();
        } catch (CharacterCodingException e) {
            // content is not valid in this charset, advance to the next one
        } catch (IllegalCharsetNameException e) {
            // ignore illegal charset names
        } catch (UnsupportedCharsetException e) {
            // ignore unsupported charsets
        }
    }

    // nothing decoded cleanly: lenient UTF-8 with replacement characters
    return new String(content, Charset.forName("UTF-8"));
}
}

+ 6
- 5
src/com/gitblit/wicket/pages/BlobPage.java View File

@@ -41,7 +41,8 @@ public class BlobPage extends RepositoryPage {
Repository r = getRepository();
final String blobPath = WicketUtils.getPath(params);
String [] encodings = GitBlit.getEncodings();
if (StringUtils.isEmpty(blobPath)) {
// blob by objectid
@@ -54,7 +55,7 @@ public class BlobPage extends RepositoryPage {
add(new BookmarkablePageLink<Void>("headLink", BlobPage.class).setEnabled(false));
add(new CommitHeaderPanel("commitHeader", objectId));
add(new PathBreadcrumbsPanel("breadcrumbs", repositoryName, blobPath, objectId));
Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId));
Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId, encodings));
WicketUtils.setCssClass(c, "plainprint");
add(c);
} else {
@@ -111,7 +112,7 @@ public class BlobPage extends RepositoryPage {
case 1:
// PrettyPrint blob text
c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),
blobPath));
blobPath, encodings));
WicketUtils.setCssClass(c, "prettyprint linenums");
break;
case 2:
@@ -125,14 +126,14 @@ public class BlobPage extends RepositoryPage {
default:
// plain text
c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),
blobPath));
blobPath, encodings));
WicketUtils.setCssClass(c, "plainprint");
}
add(c);
} else {
// plain text
Label blobLabel = new Label("blobText", JGitUtils.getStringContent(r,
commit.getTree(), blobPath));
commit.getTree(), blobPath, encodings));
WicketUtils.setCssClass(blobLabel, "plainprint");
add(blobLabel);
}

+ 4
- 2
src/com/gitblit/wicket/pages/MarkdownPage.java View File

@@ -24,6 +24,7 @@ import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import com.gitblit.GitBlit;
import com.gitblit.utils.JGitUtils;
import com.gitblit.utils.MarkdownUtils;
import com.gitblit.wicket.WicketUtils;
@@ -37,7 +38,8 @@ public class MarkdownPage extends RepositoryPage {
Repository r = getRepository();
RevCommit commit = JGitUtils.getCommit(r, objectId);
String [] encodings = GitBlit.getEncodings();
// markdown page links
add(new BookmarkablePageLink<Void>("blameLink", BlamePage.class,
WicketUtils.newPathParameter(repositoryName, objectId, markdownPath)));
@@ -49,7 +51,7 @@ public class MarkdownPage extends RepositoryPage {
WicketUtils.newPathParameter(repositoryName, Constants.HEAD, markdownPath)));
// Read raw markdown content and transform it to html
String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath);
String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath, encodings);
String htmlText;
try {
htmlText = MarkdownUtils.transformMarkdown(markdownText);

+ 5
- 4
src/com/gitblit/wicket/pages/RawPage.java View File

@@ -43,7 +43,8 @@ public class RawPage extends WebPage {
final String repositoryName = WicketUtils.getRepositoryName(params);
final String objectId = WicketUtils.getObject(params);
final String blobPath = WicketUtils.getPath(params);
String [] encodings = GitBlit.getEncodings();
Repository r = GitBlit.self().getRepository(repositoryName);
if (r == null) {
error(getString("gb.canNotLoadRepository") + " " + repositoryName);
@@ -53,7 +54,7 @@ public class RawPage extends WebPage {
if (StringUtils.isEmpty(blobPath)) {
// objectid referenced raw view
Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId));
Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId, encodings));
WicketUtils.setCssClass(blobLabel, "plainprint");
add(blobLabel);
} else {
@@ -92,14 +93,14 @@ public class RawPage extends WebPage {
default:
// plain text
c = new Label("rawText", JGitUtils.getStringContent(r, commit.getTree(),
blobPath));
blobPath, encodings));
WicketUtils.setCssClass(c, "plainprint");
}
add(c);
} else {
// plain text
Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r,
commit.getTree(), blobPath));
commit.getTree(), blobPath, encodings));
WicketUtils.setCssClass(blobLabel, "plainprint");
add(blobLabel);
}

+ 2
- 1
src/com/gitblit/wicket/pages/SummaryPage.java View File

@@ -158,7 +158,8 @@ public class SummaryPage extends RepositoryPage {
}
}
if (!StringUtils.isEmpty(readme)) {
String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme);
String [] encodings = GitBlit.getEncodings();
String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme, encodings);
htmlText = MarkdownUtils.transformMarkdown(markdownText);
}
} catch (ParseException p) {

+ 2
- 1
tests/com/gitblit/tests/JGitUtilsTest.java View File

@@ -37,6 +37,7 @@ import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.RepositoryCache.FileKey;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.util.FS;
import org.eclipse.jgit.util.FileUtils;
import org.junit.Test;
@@ -265,7 +266,7 @@ public class JGitUtilsTest {
@Test
public void testStringContent() throws Exception {
Repository repository = GitBlitSuite.getHelloworldRepository();
String contentA = JGitUtils.getStringContent(repository, null, "java.java");
String contentA = JGitUtils.getStringContent(repository, (RevTree) null, "java.java");
RevCommit commit = JGitUtils.getCommit(repository, Constants.HEAD);
String contentB = JGitUtils.getStringContent(repository, commit.getTree(), "java.java");
String contentC = JGitUtils.getStringContent(repository, commit.getTree(), "missing.txt");

Loading…
Cancel
Save