# SINCE 0.5.0\r
web.repositoriesMessage = gitblit\r
\r
+# Ordered list of charsets/encodings to use when trying to display a blob.\r
+# UTF-8, ISO-8859-1, and the server's default charset are always appended\r
+# to this list, so they are still tried when the value is empty. If all\r
+# encodings fail to cleanly decode the blob content, UTF-8 will be used with\r
+# the standard malformed input/unmappable character replacement strings.\r
+# \r
+# SPACE-DELIMITED\r
+# SINCE 1.0.0\r
+web.blobEncodings = UTF-8 ISO-8859-1\r
+\r
# Manually set the default timezone to be used by Gitblit for display in the \r
# web ui. This value is independent of the JVM timezone. Specifying a blank\r
# value will default to the JVM timezone.\r
# e.g.\r
# web.otherUrls = ssh://localhost/git/{0} git://localhost/git/{0}\r
#\r
+# SPACE-DELIMITED\r
# SINCE 0.5.0\r
web.otherUrls = \r
\r
\r
#### additions\r
\r
+- Added setting to control charsets for blob string decoding. Default encodings are UTF-8, ISO-8859-1, and the server's default charset. (issue 97) \r
+ **New:** *web.blobEncodings = UTF-8 ISO-8859-1* \r
- Exposed JGit's internal configuration settings in gitblit.properties/web.xml (issue 93) \r
**New:** *git.packedGitWindowSize = 8k* \r
**New:** *git.packedGitLimit = 10m* \r
return self().timezone;\r
}\r
\r
+ /**\r
+ * Returns the user-defined blob encodings, i.e. the values of the\r
+ * web.blobEncodings setting converted to a String array.\r
+ * \r
+ * @return an array of encodings, may be empty\r
+ */\r
+ public static String [] getEncodings() {\r
+ return getStrings(Keys.web.blobEncodings).toArray(new String[0]);\r
+ }\r
+ \r
\r
/**\r
* Returns the boolean value for the specified key. If the key does not\r
String branch, RevCommit commit) {\r
IndexResult result = new IndexResult();\r
try {\r
+ String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);\r
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);\r
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,\r
Resolution.MINUTE);\r
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
// read the blob content\r
String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
- path.path);\r
+ path.path, encodings);\r
doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
writer.addDocument(doc);\r
}\r
}\r
response.setDateHeader("Last-Modified", JGitUtils.getCommitDate(commit).getTime());\r
\r
+ String [] encodings = GitBlit.getEncodings();\r
+\r
RevTree tree = commit.getTree();\r
byte[] content = null;\r
if (StringUtils.isEmpty(resource)) {\r
// find resource\r
String[] files = { "index.html", "index.htm", "index.mkd" };\r
for (String file : files) {\r
- content = JGitUtils.getStringContent(r, tree, file)\r
+ content = JGitUtils.getStringContent(r, tree, file, encodings)\r
.getBytes(Constants.ENCODING);\r
if (content != null) {\r
resource = file;\r
contentType = "text/plain";\r
}\r
if (contentType.startsWith("text")) {\r
- content = JGitUtils.getStringContent(r, tree, resource).getBytes(\r
+ content = JGitUtils.getStringContent(r, tree, resource, encodings).getBytes(\r
Constants.ENCODING);\r
} else {\r
content = JGitUtils.getByteContent(r, tree, resource);\r
\r
// no content, try custom 404 page\r
if (ArrayUtils.isEmpty(content)) {\r
- String custom404 = JGitUtils.getStringContent(r, tree, "404.html");\r
+ String custom404 = JGitUtils.getStringContent(r, tree, "404.html", encodings);\r
if (!StringUtils.isEmpty(custom404)) {\r
content = custom404.getBytes(Constants.ENCODING);\r
}\r
import java.io.IOException;\r
import java.io.InputStream;\r
import java.io.OutputStream;\r
-import java.nio.charset.Charset;\r
import java.text.MessageFormat;\r
import java.util.ArrayList;\r
import java.util.Arrays;\r
* @param tree\r
* if null, the RevTree from HEAD is assumed.\r
* @param blobPath\r
+ * @param charsets optional charset names to try when decoding the blob content\r
* @return UTF-8 string content\r
*/\r
- public static String getStringContent(Repository repository, RevTree tree, String blobPath) {\r
+ public static String getStringContent(Repository repository, RevTree tree, String blobPath, String... charsets) {\r
byte[] content = getByteContent(repository, tree, blobPath);\r
if (content == null) {\r
return null;\r
}\r
- return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));\r
+ return StringUtils.decodeString(content, charsets);\r
}\r
\r
/**\r
* \r
* @param repository\r
* @param objectId\r
+ * @param charsets optional charset names to try when decoding the blob content\r
* @return UTF-8 string content\r
*/\r
- public static String getStringContent(Repository repository, String objectId) {\r
+ public static String getStringContent(Repository repository, String objectId, String... charsets) {\r
byte[] content = getByteContent(repository, objectId);\r
if (content == null) {\r
return null;\r
}\r
- return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));\r
+ return StringUtils.decodeString(content, charsets);\r
}\r
\r
/**\r
package com.gitblit.utils;\r
\r
import java.io.UnsupportedEncodingException;\r
+import java.nio.ByteBuffer;\r
+import java.nio.CharBuffer;\r
+import java.nio.charset.CharacterCodingException;\r
+import java.nio.charset.Charset;\r
+import java.nio.charset.CharsetDecoder;\r
+import java.nio.charset.IllegalCharsetNameException;\r
+import java.nio.charset.UnsupportedCharsetException;\r
import java.security.MessageDigest;\r
import java.security.NoSuchAlgorithmException;\r
import java.util.ArrayList;\r
+import java.util.Arrays;\r
import java.util.Collection;\r
import java.util.Collections;\r
import java.util.Comparator;\r
+import java.util.LinkedHashSet;\r
import java.util.List;\r
+import java.util.Set;\r
import java.util.regex.PatternSyntaxException;\r
\r
/**\r
// remember to append any characters to the right of a match\r
return sb.toString();\r
}\r
+ \r
+ /**\r
+ * Decodes a byte array into a string by trying several charsets until one\r
+ * does not throw a coding exception. The result of the first charset that\r
+ * decodes the content cleanly is returned.\r
+ * <p>\r
+ * The caller-supplied charset names are tried first, in the order given,\r
+ * followed by UTF-8, ISO-8859-1, and the default charset reported by\r
+ * Charset.defaultCharset(). A name that occurs more than once is only tried\r
+ * at its first position. Illegal or unsupported charset names are skipped.\r
+ * Last resort is to interpret the content as UTF-8 with illegal character\r
+ * substitution.\r
+ * <p>\r
+ * NOTE(review): ISO-8859-1 presumably accepts every byte sequence, in which\r
+ * case charsets ordered after it and the last-resort branch would never be\r
+ * reached - confirm.\r
+ * \r
+ * @param content the bytes to decode; not null-checked by this method\r
+ * @param charsets optional charset names to try before the built-in ones\r
+ * @return a string\r
+ */\r
+ public static String decodeString(byte [] content, String... charsets) {\r
+ Set<String> sets = new LinkedHashSet<String>();\r
+ if (!ArrayUtils.isEmpty(charsets)) {\r
+ sets.addAll(Arrays.asList(charsets));\r
+ }\r
+ sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));\r
+ for (String charset : sets) {\r
+ try {\r
+ Charset cs = Charset.forName(charset);\r
+ CharsetDecoder decoder = cs.newDecoder();\r
+ CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));\r
+ return buffer.toString();\r
+ } catch (CharacterCodingException e) {\r
+ // content did not decode cleanly with this charset; advance to the next one\r
+ } catch (IllegalCharsetNameException e) {\r
+ // ignore illegal charset names\r
+ } catch (UnsupportedCharsetException e) {\r
+ // ignore unsupported charsets\r
+ }\r
+ }\r
+ return new String(content, Charset.forName("UTF-8"));\r
+ }\r
}
\ No newline at end of file
\r
Repository r = getRepository();\r
final String blobPath = WicketUtils.getPath(params);\r
-\r
+ String [] encodings = GitBlit.getEncodings();\r
+ \r
if (StringUtils.isEmpty(blobPath)) {\r
// blob by objectid\r
\r
add(new BookmarkablePageLink<Void>("headLink", BlobPage.class).setEnabled(false));\r
add(new CommitHeaderPanel("commitHeader", objectId));\r
add(new PathBreadcrumbsPanel("breadcrumbs", repositoryName, blobPath, objectId));\r
- Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId));\r
+ Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId, encodings));\r
WicketUtils.setCssClass(c, "plainprint");\r
add(c);\r
} else {\r
case 1:\r
// PrettyPrint blob text\r
c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),\r
- blobPath));\r
+ blobPath, encodings));\r
WicketUtils.setCssClass(c, "prettyprint linenums");\r
break;\r
case 2:\r
default:\r
// plain text\r
c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),\r
- blobPath));\r
+ blobPath, encodings));\r
WicketUtils.setCssClass(c, "plainprint");\r
}\r
add(c);\r
} else {\r
// plain text\r
Label blobLabel = new Label("blobText", JGitUtils.getStringContent(r,\r
- commit.getTree(), blobPath));\r
+ commit.getTree(), blobPath, encodings));\r
WicketUtils.setCssClass(blobLabel, "plainprint");\r
add(blobLabel);\r
}\r
import org.eclipse.jgit.lib.Repository;\r
import org.eclipse.jgit.revwalk.RevCommit;\r
\r
+import com.gitblit.GitBlit;\r
import com.gitblit.utils.JGitUtils;\r
import com.gitblit.utils.MarkdownUtils;\r
import com.gitblit.wicket.WicketUtils;\r
\r
Repository r = getRepository();\r
RevCommit commit = JGitUtils.getCommit(r, objectId);\r
-\r
+ String [] encodings = GitBlit.getEncodings();\r
+ \r
// markdown page links\r
add(new BookmarkablePageLink<Void>("blameLink", BlamePage.class,\r
WicketUtils.newPathParameter(repositoryName, objectId, markdownPath)));\r
WicketUtils.newPathParameter(repositoryName, Constants.HEAD, markdownPath)));\r
\r
// Read raw markdown content and transform it to html\r
- String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath);\r
+ String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath, encodings);\r
String htmlText;\r
try {\r
htmlText = MarkdownUtils.transformMarkdown(markdownText);\r
final String repositoryName = WicketUtils.getRepositoryName(params);\r
final String objectId = WicketUtils.getObject(params);\r
final String blobPath = WicketUtils.getPath(params);\r
-\r
+ String [] encodings = GitBlit.getEncodings();\r
+ \r
Repository r = GitBlit.self().getRepository(repositoryName);\r
if (r == null) {\r
error(getString("gb.canNotLoadRepository") + " " + repositoryName);\r
\r
if (StringUtils.isEmpty(blobPath)) {\r
// objectid referenced raw view\r
- Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId));\r
+ Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId, encodings));\r
WicketUtils.setCssClass(blobLabel, "plainprint");\r
add(blobLabel);\r
} else {\r
default:\r
// plain text\r
c = new Label("rawText", JGitUtils.getStringContent(r, commit.getTree(),\r
- blobPath));\r
+ blobPath, encodings));\r
WicketUtils.setCssClass(c, "plainprint");\r
}\r
add(c);\r
} else {\r
// plain text\r
Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r,\r
- commit.getTree(), blobPath));\r
+ commit.getTree(), blobPath, encodings));\r
WicketUtils.setCssClass(blobLabel, "plainprint");\r
add(blobLabel);\r
}\r
}\r
}\r
if (!StringUtils.isEmpty(readme)) {\r
- String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme);\r
+ String [] encodings = GitBlit.getEncodings();\r
+ String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme, encodings);\r
htmlText = MarkdownUtils.transformMarkdown(markdownText);\r
}\r
} catch (ParseException p) {\r
import org.eclipse.jgit.lib.Repository;\r
import org.eclipse.jgit.lib.RepositoryCache.FileKey;\r
import org.eclipse.jgit.revwalk.RevCommit;\r
+import org.eclipse.jgit.revwalk.RevTree;\r
import org.eclipse.jgit.util.FS;\r
import org.eclipse.jgit.util.FileUtils;\r
import org.junit.Test;\r
@Test\r
public void testStringContent() throws Exception {\r
Repository repository = GitBlitSuite.getHelloworldRepository();\r
- String contentA = JGitUtils.getStringContent(repository, null, "java.java");\r
+ String contentA = JGitUtils.getStringContent(repository, (RevTree) null, "java.java");\r
RevCommit commit = JGitUtils.getCommit(repository, Constants.HEAD);\r
String contentB = JGitUtils.getStringContent(repository, commit.getTree(), "java.java");\r
String contentC = JGitUtils.getStringContent(repository, commit.getTree(), "missing.txt");\r