source.dussan.org Git - gitblit.git/commitdiff
Try multiple encodings when working with string blobs (issue 97)
author: James Moger <james.moger@gitblit.com>
Thu, 7 Jun 2012 21:30:18 +0000 (17:30 -0400)
committer: James Moger <james.moger@gitblit.com>
Thu, 7 Jun 2012 21:30:18 +0000 (17:30 -0400)
12 files changed:
distrib/gitblit.properties
docs/04_releases.mkd
src/com/gitblit/GitBlit.java
src/com/gitblit/LuceneExecutor.java
src/com/gitblit/PagesServlet.java
src/com/gitblit/utils/JGitUtils.java
src/com/gitblit/utils/StringUtils.java
src/com/gitblit/wicket/pages/BlobPage.java
src/com/gitblit/wicket/pages/MarkdownPage.java
src/com/gitblit/wicket/pages/RawPage.java
src/com/gitblit/wicket/pages/SummaryPage.java
tests/com/gitblit/tests/JGitUtilsTest.java

index 5292a91e3cb932e92e8038538c5bd964456efdf3..8e768262b7b4dd5853d1bb342cf493624f42716d 100644 (file)
@@ -362,6 +362,16 @@ web.loginMessage = gitblit
 # SINCE 0.5.0\r
 web.repositoriesMessage = gitblit\r
 \r
+# Ordered list of charsets/encodings to use when trying to display a blob.\r
+# If empty, UTF-8 and ISO-8859-1 are used.  The server's default charset\r
+# is always appended to the encoding list.  If all encodings fail to cleanly\r
+# decode the blob content, UTF-8 will be used with the standard malformed\r
+# input/unmappable character replacement strings.\r
+# \r
+# SPACE-DELIMITED\r
+# SINCE 1.0.0\r
+web.blobEncodings = UTF-8 ISO-8859-1\r
+\r
 # Manually set the default timezone to be used by Gitblit for display in the \r
 # web ui.  This value is independent of the JVM timezone.  Specifying a blank\r
 # value will default to the JVM timezone.\r
@@ -432,6 +442,7 @@ web.forwardSlashCharacter = /
 # e.g.\r
 # web.otherUrls = ssh://localhost/git/{0} git://localhost/git/{0}\r
 #\r
+# SPACE-DELIMITED\r
 # SINCE 0.5.0\r
 web.otherUrls = \r
 \r
index d20000bf61d8ac0f9d1bb3ca1113a84d21da9534..00981978718b3feb61b3b7254521070adf0972cc 100644 (file)
@@ -16,6 +16,8 @@
 \r
 #### additions\r
 \r
+- Added setting to control charsets for blob string decoding.  Default encodings are UTF-8, ISO-8859-1, and server's default charset. (issue 97)  \r
+    **New:** *web.blobEncodings = UTF-8 ISO-8859-1*  \r
 - Exposed JGit's internal configuration settings in gitblit.properties/web.xml (issue 93)  \r
     **New:** *git.packedGitWindowSize = 8k*  \r
     **New:** *git.packedGitLimit = 10m*  \r
index f96340ae2ed1d51edd3a791aa84835ee7e84e077..dc53540e8c5b6296ec6ade27d52ac450fc1b3af0 100644 (file)
@@ -189,6 +189,15 @@ public class GitBlit implements ServletContextListener {
                return self().timezone;\r
        }\r
        \r
+       /**\r
+        * Returns the user-defined blob encodings.\r
+        * \r
+        * @return an array of encodings, may be empty\r
+        */\r
+       public static String [] getEncodings() {\r
+               return getStrings(Keys.web.blobEncodings).toArray(new String[0]);\r
+       }\r
+       \r
 \r
        /**\r
         * Returns the boolean value for the specified key. If the key does not\r
index afd1cc5aeb7510c6a9f35846792a2f3051ebbff3..b31654345ea7446edf1a6a00469b39fb2814f117 100644 (file)
@@ -642,6 +642,7 @@ public class LuceneExecutor implements Runnable {
                        String branch, RevCommit commit) {\r
                IndexResult result = new IndexResult();\r
                try {\r
+                       String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);\r
                        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);\r
                        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,\r
                                        Resolution.MINUTE);\r
@@ -674,7 +675,7 @@ public class LuceneExecutor implements Runnable {
                                        if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
                                                // read the blob content\r
                                                String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
-                                                               path.path);\r
+                                                               path.path, encodings);\r
                                                doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
                                                writer.addDocument(doc);\r
                                        }\r
index d6304f70a6639fa7d82dacd1121816a0a97132ab..ad9276b4cbd5a1bf5dcb93938024edc39b811a6a 100644 (file)
@@ -141,13 +141,15 @@ public class PagesServlet extends HttpServlet {
                        }\r
                        response.setDateHeader("Last-Modified", JGitUtils.getCommitDate(commit).getTime());\r
 \r
+                       String [] encodings = GitBlit.getEncodings();\r
+\r
                        RevTree tree = commit.getTree();\r
                        byte[] content = null;\r
                        if (StringUtils.isEmpty(resource)) {\r
                                // find resource\r
                                String[] files = { "index.html", "index.htm", "index.mkd" };\r
                                for (String file : files) {\r
-                                       content = JGitUtils.getStringContent(r, tree, file)\r
+                                       content = JGitUtils.getStringContent(r, tree, file, encodings)\r
                                                        .getBytes(Constants.ENCODING);\r
                                        if (content != null) {\r
                                                resource = file;\r
@@ -165,7 +167,7 @@ public class PagesServlet extends HttpServlet {
                                                contentType = "text/plain";\r
                                        }\r
                                        if (contentType.startsWith("text")) {\r
-                                               content = JGitUtils.getStringContent(r, tree, resource).getBytes(\r
+                                               content = JGitUtils.getStringContent(r, tree, resource, encodings).getBytes(\r
                                                                Constants.ENCODING);\r
                                        } else {\r
                                                content = JGitUtils.getByteContent(r, tree, resource);\r
@@ -177,7 +179,7 @@ public class PagesServlet extends HttpServlet {
 \r
                        // no content, try custom 404 page\r
                        if (ArrayUtils.isEmpty(content)) {\r
-                               String custom404 = JGitUtils.getStringContent(r, tree, "404.html");\r
+                               String custom404 = JGitUtils.getStringContent(r, tree, "404.html", encodings);\r
                                if (!StringUtils.isEmpty(custom404)) {\r
                                        content = custom404.getBytes(Constants.ENCODING);\r
                                }\r
index f5ca5efdf78062738cb8acf4ce1ad052c98dfafa..72a8ab3cb04f7a571fceaac007b6affb09933470 100644 (file)
@@ -20,7 +20,6 @@ import java.io.File;
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.io.OutputStream;\r
-import java.nio.charset.Charset;\r
 import java.text.MessageFormat;\r
 import java.util.ArrayList;\r
 import java.util.Arrays;\r
@@ -543,14 +542,15 @@ public class JGitUtils {
         * @param tree\r
         *            if null, the RevTree from HEAD is assumed.\r
         * @param blobPath\r
+        * @param charsets optional\r
         * @return UTF-8 string content\r
         */\r
-       public static String getStringContent(Repository repository, RevTree tree, String blobPath) {\r
+       public static String getStringContent(Repository repository, RevTree tree, String blobPath, String... charsets) {\r
                byte[] content = getByteContent(repository, tree, blobPath);\r
                if (content == null) {\r
                        return null;\r
                }\r
-               return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));\r
+               return StringUtils.decodeString(content, charsets);\r
        }\r
 \r
        /**\r
@@ -589,14 +589,15 @@ public class JGitUtils {
         * \r
         * @param repository\r
         * @param objectId\r
+        * @param charsets optional\r
         * @return UTF-8 string content\r
         */\r
-       public static String getStringContent(Repository repository, String objectId) {\r
+       public static String getStringContent(Repository repository, String objectId, String... charsets) {\r
                byte[] content = getByteContent(repository, objectId);\r
                if (content == null) {\r
                        return null;\r
                }\r
-               return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));\r
+               return StringUtils.decodeString(content, charsets);\r
        }\r
 \r
        /**\r
index 2c35724156a4b3d8a7bc6bf16cfbe14681f17f98..baed5f0c5912f175d705d935e0cf264b002e8ef0 100644 (file)
 package com.gitblit.utils;\r
 \r
 import java.io.UnsupportedEncodingException;\r
+import java.nio.ByteBuffer;\r
+import java.nio.CharBuffer;\r
+import java.nio.charset.CharacterCodingException;\r
+import java.nio.charset.Charset;\r
+import java.nio.charset.CharsetDecoder;\r
+import java.nio.charset.IllegalCharsetNameException;\r
+import java.nio.charset.UnsupportedCharsetException;\r
 import java.security.MessageDigest;\r
 import java.security.NoSuchAlgorithmException;\r
 import java.util.ArrayList;\r
+import java.util.Arrays;\r
 import java.util.Collection;\r
 import java.util.Collections;\r
 import java.util.Comparator;\r
+import java.util.LinkedHashSet;\r
 import java.util.List;\r
+import java.util.Set;\r
 import java.util.regex.PatternSyntaxException;\r
 \r
 /**\r
@@ -550,4 +560,36 @@ public class StringUtils {
                // remember to append any characters to the right of a match\r
                return sb.toString();\r
        }\r
+       \r
+       /**\r
+        * Decodes a string by trying several charsets until one does not throw a\r
+        * coding exception.  Last resort is to interpret as UTF-8 with illegal\r
+        * character substitution.\r
+        * \r
+        * @param content\r
+        * @param charsets optional\r
+        * @return a string\r
+        */\r
+       public static String decodeString(byte [] content, String... charsets) {\r
+               Set<String> sets = new LinkedHashSet<String>();\r
+               if (!ArrayUtils.isEmpty(charsets)) {\r
+                       sets.addAll(Arrays.asList(charsets));\r
+               }\r
+               sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));\r
+               for (String charset : sets) {\r
+                       try {\r
+                               Charset cs = Charset.forName(charset);\r
+                               CharsetDecoder decoder = cs.newDecoder();\r
+                               CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));\r
+                               return buffer.toString();\r
+                       } catch (CharacterCodingException e) {\r
+                               // ignore and advance to the next charset\r
+                       } catch (IllegalCharsetNameException e) {\r
+                               // ignore illegal charset names\r
+                       } catch (UnsupportedCharsetException e) {\r
+                               // ignore unsupported charsets\r
+                       }\r
+               }\r
+               return new String(content, Charset.forName("UTF-8"));\r
+       }\r
 }
\ No newline at end of file
index 1c438370a6c7b7a0c2907aa95cc79a34b58a04db..fb5a962be740c52574aed76fc2f67dc5f7796d46 100644 (file)
@@ -41,7 +41,8 @@ public class BlobPage extends RepositoryPage {
 \r
                Repository r = getRepository();\r
                final String blobPath = WicketUtils.getPath(params);\r
-\r
+               String [] encodings = GitBlit.getEncodings();\r
+               \r
                if (StringUtils.isEmpty(blobPath)) {\r
                        // blob by objectid\r
 \r
@@ -54,7 +55,7 @@ public class BlobPage extends RepositoryPage {
                        add(new BookmarkablePageLink<Void>("headLink", BlobPage.class).setEnabled(false));\r
                        add(new CommitHeaderPanel("commitHeader", objectId));\r
                        add(new PathBreadcrumbsPanel("breadcrumbs", repositoryName, blobPath, objectId));\r
-                       Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId));\r
+                       Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId, encodings));\r
                        WicketUtils.setCssClass(c, "plainprint");\r
                        add(c);\r
                } else {\r
@@ -111,7 +112,7 @@ public class BlobPage extends RepositoryPage {
                                case 1:\r
                                        // PrettyPrint blob text\r
                                        c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),\r
-                                                       blobPath));\r
+                                                       blobPath, encodings));\r
                                        WicketUtils.setCssClass(c, "prettyprint linenums");\r
                                        break;\r
                                case 2:\r
@@ -125,14 +126,14 @@ public class BlobPage extends RepositoryPage {
                                default:\r
                                        // plain text\r
                                        c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),\r
-                                                       blobPath));\r
+                                                       blobPath, encodings));\r
                                        WicketUtils.setCssClass(c, "plainprint");\r
                                }\r
                                add(c);\r
                        } else {\r
                                // plain text\r
                                Label blobLabel = new Label("blobText", JGitUtils.getStringContent(r,\r
-                                               commit.getTree(), blobPath));\r
+                                               commit.getTree(), blobPath, encodings));\r
                                WicketUtils.setCssClass(blobLabel, "plainprint");\r
                                add(blobLabel);\r
                        }\r
index aaf12bad7a9fce6a733566016f235dc7f26633e1..5764235ac3e2fb6653f739c3fe1f1d7a305076d1 100644 (file)
@@ -24,6 +24,7 @@ import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.Repository;\r
 import org.eclipse.jgit.revwalk.RevCommit;\r
 \r
+import com.gitblit.GitBlit;\r
 import com.gitblit.utils.JGitUtils;\r
 import com.gitblit.utils.MarkdownUtils;\r
 import com.gitblit.wicket.WicketUtils;\r
@@ -37,7 +38,8 @@ public class MarkdownPage extends RepositoryPage {
 \r
                Repository r = getRepository();\r
                RevCommit commit = JGitUtils.getCommit(r, objectId);\r
-\r
+               String [] encodings = GitBlit.getEncodings();\r
+               \r
                // markdown page links\r
                add(new BookmarkablePageLink<Void>("blameLink", BlamePage.class,\r
                                WicketUtils.newPathParameter(repositoryName, objectId, markdownPath)));\r
@@ -49,7 +51,7 @@ public class MarkdownPage extends RepositoryPage {
                                WicketUtils.newPathParameter(repositoryName, Constants.HEAD, markdownPath)));\r
 \r
                // Read raw markdown content and transform it to html\r
-               String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath);\r
+               String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath, encodings);\r
                String htmlText;\r
                try {\r
                        htmlText = MarkdownUtils.transformMarkdown(markdownText);\r
index f71d986f05bc66560cbb041015d122fdc41e5974..00cc5bf7d89f39359d3dd8ab3b1d7567ddb5d170 100644 (file)
@@ -43,7 +43,8 @@ public class RawPage extends WebPage {
                final String repositoryName = WicketUtils.getRepositoryName(params);\r
                final String objectId = WicketUtils.getObject(params);\r
                final String blobPath = WicketUtils.getPath(params);\r
-\r
+               String [] encodings = GitBlit.getEncodings();\r
+               \r
                Repository r = GitBlit.self().getRepository(repositoryName);\r
                if (r == null) {\r
                        error(getString("gb.canNotLoadRepository") + " " + repositoryName);\r
@@ -53,7 +54,7 @@ public class RawPage extends WebPage {
 \r
                if (StringUtils.isEmpty(blobPath)) {\r
                        // objectid referenced raw view\r
-                       Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId));\r
+                       Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId, encodings));\r
                        WicketUtils.setCssClass(blobLabel, "plainprint");\r
                        add(blobLabel);\r
                } else {\r
@@ -92,14 +93,14 @@ public class RawPage extends WebPage {
                                default:\r
                                        // plain text\r
                                        c = new Label("rawText", JGitUtils.getStringContent(r, commit.getTree(),\r
-                                                       blobPath));\r
+                                                       blobPath, encodings));\r
                                        WicketUtils.setCssClass(c, "plainprint");\r
                                }\r
                                add(c);\r
                        } else {\r
                                // plain text\r
                                Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r,\r
-                                               commit.getTree(), blobPath));\r
+                                               commit.getTree(), blobPath, encodings));\r
                                WicketUtils.setCssClass(blobLabel, "plainprint");\r
                                add(blobLabel);\r
                        }\r
index 2996b662df12d8a5c06fd9c4b91f6639729ec67c..8e145c8db5198bd63cb4205504dddd2e107c585c 100644 (file)
@@ -158,7 +158,8 @@ public class SummaryPage extends RepositoryPage {
                                        }\r
                                }\r
                                if (!StringUtils.isEmpty(readme)) {\r
-                                       String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme);\r
+                                       String [] encodings = GitBlit.getEncodings();\r
+                                       String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme, encodings);\r
                                        htmlText = MarkdownUtils.transformMarkdown(markdownText);\r
                                }\r
                        } catch (ParseException p) {\r
index 616ea837b5914708cb86341b32747a3938d03639..dc4d3c5016195cffe64480a31851a342c41e002e 100644 (file)
@@ -37,6 +37,7 @@ import org.eclipse.jgit.lib.PersonIdent;
 import org.eclipse.jgit.lib.Repository;\r
 import org.eclipse.jgit.lib.RepositoryCache.FileKey;\r
 import org.eclipse.jgit.revwalk.RevCommit;\r
+import org.eclipse.jgit.revwalk.RevTree;\r
 import org.eclipse.jgit.util.FS;\r
 import org.eclipse.jgit.util.FileUtils;\r
 import org.junit.Test;\r
@@ -265,7 +266,7 @@ public class JGitUtilsTest {
        @Test\r
        public void testStringContent() throws Exception {\r
                Repository repository = GitBlitSuite.getHelloworldRepository();\r
-               String contentA = JGitUtils.getStringContent(repository, null, "java.java");\r
+               String contentA = JGitUtils.getStringContent(repository, (RevTree) null, "java.java");\r
                RevCommit commit = JGitUtils.getCommit(repository, Constants.HEAD);\r
                String contentB = JGitUtils.getStringContent(repository, commit.getTree(), "java.java");\r
                String contentC = JGitUtils.getStringContent(repository, commit.getTree(), "missing.txt");\r