source.dussan.org Git - redmine.git/commitdiff
Auto guess file encoding when importing CSV file (#34718).
author: Go MAEDA <maeda@farend.jp>
Fri, 7 Jan 2022 01:29:55 +0000 (01:29 +0000)
committer: Go MAEDA <maeda@farend.jp>
Fri, 7 Jan 2022 01:29:55 +0000 (01:29 +0000)
Patch by Go MAEDA.

git-svn-id: http://svn.redmine.org/redmine/trunk@21352 e93f8b46-1217-0410-a6f0-8f06a7374b81

app/models/import.rb
lib/redmine/codeset_util.rb
test/unit/issue_import_test.rb
test/unit/lib/redmine/codeset_util_test.rb

index 92752a3db68ee4368728c6a43f48f80c89866547..ffa69d22eee2afa1896cbf019bcb2192da4b8b4f 100644 (file)
@@ -65,15 +65,23 @@ class Import < ActiveRecord::Base
 
   def set_default_settings(options={})
     separator = lu(user, :general_csv_separator)
+    encoding = lu(user, :general_csv_encoding)
     if file_exists?
       begin
         content = File.read(filepath, 256)
+
         separator = [',', ';'].sort_by {|sep| content.count(sep)}.last
+
+        guessed_encoding = Redmine::CodesetUtil.guess_encoding(file_content)
+        encoding =
+          (guessed_encoding && (
+            Setting::ENCODINGS.detect {|e| e.casecmp?(guessed_encoding)} ||
+            Setting::ENCODINGS.detect {|e| Encoding.find(e) == Encoding.find(guessed_encoding)}
+          )) || lu(user, :general_csv_encoding)
       rescue => e
       end
     end
     wrapper = '"'
-    encoding = lu(user, :general_csv_encoding)
 
     date_format = lu(user, "date.formats.default", :default => "foo")
     date_format = DATE_FORMATS.first unless DATE_FORMATS.include?(date_format)
index 8261e572b7178ba7ef1959b303e2e9a4ccd1ad35..875689de2cbc6e387df91c3156c80fc55120765a 100644 (file)
@@ -75,5 +75,23 @@ module Redmine
         str = self.replace_invalid_utf8(str)
       end
     end
+
+    # Guesses the encoding of +str+ by trying candidate encodings in order.
+    #
+    # Candidates are the comma-separated names in
+    # Setting.repositories_encodings; when that list is empty, only UTF-8 is
+    # tried. The argument itself is never modified: each candidate encoding is
+    # forced onto a copy.
+    #
+    # str - the raw content to inspect (String or nil)
+    #
+    # Returns the first candidate name (as written in the setting) under which
+    # the content is valid, or nil if +str+ is nil or no candidate matches.
+    def self.guess_encoding(str)
+      return if str.nil?
+
+      str = str.dup
+      encodings = Setting.repositories_encodings.split(',').collect(&:strip)
+      encodings = encodings.presence || ['UTF-8']
+
+      encodings.each do |encoding|
+        begin
+          str.force_encoding(encoding)
+        rescue ArgumentError
+          # String#force_encoding raises ArgumentError for an unknown encoding
+          # name. Skip this candidate: falling through would test the string
+          # under the previously forced encoding and report the wrong name.
+          next
+        end
+        return encoding if str.valid_encoding?
+      end
+      nil
+    end
   end
 end
index b08629b553c59e1ed1898e42cec39220927214fc..3f98f03725de4ae287d0e0d3ef1c4db3f3d704d5 100644 (file)
@@ -411,4 +411,31 @@ class IssueImportTest < ActiveSupport::TestCase
 
     assert_empty import.mapping
   end
+
+  # set_default_settings should store the guessed encoding under
+  # settings['encoding']. The expected value is 'ISO-8859-1' for both
+  # spellings of the candidate in repositories_encodings, and it must be a
+  # member of Setting::ENCODINGS.
+  def test_set_default_settings_should_guess_encoding
+    csv_import = generate_import('import_iso8859-1.csv')
+
+    # Candidate spelled exactly like the expected result
+    with_settings(:repositories_encodings => 'UTF-8,ISO-8859-1') do
+      csv_import.set_default_settings
+      assert_equal 'ISO-8859-1', csv_import.settings['encoding']
+    end
+
+    # Candidate spelled differently from the expected result
+    with_settings(:repositories_encodings => 'UTF-8,iso8859-1') do
+      csv_import.set_default_settings
+      detected = csv_import.settings['encoding']
+      assert_equal 'ISO-8859-1', detected
+      assert_includes Setting::ENCODINGS, detected
+    end
+  end
+
+  # With UTF-8 as the only candidate, the test expects settings['encoding']
+  # to equal the user's localized :general_csv_encoding, which is 'CP932'
+  # for a user whose language is 'ja'.
+  def test_set_default_settings_should_use_general_csv_encoding_when_cannnot_guess_encoding
+    csv_import = generate_import('import_iso8859-1.csv')
+    japanese_user = User.generate!(:language => 'ja')
+    csv_import.user = japanese_user
+
+    with_settings(:repositories_encodings => 'UTF-8') do
+      csv_import.set_default_settings
+      chosen = csv_import.settings['encoding']
+      assert_equal 'CP932', lu(japanese_user, :general_csv_encoding)
+      assert_equal 'CP932', chosen
+    end
+  end
 end
index aaf664047ccbba9fcf9fec27bc60e974e5bdc083..56094ecfa79732c79e43f0bf67063d9e81e43759 100644 (file)
@@ -101,4 +101,21 @@ class Redmine::CodesetUtilTest < ActiveSupport::TestCase
     assert_equal "UTF-8", s2.encoding.to_s
     assert_equal 'こんにち?', s2
   end
+
+  # guess_encoding should return the matching candidate exactly as it is
+  # written in the setting, whether given as 'Windows-31J' or 'csWindows31J'.
+  def test_guess_encoding_should_return_guessed_encoding
+    sjis_bytes = '日本語'.encode('Windows-31J').b
+
+    %w[Windows-31J csWindows31J].each do |candidate|
+      with_settings(:repositories_encodings => "UTF-8,#{candidate}") do
+        assert_equal candidate, Redmine::CodesetUtil.guess_encoding(sjis_bytes)
+      end
+    end
+  end
+
+  # guess_encoding should return nil when the content is not valid under any
+  # configured candidate (here UTF-8 and EUC-JP for Windows-31J bytes).
+  #
+  # The test_ prefix is required: Minitest only discovers and runs methods
+  # whose names start with "test_".
+  def test_guess_encoding_should_return_nil_if_cannot_guess_encoding
+    str = '日本語'.encode('Windows-31J').b
+    with_settings :repositories_encodings => 'UTF-8,EUC-JP' do
+      assert_nil Redmine::CodesetUtil.guess_encoding(str)
+    end
+  end
 end