Browse Source

Auto guess file encoding when importing CSV file (#34718).

Patch by Go MAEDA.


git-svn-id: http://svn.redmine.org/redmine/trunk@21352 e93f8b46-1217-0410-a6f0-8f06a7374b81
tags/5.0.0
Go MAEDA 2 years ago
parent
commit
ec0ce00b95

+ 9
- 1
app/models/import.rb View File

@@ -65,15 +65,23 @@ class Import < ActiveRecord::Base

def set_default_settings(options={})
separator = lu(user, :general_csv_separator)
encoding = lu(user, :general_csv_encoding)
if file_exists?
begin
content = File.read(filepath, 256)

separator = [',', ';'].sort_by {|sep| content.count(sep)}.last

guessed_encoding = Redmine::CodesetUtil.guess_encoding(file_content)
encoding =
(guessed_encoding && (
Setting::ENCODINGS.detect {|e| e.casecmp?(guessed_encoding)} ||
Setting::ENCODINGS.detect {|e| Encoding.find(e) == Encoding.find(guessed_encoding)}
)) || lu(user, :general_csv_encoding)
rescue => e
end
end
wrapper = '"'
encoding = lu(user, :general_csv_encoding)

date_format = lu(user, "date.formats.default", :default => "foo")
date_format = DATE_FORMATS.first unless DATE_FORMATS.include?(date_format)

+ 18
- 0
lib/redmine/codeset_util.rb View File

@@ -75,5 +75,23 @@ module Redmine
str = self.replace_invalid_utf8(str)
end
end

# Guesses the encoding of +str+ by trying, in order, each encoding name
# listed in the comma-separated Setting.repositories_encodings value.
#
# Returns the first configured name (exactly as written in the setting,
# minus surrounding whitespace) for which the bytes of +str+ form a valid
# string, or nil when +str+ is nil or no configured encoding fits.
# When the setting lists no encodings, only 'UTF-8' is tried.
# The caller's string is left untouched.
def self.guess_encoding(str)
  return if str.nil?

  # force_encoding relabels its receiver in place, so work on a copy.
  str = str.dup
  encodings = Setting.repositories_encodings.split(',').map(&:strip)
  encodings = ['UTF-8'] if encodings.empty?

  encodings.find do |encoding|
    begin
      str.force_encoding(encoding).valid_encoding?
    rescue ArgumentError, Encoding::ConverterNotFoundError
      # String#force_encoding raises ArgumentError for an unknown encoding
      # name. Skip such entries instead of aborting the whole guess or
      # validating against the previously applied encoding.
      false
    end
  end
end
end
end

+ 27
- 0
test/unit/issue_import_test.rb View File

@@ -411,4 +411,31 @@ class IssueImportTest < ActiveSupport::TestCase

assert_empty import.mapping
end

# set_default_settings should store 'ISO-8859-1' as the import encoding for
# the ISO-8859-1 fixture, both when repositories_encodings spells the name
# canonically and when it uses the 'iso8859-1' spelling; in the latter case
# the stored value must also be one of Setting::ENCODINGS.
def test_set_default_settings_should_guess_encoding
  import = generate_import('import_iso8859-1.csv')

  with_settings :repositories_encodings => 'UTF-8,ISO-8859-1' do
    import.set_default_settings
    assert_equal 'ISO-8859-1', import.settings['encoding']
  end

  with_settings :repositories_encodings => 'UTF-8,iso8859-1' do
    import.set_default_settings
    stored_encoding = import.settings['encoding']
    assert_equal 'ISO-8859-1', stored_encoding
    assert_includes Setting::ENCODINGS, stored_encoding
  end
end

# When repositories_encodings only lists 'UTF-8', the ISO-8859-1 fixture
# should end up with the user's general_csv_encoding ('CP932' for a user
# whose language is 'ja') as the import encoding.
def test_set_default_settings_should_use_general_csv_encoding_when_cannnot_guess_encoding
  japanese_user = User.generate!(:language => 'ja')
  import = generate_import('import_iso8859-1.csv')
  import.user = japanese_user

  with_settings :repositories_encodings => 'UTF-8' do
    import.set_default_settings
    stored_encoding = import.settings['encoding']

    # Sanity check on the locale default before checking the fallback itself.
    assert_equal 'CP932', lu(japanese_user, :general_csv_encoding)
    assert_equal 'CP932', stored_encoding
  end
end
end

+ 17
- 0
test/unit/lib/redmine/codeset_util_test.rb View File

@@ -101,4 +101,21 @@ class Redmine::CodesetUtilTest < ActiveSupport::TestCase
assert_equal "UTF-8", s2.encoding.to_s
assert_equal 'こんにち?', s2
end

# guess_encoding should return the matching encoding name exactly as it is
# written in repositories_encodings, whether that is 'Windows-31J' or the
# 'csWindows31J' spelling.
def test_guess_encoding_should_return_guessed_encoding
  sjis_bytes = '日本語'.encode('Windows-31J').b

  %w[Windows-31J csWindows31J].each do |configured_name|
    with_settings :repositories_encodings => "UTF-8,#{configured_name}" do
      assert_equal configured_name, Redmine::CodesetUtil.guess_encoding(sjis_bytes)
    end
  end
end

# guess_encoding should return nil when the bytes are valid in none of the
# encodings listed in repositories_encodings (Windows-31J bytes checked
# against UTF-8 and EUC-JP).
#
# The method name carries the `test_` prefix so that the test runner
# actually discovers and executes it; without the prefix it was never run.
def test_guess_encoding_should_return_nil_if_cannot_guess_encoding
  str = '日本語'.encode('Windows-31J').b
  with_settings :repositories_encodings => 'UTF-8,EUC-JP' do
    assert_nil Redmine::CodesetUtil.guess_encoding(str)
  end
end
end

Loading…
Cancel
Save