diff options
author | Go MAEDA <maeda@farend.jp> | 2024-10-20 06:47:28 +0000 |
---|---|---|
committer | Go MAEDA <maeda@farend.jp> | 2024-10-20 06:47:28 +0000 |
commit | 1d46be8b0f037e173ba4bf2c68ab0d0c33f1eb48 (patch) | |
tree | 9be98ebfb892e7726543079138f521997cfbf23b /app | |
parent | 7c66cdaaaf22dae1a9089dba96abf591d1852d78 (diff) | |
download | redmine-1d46be8b0f037e173ba4bf2c68ab0d0c33f1eb48.tar.gz redmine-1d46be8b0f037e173ba4bf2c68ab0d0c33f1eb48.zip |
Fix CSV import file encoding auto-detection failure with multibyte characters (#41464).
Patch by Go MAEDA (user:maeda).
git-svn-id: https://svn.redmine.org/redmine/trunk@23150 e93f8b46-1217-0410-a6f0-8f06a7374b81
Diffstat (limited to 'app')
-rw-r--r-- | app/models/import.rb | 16 |
1 files changed, 15 insertions, 1 deletions
```diff
diff --git a/app/models/import.rb b/app/models/import.rb
index 94e44c5e2..caf673e9a 100644
--- a/app/models/import.rb
+++ b/app/models/import.rb
@@ -69,7 +69,7 @@ class Import < ApplicationRecord
     encoding = lu(user, :general_csv_encoding)
     if file_exists?
       begin
-        content = File.read(filepath, 256)
+        content = read_file_head

         separator = [',', ';'].max_by {|sep| content.count(sep)}
         wrapper = ['"', "'"].max_by {|quote_char| content.count(quote_char)}
@@ -248,6 +248,20 @@ class Import < ApplicationRecord

   private

+  # Reads lines from the beginning of the file, up to the specified number
+  # of bytes (max_read_bytes).
+  def read_file_head(max_read_bytes = 4096)
+    return '' unless file_exists?
+    return File.read(filepath, mode: 'rb') if File.size(filepath) <= max_read_bytes
+
+    # The last byte of the chunk may be part of a multi-byte character,
+    # causing an invalid byte sequence. To avoid this, it truncates
+    # the chunk at the last LF character, if found.
+    chunk = File.read(filepath, max_read_bytes)
+    last_lf_index = chunk.rindex("\n")
+    last_lf_index ? chunk[..last_lf_index] : chunk
+  end
+
   def read_rows
     return unless file_exists?

```