From 3a3fe668c77cdb3266bfd1b067a30a1c09713763 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Lang
Date: Fri, 9 Oct 2015 07:35:48 +0000
Subject: [PATCH] Fix tokenization of phrases with non-ascii chars (#20730).
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Patch by Jens Krämer.

git-svn-id: http://svn.redmine.org/redmine/trunk@14662 e93f8b46-1217-0410-a6f0-8f06a7374b81
---
Reviewer notes (this section follows the "---" separator and is not part of
the commit message):
- lib/redmine/search.rb: the quoted-phrase alternative of the tokenizer regex
  changes from "[\s\w]+" (only whitespace and word characters allowed between
  the quotes) to "[^"]+" (any character except a double quote). Per the commit
  subject, this is what lets a phrase containing non-ASCII characters stay a
  single token.
- test/unit/search_test.rb: the added test checks f.tokens for one phrase with
  no special characters and one phrase ending in "Öö".
- NOTE(review): leading indentation inside the hunks below was reconstructed
  from the hunk line counts and Ruby nesting conventions -- confirm against the
  upstream revision before applying.

 lib/redmine/search.rb    | 2 +-
 test/unit/search_test.rb | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/redmine/search.rb b/lib/redmine/search.rb
index 54eab7745..99795a0d1 100644
--- a/lib/redmine/search.rb
+++ b/lib/redmine/search.rb
@@ -58,7 +58,7 @@ module Redmine
 
         # extract tokens from the question
         # eg. hello "bye bye" => ["hello", "bye bye"]
-        @tokens = @question.scan(%r{((\s|^)"[\s\w]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
+        @tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
         # tokens must be at least 2 characters long
         @tokens = @tokens.uniq.select {|w| w.length > 1 }
         # no more than 5 tokens to search for
diff --git a/test/unit/search_test.rb b/test/unit/search_test.rb
index bd9477025..b640202e8 100644
--- a/test/unit/search_test.rb
+++ b/test/unit/search_test.rb
@@ -190,6 +190,14 @@ class SearchTest < ActiveSupport::TestCase
     Redmine::Database.reset
   end
 
+  def test_fetcher_should_handle_accents_in_phrases
+    f = Redmine::Search::Fetcher.new('No special chars "in a phrase"', User.anonymous, %w(issues), Project.all)
+    assert_equal ['No', 'special', 'chars', 'in a phrase'], f.tokens
+
+    f = Redmine::Search::Fetcher.new('Special chars "in a phrase Öö"', User.anonymous, %w(issues), Project.all)
+    assert_equal ['Special', 'chars', 'in a phrase Öö'], f.tokens
+  end
+
   private
 
   def remove_permission(role, permission)
-- 
2.39.5