source.dussan.org Git - redmine.git/commitdiff
Support multiple multi-word phrases in the search engine (#38446).
author: Go MAEDA <maeda@farend.jp>
Wed, 19 Jun 2024 14:27:20 +0000 (14:27 +0000)
committer: Go MAEDA <maeda@farend.jp>
Wed, 19 Jun 2024 14:27:20 +0000 (14:27 +0000)
Patch by Go MAEDA (@maeda).

git-svn-id: https://svn.redmine.org/redmine/trunk@22886 e93f8b46-1217-0410-a6f0-8f06a7374b81

lib/redmine/search.rb
test/unit/lib/redmine/search_test.rb

index c07f93aad1f16e2885d6d475183b2358e553b215..6c3ee835c8b0254a46e9022f80bed4be88a76910 100644 (file)
@@ -135,7 +135,11 @@ module Redmine
       def tokens
         # extract tokens from the question
         # eg. hello "bye bye" => ["hello", "bye bye"]
-        tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')}
+        tokens = @question.scan(/"[^"]+"|[^\p{Zs}]+/).map do |token|
+          # Remove quotes from quoted tokens, strip surrounding whitespace
+          # e.g. "\" foo bar \"" => "foo bar"
+          token.gsub(/\A"\p{Zs}*|\p{Zs}*"\Z/, '')
+        end
         # tokens must be at least 2 characters long
         # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
         # no more than 5 tokens to search for
index 72cf14492695215cca45531f29048abac517451f..e2d77cca04600ef5d817bc7e6206591381ea620c 100644 (file)
@@ -30,4 +30,9 @@ class Redmine::Search::Tokenize < ActiveSupport::TestCase
     value = "全角\u3000スペース"
     assert_equal %w[全角 スペース], Redmine::Search::Tokenizer.new(value).tokens
   end
+
+  def test_tokenize_should_support_multiple_phrases
+    value = '"phrase one" "phrase two"'
+    assert_equal ["phrase one", "phrase two"], Redmine::Search::Tokenizer.new(value).tokens
+  end
 end