From 3ae42cb32617670cb6c99a60f5cda2cf961d110c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Lang Date: Mon, 15 Jun 2015 21:16:42 +0000 Subject: [PATCH] Better handle html-only emails (#16962). git-svn-id: http://svn.redmine.org/redmine/trunk@14313 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- Gemfile | 1 + app/models/mail_handler.rb | 16 +- lib/redmine.rb | 7 +- lib/redmine/wiki_formatting.rb | 18 +- lib/redmine/wiki_formatting/html_parser.rb | 54 + .../wiki_formatting/markdown/html_parser.rb | 40 + .../wiki_formatting/textile/html_parser.rb | 41 + .../mail_handler/outlook_2010_html_only.eml | 966 ++++++++++++++++++ .../outlook_web_access_2010_html_only.eml | 65 ++ .../mail_handler/ticket_html_only.eml | 3 +- .../wiki_formatting/html_parser_test.rb | 30 + .../markdown_html_parser_test.rb | 30 + .../textile_html_parser_test.rb | 30 + test/unit/mail_handler_test.rb | 30 +- 14 files changed, 1312 insertions(+), 19 deletions(-) create mode 100644 lib/redmine/wiki_formatting/html_parser.rb create mode 100644 lib/redmine/wiki_formatting/markdown/html_parser.rb create mode 100644 lib/redmine/wiki_formatting/textile/html_parser.rb create mode 100644 test/fixtures/mail_handler/outlook_2010_html_only.eml create mode 100644 test/fixtures/mail_handler/outlook_web_access_2010_html_only.eml create mode 100644 test/unit/lib/redmine/wiki_formatting/html_parser_test.rb create mode 100644 test/unit/lib/redmine/wiki_formatting/markdown_html_parser_test.rb create mode 100644 test/unit/lib/redmine/wiki_formatting/textile_html_parser_test.rb diff --git a/Gemfile b/Gemfile index ea108bc20..af34e4e6f 100644 --- a/Gemfile +++ b/Gemfile @@ -13,6 +13,7 @@ gem "mime-types" gem "protected_attributes" gem "actionpack-action_caching" gem "actionpack-xml_parser" +gem "loofah", "~> 2.0" # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :x64_mingw, :mswin, :jruby] diff --git a/app/models/mail_handler.rb b/app/models/mail_handler.rb index 78cff699b..5ea370e2a 100644 --- a/app/models/mail_handler.rb +++ b/app/models/mail_handler.rb @@ -433,14 +433,11 @@ class MailHandler < ActionMailer::Base @plain_text_body = parts.map do |p| body_charset = Mail::RubyVer.respond_to?(:pick_encoding) ? Mail::RubyVer.pick_encoding(p.charset).to_s : p.charset - Redmine::CodesetUtil.to_utf8(p.body.decoded, body_charset) - end.join("\r\n") - # strip html tags and remove doctype directive - if parts.any? {|p| p.mime_type == 'text/html'} - @plain_text_body = strip_tags(@plain_text_body.strip) - @plain_text_body.sub! %r{^ 'Markdown' - end + format.register :textile + format.register :markdown if Object.const_defined?(:Redcarpet) end ActionView::Template.register_template_handler :rsb, Redmine::Views::ApiTemplateHandler diff --git a/lib/redmine/wiki_formatting.rb b/lib/redmine/wiki_formatting.rb index c4e4bb7c3..4fd5ea8e8 100644 --- a/lib/redmine/wiki_formatting.rb +++ b/lib/redmine/wiki_formatting.rb @@ -28,12 +28,19 @@ module Redmine yield self end - def register(name, formatter, helper, options={}) + def register(name, *args) + options = args.last.is_a?(Hash) ? args.pop : {} name = name.to_s raise ArgumentError, "format name '#{name}' is already taken" if @@formatters[name] + + formatter, helper, parser = args.any? ? + args : + %w(Formatter Helper HtmlParser).map {|m| "Redmine::WikiFormatting::#{name.classify}::#{m}".constantize} + @@formatters[name] = { :formatter => formatter, :helper => helper, + :html_parser => parser, :label => options[:label] || name.humanize } end @@ -42,6 +49,10 @@ module Redmine formatter_for(Setting.text_formatting) end + def html_parser + html_parser_for(Setting.text_formatting) + end + def formatter_for(name) entry = @@formatters[name.to_s] (entry && entry[:formatter]) || Redmine::WikiFormatting::NullFormatter::Formatter @@ -52,6 +63,11 @@ module Redmine (entry && entry[:helper]) || Redmine::WikiFormatting::NullFormatter::Helper end + def html_parser_for(name) + entry = @@formatters[name.to_s] + (entry && entry[:html_parser]) || Redmine::WikiFormatting::HtmlParser + end + def format_names @@formatters.keys.map end diff --git a/lib/redmine/wiki_formatting/html_parser.rb b/lib/redmine/wiki_formatting/html_parser.rb new file mode 100644 index 000000000..9d83497bd --- /dev/null +++ b/lib/redmine/wiki_formatting/html_parser.rb @@ -0,0 +1,54 @@ +# Redmine - project management software +# Copyright (C) 2006-2015 Jean-Philippe Lang +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +require 'loofah/helpers' + +module Redmine + module WikiFormatting + class HtmlParser + + class_attribute :tags + self.tags = { + 'br' => {:post => "\n"} + } + + def self.to_text(html) + html = html.gsub(/[\n\r]/, '').squeeze(' ') + + doc = Loofah.document(html) + doc.scrub!(WikiTags.new(tags)) + doc.scrub!(:newline_block_elements) + + Loofah::Helpers.remove_extraneous_whitespace(doc.text).strip + end + + class WikiTags < ::Loofah::Scrubber + def initialize(tags_to_text) + @direction = :bottom_up + @tags_to_text = tags_to_text || {} + end + + def scrub(node) + formatting = @tags_to_text[node.name] + return CONTINUE unless formatting + node.add_next_sibling Nokogiri::XML::Text.new("#{formatting[:pre]}#{node.content}#{formatting[:post]}", node.document) + node.remove + end + end + end + end +end diff --git a/lib/redmine/wiki_formatting/markdown/html_parser.rb b/lib/redmine/wiki_formatting/markdown/html_parser.rb new file mode 100644 index 000000000..14f89373a --- /dev/null +++ b/lib/redmine/wiki_formatting/markdown/html_parser.rb @@ -0,0 +1,40 @@ +# Redmine - project management software +# Copyright (C) 2006-2015 Jean-Philippe Lang +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +module Redmine + module WikiFormatting + module Markdown + class HtmlParser < Redmine::WikiFormatting::HtmlParser + + self.tags = { + 'b' => {:pre => '**', :post => '**'}, + 'strong' => {:pre => '**', :post => '**'}, + 'i' => {:pre => '_', :post => '_'}, + 'em' => {:pre => '_', :post => '_'}, + 'strike' => {:pre => '~~', :post => '~~'}, + 'br' => {:post => "\n"}, + 'h1' => {:pre => "\n\n# ", :post => "\n\n"}, + 'h2' => {:pre => "\n\n## ", :post => "\n\n"}, + 'h3' => {:pre => "\n\n### ", :post => "\n\n"}, + 'h4' => {:pre => "\n\n#### ", :post => "\n\n"}, + 'h5' => {:pre => "\n\n##### ", :post => "\n\n"}, + 'h6' => {:pre => "\n\n###### ", :post => "\n\n"} + } + end + end + end +end diff --git a/lib/redmine/wiki_formatting/textile/html_parser.rb b/lib/redmine/wiki_formatting/textile/html_parser.rb new file mode 100644 index 000000000..201e69c64 --- /dev/null +++ b/lib/redmine/wiki_formatting/textile/html_parser.rb @@ -0,0 +1,41 @@ +# Redmine - project management software +# Copyright (C) 2006-2015 Jean-Philippe Lang +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +module Redmine + module WikiFormatting + module Textile + class HtmlParser < Redmine::WikiFormatting::HtmlParser + + self.tags = { + 'b' => {:pre => '*', :post => '*'}, + 'strong' => {:pre => '*', :post => '*'}, + 'i' => {:pre => '_', :post => '_'}, + 'em' => {:pre => '_', :post => '_'}, + 'u' => {:pre => '+', :post => '+'}, + 'strike' => {:pre => '-', :post => '-'}, + 'br' => {:post => "\n"}, + 'h1' => {:pre => "\n\nh1. ", :post => "\n\n"}, + 'h2' => {:pre => "\n\nh2. ", :post => "\n\n"}, + 'h3' => {:pre => "\n\nh3. ", :post => "\n\n"}, + 'h4' => {:pre => "\n\nh4. ", :post => "\n\n"}, + 'h5' => {:pre => "\n\nh5. ", :post => "\n\n"}, + 'h6' => {:pre => "\n\nh6. ", :post => "\n\n"} + } + end + end + end +end diff --git a/test/fixtures/mail_handler/outlook_2010_html_only.eml b/test/fixtures/mail_handler/outlook_2010_html_only.eml new file mode 100644 index 000000000..0e1428098 --- /dev/null +++ b/test/fixtures/mail_handler/outlook_2010_html_only.eml @@ -0,0 +1,966 @@ +From: jsmith@somenet.foo +To: testuser@example.org +Subject: =?utf-8?Q?Test_email?= +Date: Mon, 11 May 2015 10:50:31 -0500 +MIME-Version: 1.0 +Content-Type: multipart/alternative; + boundary="Mark=_539924359269962179476" +X-Priority: 3 + +This is a multi-part message in MIME format. + +--Mark=_539924359269962179476 +Content-Type: text/plain; + charset="utf-8" +Content-Transfer-Encoding: quoted-printable + +Simple, unadorned test email generated by Outlook 2010. It is in HTML f= +ormat, but no special formatting has been chosen. I=E2=80=99m going to = +save this as a draft and then manually drop it into the Inbox for scrap= +ing by Redmine 3.0.2. + +--Mark=_539924359269962179476 +Content-Type: text/html; + charset="utf-8" +Content-Transfer-Encoding: quoted-printable + + +
<= +p class=3DMsoPlainText>Simple, unadorned test email generated by Outloo= +k 2010. It is in HTML format, but no special formatting has been chosen= +. I=E2=80=99m going to save this as a draft and then manually drop it i= +nto the Inbox for scraping by Redmine 3.0.2.

+ +--Mark=_539924359269962179476-- diff --git a/test/fixtures/mail_handler/outlook_web_access_2010_html_only.eml b/test/fixtures/mail_handler/outlook_web_access_2010_html_only.eml new file mode 100644 index 000000000..b2f6a566f --- /dev/null +++ b/test/fixtures/mail_handler/outlook_web_access_2010_html_only.eml @@ -0,0 +1,65 @@ +From: "John Smith" +To: redmine +Subject: Upgrade Redmine to 3.0.x +Thread-Topic: Upgrade Redmine to 3.0.x +Thread-Index: AQHQknBe94y5Or7Yl02JransMRF41p2Dv6Hu +Date: Tue, 19 May 2015 16:27:43 -0400 +Message-ID: +Accept-Language: en-US +Content-Language: en-US +X-MS-Exchange-Organization-AuthAs: Internal +X-MS-Exchange-Organization-AuthMechanism: 04 +X-MS-Exchange-Organization-AuthSource: EHUB01.exch.local +X-MS-Has-Attach: +X-MS-Exchange-Organization-SCL: -1 +X-MS-TNEF-Correlator: +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable +MIME-Version: 1.0 + + + + + + + +
A mess.
+

+
--Geoff Maciolek
+MYCOMPANYNAME, LLC
+
+
+
+ + \ No newline at end of file diff --git a/test/fixtures/mail_handler/ticket_html_only.eml b/test/fixtures/mail_handler/ticket_html_only.eml index 511e5f107..83710bd65 100644 --- a/test/fixtures/mail_handler/ticket_html_only.eml +++ b/test/fixtures/mail_handler/ticket_html_only.eml @@ -15,8 +15,9 @@ Content-Transfer-Encoding: 7bit + -This is a html-only email.
+This is a html-only email.

With a title

and a paragraph.

diff --git a/test/unit/lib/redmine/wiki_formatting/html_parser_test.rb b/test/unit/lib/redmine/wiki_formatting/html_parser_test.rb new file mode 100644 index 000000000..4662aeaeb --- /dev/null +++ b/test/unit/lib/redmine/wiki_formatting/html_parser_test.rb @@ -0,0 +1,30 @@ +# Redmine - project management software +# Copyright (C) 2006-2015 Jean-Philippe Lang +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +require File.expand_path('../../../../../test_helper', __FILE__) + +class Redmine::WikiFormatting::HtmlParserTest < ActiveSupport::TestCase + + def setup + @parser = Redmine::WikiFormatting::HtmlParser + end + + def test_convert_line_breaks + assert_equal "A html snippet with\na new line.", + @parser.to_text('

A html snippet with
a new line.

') + end +end diff --git a/test/unit/lib/redmine/wiki_formatting/markdown_html_parser_test.rb b/test/unit/lib/redmine/wiki_formatting/markdown_html_parser_test.rb new file mode 100644 index 000000000..01e248021 --- /dev/null +++ b/test/unit/lib/redmine/wiki_formatting/markdown_html_parser_test.rb @@ -0,0 +1,30 @@ +# Redmine - project management software +# Copyright (C) 2006-2015 Jean-Philippe Lang +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +require File.expand_path('../../../../../test_helper', __FILE__) + +class Redmine::WikiFormatting::MarkdownHtmlParserTest < ActiveSupport::TestCase + + def setup + @parser = Redmine::WikiFormatting::Markdown::HtmlParser + end + + def test_should_convert_tags + assert_equal 'A **simple** html snippet.', + @parser.to_text('

A simple html snippet.

') + end +end diff --git a/test/unit/lib/redmine/wiki_formatting/textile_html_parser_test.rb b/test/unit/lib/redmine/wiki_formatting/textile_html_parser_test.rb new file mode 100644 index 000000000..cc838bbf0 --- /dev/null +++ b/test/unit/lib/redmine/wiki_formatting/textile_html_parser_test.rb @@ -0,0 +1,30 @@ +# Redmine - project management software +# Copyright (C) 2006-2015 Jean-Philippe Lang +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +require File.expand_path('../../../../../test_helper', __FILE__) + +class Redmine::WikiFormatting::TextileHtmlParserTest < ActiveSupport::TestCase + + def setup + @parser = Redmine::WikiFormatting::Textile::HtmlParser + end + + def test_should_convert_tags + assert_equal 'A *simple* html snippet.', + @parser.to_text('

A simple html snippet.

') + end +end diff --git a/test/unit/mail_handler_test.rb b/test/unit/mail_handler_test.rb index 99900666d..4fc710ba2 100644 --- a/test/unit/mail_handler_test.rb +++ b/test/unit/mail_handler_test.rb @@ -797,13 +797,33 @@ class MailHandlerTest < ActiveSupport::TestCase assert_equal Message.find(1), m.parent end - def test_should_strip_tags_of_html_only_emails - issue = submit_email('ticket_html_only.eml', :issue => {:project => 'ecookbook'}) + def test_should_convert_tags_of_html_only_emails + with_settings :text_formatting => 'textile' do + issue = submit_email('ticket_html_only.eml', :issue => {:project => 'ecookbook'}) + assert issue.is_a?(Issue) + assert !issue.new_record? + issue.reload + assert_equal 'HTML email', issue.subject + assert_equal "This is a *html-only* email.\r\n\r\nh1. With a title\r\n\r\nand a paragraph.", issue.description + end + end + + def test_should_handle_outlook_web_access_2010_html_only + issue = submit_email('outlook_web_access_2010_html_only.eml', :issue => {:project => 'ecookbook'}) + assert issue.is_a?(Issue) + issue.reload + assert_equal 'Upgrade Redmine to 3.0.x', issue.subject + assert_equal "A mess.\r\n\r\n--Geoff Maciolek\r\nMYCOMPANYNAME, LLC", issue.description + end + + def test_should_handle_outlook_2010_html_only + issue = submit_email('outlook_2010_html_only.eml', :issue => {:project => 'ecookbook'}) assert issue.is_a?(Issue) - assert !issue.new_record? issue.reload - assert_equal 'HTML email', issue.subject - assert_equal 'This is a html-only email.', issue.description + assert_equal 'Test email', issue.subject + assert_equal "Simple, unadorned test email generated by Outlook 2010. It is in HTML format, but" + + " no special formatting has been chosen. I’m going to save this as a draft and then manually" + + " drop it into the Inbox for scraping by Redmine 3.0.2.", issue.description end test "truncate emails with no setting should add the entire email into the issue" do -- 2.39.5