From 145e26698791221b007c7dd460fb506cb0237235 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 7 Nov 2024 11:57:07 +0800 Subject: Support quote selected comments to reply (#32431) Many existing tests were quite hacky; these could be improved later.
![image](https://github.com/user-attachments/assets/93aebb4f-9de5-4cb8-910b-50c64cbcd25a)
--- web_src/js/markup/html2markdown.test.ts | 24 +++++++ web_src/js/markup/html2markdown.ts | 119 ++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 web_src/js/markup/html2markdown.test.ts create mode 100644 web_src/js/markup/html2markdown.ts (limited to 'web_src/js/markup') diff --git a/web_src/js/markup/html2markdown.test.ts b/web_src/js/markup/html2markdown.test.ts new file mode 100644 index 0000000000..99a63956a0 --- /dev/null +++ b/web_src/js/markup/html2markdown.test.ts @@ -0,0 +1,24 @@ +import {convertHtmlToMarkdown} from './html2markdown.ts'; +import {createElementFromHTML} from '../utils/dom.ts'; + +const h = createElementFromHTML; + +test('convertHtmlToMarkdown', () => { + expect(convertHtmlToMarkdown(h(`

h

`))).toBe('# h'); + expect(convertHtmlToMarkdown(h(`txt`))).toBe('**txt**'); + expect(convertHtmlToMarkdown(h(`txt`))).toBe('_txt_'); + expect(convertHtmlToMarkdown(h(`txt`))).toBe('~~txt~~'); + + expect(convertHtmlToMarkdown(h(`txt`))).toBe('[txt](link)'); + expect(convertHtmlToMarkdown(h(`https://link`))).toBe('https://link'); + + expect(convertHtmlToMarkdown(h(``))).toBe('![image](link)'); + expect(convertHtmlToMarkdown(h(`name`))).toBe('![name](link)'); + expect(convertHtmlToMarkdown(h(``))).toBe('image'); + + expect(convertHtmlToMarkdown(h(`

txt

`))).toBe('txt\n'); + expect(convertHtmlToMarkdown(h(`
a\nb
`))).toBe('> a\n> b\n'); + + expect(convertHtmlToMarkdown(h(`
  1. a
    • b
`))).toBe('1. a\n * b\n\n'); + expect(convertHtmlToMarkdown(h(`
  1. a
import {htmlEscape} from 'escape-goat';

/**
 * Converter for one kind of element. A processor either rewrites `el.textContent` in place
 * (and returns nothing, or `el` itself), or returns a replacement for the element:
 * a string (inserted as a text node) or a different element.
 */
type Processor = (el: HTMLElement) => string | HTMLElement | void;

/** Processors keyed by `el.tagName` (e.g. `H1`, `STRONG`, `LI`). */
type Processors = {
  [tagName: string]: Processor;
}

/** Traversal state shared between `processElement` and the processors. */
type ProcessorContext = {
  /** Whether the element being converted is the first element child of its parent. */
  elementIsFirst: boolean;
  /** Whether the element being converted is the last element child of its parent. */
  elementIsLast: boolean;
  /** Number of OL/UL containers whose children are currently being traversed. */
  listNestingLevel: number;
}

/**
 * Builds the tag-name -> processor table.
 *
 * The processors close over `ctx` and read it at the moment they run, so the same
 * context object must be the one `processElement` updates during the traversal.
 */
function prepareProcessors(ctx: ProcessorContext): Processors {
  // H1..H6 share one converter: the digit in the tag name is the number of leading '#'.
  const heading: Processor = (el) => {
    const level = parseInt(el.tagName.slice(1), 10);
    el.textContent = `${'#'.repeat(level)} ${(el.textContent ?? '').trim()}`;
  };

  // OL and UL share one converter. When it runs, the container's own level has already
  // been popped, so a non-zero level means "this list is nested inside another list" and
  // needs a line break to separate it from the parent item's text.
  const list: Processor = (el) => {
    const preNewLine = ctx.listNestingLevel ? '\n' : '';
    el.textContent = `${preNewLine}${el.textContent ?? ''}\n`;
  };

  const processors: Processors = {
    H1: heading,
    STRONG: (el) => `**${el.textContent ?? ''}**`,
    EM: (el) => `_${el.textContent ?? ''}_`,
    DEL: (el) => `~~${el.textContent ?? ''}~~`,

    A(el) {
      const text = el.textContent || 'link';
      const href = el.getAttribute('href');
      // an auto-linked URL is emitted bare instead of as [url](url)
      if (/^https?:/.test(text) && text === href) return text;
      return href ? `[${text}](${href})` : text;
    },
    IMG(el) {
      const alt = el.getAttribute('alt') || 'image';
      const src = el.getAttribute('src') ?? '';
      const widthAttr = el.hasAttribute('width') ? ` width="${htmlEscape(el.getAttribute('width') || '')}"` : '';
      const heightAttr = el.hasAttribute('height') ? ` height="${htmlEscape(el.getAttribute('height') || '')}"` : '';
      if (widthAttr || heightAttr) {
        // Markdown image syntax cannot carry dimensions, so fall back to raw HTML
        // with every attribute value escaped.
        return `<img alt="${htmlEscape(alt)}"${widthAttr}${heightAttr} src="${htmlEscape(src)}">`;
      }
      return `![${alt}](${src})`;
    },

    P(el) {
      el.textContent = `${el.textContent ?? ''}\n`;
    },
    BLOCKQUOTE(el) {
      // prefix every line of the quoted text with "> "
      el.textContent = `${(el.textContent ?? '').replace(/^/mg, '> ')}\n`;
    },

    OL: list,
    UL: list,
    LI(el) {
      const bullet = el.parentElement?.tagName === 'OL' ? '1. ' : '* ';
      // The level includes the list this item belongs to, so top-level items get no indent.
      const nestingIndentLevel = Math.max(0, ctx.listNestingLevel - 1);
      const indent = ' '.repeat(nestingIndentLevel * 4);
      // The enclosing list appends the final line break, so the last item adds none.
      const lineEnd = ctx.elementIsLast ? '' : '\n';
      el.textContent = `${indent}${bullet}${el.textContent ?? ''}${lineEnd}`;
      return el;
    },
    // task-list checkbox
    INPUT: (el) => ((el as HTMLInputElement).checked ? '[x] ' : '[ ] '),

    CODE(el) {
      const text = el.textContent ?? '';
      if (el.parentElement?.tagName === 'PRE') {
        // code block: keep the element so the surrounding PRE still contains it
        el.textContent = `\`\`\`\n${text}\n\`\`\`\n`;
        return el;
      }
      // inline code containing a backtick needs the double-backtick form
      if (text.includes('`')) return `\`\` ${text} \`\``;
      return `\`${text}\``;
    },
  };
  for (let level = 2; level <= 6; level++) {
    processors[`H${level}`] = heading;
  }
  return processors;
}

/**
 * Converts `el` and its descendants to Markdown text in place, children first.
 *
 * Elements carrying `data-markdown-generated-content` are left untouched, and a link that
 * only wraps an image is converted as that image (the link itself is not processed).
 * The return value is not used by the callers in this module.
 */
function processElement(ctx: ProcessorContext, processors: Processors, el: HTMLElement): string | void {
  if (el.hasAttribute('data-markdown-generated-content')) return el.textContent ?? '';
  const onlyChild = el.children.length === 1 ? el.firstElementChild : null;
  if (el.tagName === 'A' && onlyChild?.tagName === 'IMG') {
    return processElement(ctx, processors, onlyChild as HTMLElement);
  }

  // Position of `el` among its siblings, as set by the caller's loop. Processing the
  // children below overwrites these fields, so they are restored before `el`'s own
  // processor runs (otherwise an LI with element children would always look "last").
  const isFirst = ctx.elementIsFirst;
  const isLast = ctx.elementIsLast;

  const isListContainer = el.tagName === 'OL' || el.tagName === 'UL';
  if (isListContainer) ctx.listNestingLevel++;
  // `el.children` is a live collection and child processors may replace elements with
  // text nodes; iterate over a snapshot so no sibling is skipped.
  const children = Array.from(el.children) as HTMLElement[];
  children.forEach((child, i) => {
    ctx.elementIsFirst = i === 0;
    ctx.elementIsLast = i === children.length - 1;
    processElement(ctx, processors, child);
  });
  if (isListContainer) ctx.listNestingLevel--;

  ctx.elementIsFirst = isFirst;
  ctx.elementIsLast = isLast;

  const processor = processors[el.tagName];
  if (!processor) return;
  const ret = processor(el);
  if (ret && ret !== el) {
    el.replaceWith(typeof ret === 'string' ? document.createTextNode(ret) : ret);
  }
}

/**
 * Converts an element tree (rendered Markdown HTML) back to Markdown text.
 *
 * @param el Root element to convert. It is moved into a temporary wrapper `div` and its
 *   subtree is rewritten in place, so pass a detached copy if the original must be kept.
 * @returns The Markdown text of the converted tree.
 */
export function convertHtmlToMarkdown(el: HTMLElement): string {
  // The wrapper gives the root a parent, so a processor can replace the root itself,
  // and the result is read back from the wrapper afterwards.
  const div = document.createElement('div');
  div.append(el);
  const ctx: ProcessorContext = {elementIsFirst: false, elementIsLast: false, listNestingLevel: 0};
  processElement(ctx, prepareProcessors(ctx), el);
  return div.textContent ?? '';
}