From 6c7259eec8b94900763e3410a9d37825e313568b Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 22 Sep 2016 17:18:12 +0100 Subject: [PATCH 1/6] Better detection of when input contains markdown --- src/Markdown.js | 99 +++++++++++++++++++ .../views/rooms/MessageComposerInputOld.js | 37 ++++--- 2 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 src/Markdown.js diff --git a/src/Markdown.js b/src/Markdown.js new file mode 100644 index 0000000000..e4161c0b76 --- /dev/null +++ b/src/Markdown.js @@ -0,0 +1,99 @@ +/* +Copyright 2016 OpenMarket Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import marked from 'marked'; + +// marked only applies the default options on the high +// level marked() interface, so we do it here. +const marked_options = Object.assign({}, { + renderer: new marked.Renderer(), + gfm: true, + tables: true, + breaks: true, + pedantic: false, + sanitize: true, + smartLists: true, + smartypants: false, +}, marked.defaults); + +const real_parser = new marked.Parser(marked_options); + +/** + * Class that wraps marked, adding the ability to see whether + * a given message actually uses any markdown syntax or whether + * it's plain text. + */ +export default class Markdown { + constructor(input) { + const lexer = new marked.Lexer(marked_options); + this.tokens = lexer.lex(input); + } + + _copyTokens() { + // copy tokens (the parser modifies it's input arg) + const tokens_copy = this.tokens.slice(); + // it also has a 'links' property, because this is javascript + // and why wouldn't you have an array that also has properties? + return Object.assign(tokens_copy, this.tokens); + } + + isPlainText() { + // we determine if the message requires markdown by + // running the parser on the tokens with a dummy + // rendered and seeing if any of the renderer's + // functions are called other than those noted below. + // In case you were wondering, no we can't just examine + // the tokens because the tokens we have are only the + // output of the *first* tokenizer: any line-based + // markdown is processed by marked within Parser by + // the 'inline lexer'... + let is_plain = true; + + function setNotPlain() { + is_plain = false; + } + + const dummyRenderer = {}; + for (const k of Object.keys(marked.Renderer.prototype)) { + dummyRenderer[k] = setNotPlain; + } + // text and paragraph are just text + dummyRenderer.text = function(t){return t;} + dummyRenderer.paragraph = function(t){return t;} + + // ignore links where text is just the url: + // this ignores plain URLs that markdown has + // detected whilst preserving markdown syntax links + dummyRenderer.link = function(href, title, text) { + if (text != href) { + is_plain = false; + } + } + + const dummyOptions = {}; + Object.assign(dummyOptions, marked_options, { + renderer: dummyRenderer, + }); + const dummyParser = new marked.Parser(dummyOptions); + dummyParser.parse(this._copyTokens()); + + return is_plain; + } + + toHTML() { + return real_parser.parse(this._copyTokens()); + } +} diff --git a/src/components/views/rooms/MessageComposerInputOld.js b/src/components/views/rooms/MessageComposerInputOld.js index 20b57fb246..5a1ca4e461 100644 --- a/src/components/views/rooms/MessageComposerInputOld.js +++ b/src/components/views/rooms/MessageComposerInputOld.js @@ -16,6 +16,18 @@ var React = require("react"); var marked = require("marked"); + +var marked_options = { + renderer: new marked.Renderer(), + gfm: true, + tables: true, + breaks: true, + pedantic: false, + sanitize: true, + smartLists: true, + smartypants: false +}; + marked.setOptions({ renderer: new marked.Renderer(), gfm: true, @@ -35,24 +47,12 @@ var sdk = require('../../../index'); var dis = require("../../../dispatcher"); var KeyCode = require("../../../KeyCode"); +var Markdown = require("../../../Markdown"); var TYPING_USER_TIMEOUT = 10000; var TYPING_SERVER_TIMEOUT = 30000; var MARKDOWN_ENABLED = true; -function mdownToHtml(mdown) { - var html = marked(mdown) || ""; - html = html.trim(); - // strip start and end

tags else you get 'orrible spacing - if (html.indexOf("

") === 0) { - html = html.substring("

".length); - } - if (html.lastIndexOf("

") === (html.length - "

".length)) { - html = html.substring(0, html.length - "

".length); - } - return html; -} - /* * The textInput part of the MessageComposer */ @@ -341,8 +341,15 @@ module.exports = React.createClass({ contentText = contentText.substring(1); } - var htmlText; - if (this.markdownEnabled && (htmlText = mdownToHtml(contentText)) !== contentText) { + let send_markdown = false; + let mdown; + if (this.markdownEnabled) { + mdown = new Markdown(contentText); + send_markdown = !mdown.isPlainText(); + } + + if (send_markdown) { + const htmlText = mdown.toHTML(); sendMessagePromise = isEmote ? MatrixClientPeg.get().sendHtmlEmote(this.props.room.roomId, contentText, htmlText) : MatrixClientPeg.get().sendHtmlMessage(this.props.room.roomId, contentText, htmlText); From 0eddea19378d60386f31d43592d67acbf132e4c6 Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 22 Sep 2016 17:51:34 +0100 Subject: [PATCH 2/6] Disable link detection, as per comment --- src/Markdown.js | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Markdown.js b/src/Markdown.js index e4161c0b76..0151d0ae77 100644 --- a/src/Markdown.js +++ b/src/Markdown.js @@ -16,10 +16,25 @@ limitations under the License. import marked from 'marked'; +// replace the default link renderer function +// to prevent marked from turning plain URLs +// into links, because tits algorithm is fairly +// poor, so let's send plain URLs rather than +// badly linkified ones (the linkifier Vector +// uses on message display is way better, eg. +// handles URLs with closing parens at the end). +const renderer = new marked.Renderer(); +renderer.link = function(href, title, text) { + if (text == href) { + return href; + } + return marked.Renderer.prototype.apply(this, arguments); +} + // marked only applies the default options on the high // level marked() interface, so we do it here. -const marked_options = Object.assign({}, { - renderer: new marked.Renderer(), +const marked_options = Object.assign({}, marked.defaults, { + renderer: renderer, gfm: true, tables: true, breaks: true, @@ -27,7 +42,7 @@ const marked_options = Object.assign({}, { sanitize: true, smartLists: true, smartypants: false, -}, marked.defaults); +}); const real_parser = new marked.Parser(marked_options); From ca240b0b852a2e980bfc5d4b62ccb379e3cf2a9e Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 22 Sep 2016 18:17:02 +0100 Subject: [PATCH 3/6] Don't wrap everything in p tags Preserves the old behaviour of not wrapping everything in p tags, but also returns valid markup if the resulting markdown contains multiple paragraphs (previously it stripped the

from the start and the

from the end, leaving closing and opening paragraph tags in the middle of the returned markup). Also turn on the 'xhtml' option so marked uses self-closing tags for br's, hr's and so forth. --- src/Markdown.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Markdown.js b/src/Markdown.js index 0151d0ae77..785aa4abfd 100644 --- a/src/Markdown.js +++ b/src/Markdown.js @@ -30,6 +30,14 @@ renderer.link = function(href, title, text) { } return marked.Renderer.prototype.apply(this, arguments); } +const PARAGRAPH_SUFFIX = '

'; +// suffix paragraphs with double line breaks instead of +// wrapping them in 'p' tags: this makes it much easier +// for us to just strip one set of these off at the end, +// leaving valid markup if there were multiple paragraphs. +renderer.paragraph = function(text) { + return text + PARAGRAPH_SUFFIX; +} // marked only applies the default options on the high // level marked() interface, so we do it here. @@ -42,6 +50,7 @@ const marked_options = Object.assign({}, marked.defaults, { sanitize: true, smartLists: true, smartypants: false, + xhtml: true, // return self closing tags (ie.
not
) }); const real_parser = new marked.Parser(marked_options); @@ -109,6 +118,8 @@ export default class Markdown { } toHTML() { - return real_parser.parse(this._copyTokens()); + return real_parser.parse(this._copyTokens()).slice( + 0, 0 - PARAGRAPH_SUFFIX.length + ); } } From de0c92dadf9ce0a303bfe62f04739afa9adcb4be Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 22 Sep 2016 18:37:14 +0100 Subject: [PATCH 4/6] Remove unused code --- .../views/rooms/MessageComposerInputOld.js | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/src/components/views/rooms/MessageComposerInputOld.js b/src/components/views/rooms/MessageComposerInputOld.js index 5a1ca4e461..28e3186c50 100644 --- a/src/components/views/rooms/MessageComposerInputOld.js +++ b/src/components/views/rooms/MessageComposerInputOld.js @@ -15,30 +15,6 @@ */ var React = require("react"); -var marked = require("marked"); - -var marked_options = { - renderer: new marked.Renderer(), - gfm: true, - tables: true, - breaks: true, - pedantic: false, - sanitize: true, - smartLists: true, - smartypants: false -}; - -marked.setOptions({ - renderer: new marked.Renderer(), - gfm: true, - tables: true, - breaks: true, - pedantic: false, - sanitize: true, - smartLists: true, - smartypants: false -}); - var MatrixClientPeg = require("../../../MatrixClientPeg"); var SlashCommands = require("../../../SlashCommands"); var Modal = require("../../../Modal"); From 6ba20ec012425319c3bda3c9eb3918be1de3ba1d Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 22 Sep 2016 18:57:46 +0100 Subject: [PATCH 5/6] Better logic for wrapping in p tags or not --- src/Markdown.js | 81 +++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/src/Markdown.js b/src/Markdown.js index 785aa4abfd..3a9cad6401 100644 --- a/src/Markdown.js +++ b/src/Markdown.js @@ -16,33 +16,9 @@ limitations under the License. import marked from 'marked'; -// replace the default link renderer function -// to prevent marked from turning plain URLs -// into links, because tits algorithm is fairly -// poor, so let's send plain URLs rather than -// badly linkified ones (the linkifier Vector -// uses on message display is way better, eg. -// handles URLs with closing parens at the end). -const renderer = new marked.Renderer(); -renderer.link = function(href, title, text) { - if (text == href) { - return href; - } - return marked.Renderer.prototype.apply(this, arguments); -} -const PARAGRAPH_SUFFIX = '

'; -// suffix paragraphs with double line breaks instead of -// wrapping them in 'p' tags: this makes it much easier -// for us to just strip one set of these off at the end, -// leaving valid markup if there were multiple paragraphs. -renderer.paragraph = function(text) { - return text + PARAGRAPH_SUFFIX; -} - // marked only applies the default options on the high // level marked() interface, so we do it here. const marked_options = Object.assign({}, marked.defaults, { - renderer: renderer, gfm: true, tables: true, breaks: true, @@ -53,8 +29,6 @@ const marked_options = Object.assign({}, marked.defaults, { xhtml: true, // return self closing tags (ie.
not
) }); -const real_parser = new marked.Parser(marked_options); - /** * Class that wraps marked, adding the ability to see whether * a given message actually uses any markdown syntax or whether @@ -90,36 +64,65 @@ export default class Markdown { is_plain = false; } - const dummyRenderer = {}; + const dummy_renderer = {}; for (const k of Object.keys(marked.Renderer.prototype)) { - dummyRenderer[k] = setNotPlain; + dummy_renderer[k] = setNotPlain; } // text and paragraph are just text - dummyRenderer.text = function(t){return t;} - dummyRenderer.paragraph = function(t){return t;} + dummy_renderer.text = function(t){return t;} + dummy_renderer.paragraph = function(t){return t;} // ignore links where text is just the url: // this ignores plain URLs that markdown has // detected whilst preserving markdown syntax links - dummyRenderer.link = function(href, title, text) { + dummy_renderer.link = function(href, title, text) { if (text != href) { is_plain = false; } } - const dummyOptions = {}; - Object.assign(dummyOptions, marked_options, { - renderer: dummyRenderer, + const dummy_options = Object.assign({}, marked_options, { + renderer: dummy_renderer, }); - const dummyParser = new marked.Parser(dummyOptions); - dummyParser.parse(this._copyTokens()); + const dummy_parser = new marked.Parser(dummy_options); + dummy_parser.parse(this._copyTokens()); return is_plain; } toHTML() { - return real_parser.parse(this._copyTokens()).slice( - 0, 0 - PARAGRAPH_SUFFIX.length - ); + const real_renderer = new marked.Renderer(); + real_renderer.link = function(href, title, text) { + // prevent marked from turning plain URLs + // into links, because tits algorithm is fairly + // poor. Let's send plain URLs rather than + // badly linkified ones (the linkifier Vector + // uses on message display is way better, eg. + // handles URLs with closing parens at the end). + if (text == href) { + return href; + } + return marked.Renderer.prototype.apply(this, arguments); + } + + real_renderer.paragraph = (text) => { + // The tokens at the top level are the 'blocks', so if we + // have more than one, there are multiple 'paragraphs'. + // If there is only one top level token, just return the + // bare text: it's a single line of text and so should be + // 'inline', rather than necessarily wrapped in its own + // p tag. If, however, we have multiple tokens, each gets + // its own p tag to keep them as separate paragraphs. + if (this.tokens.length == 1) { + return text; + } + return '

' + text + '

'; + } + + const real_options = Object.assign({}, marked_options, { + renderer: real_renderer, + }); + const real_parser = new marked.Parser(real_options); + return real_parser.parse(this._copyTokens()); } } From 90c9d51c7d191bd638a8dfee32dc6e614ea9ee63 Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 23 Sep 2016 14:15:48 +0100 Subject: [PATCH 6/6] Typos --- src/Markdown.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Markdown.js b/src/Markdown.js index 3a9cad6401..f7b97cf621 100644 --- a/src/Markdown.js +++ b/src/Markdown.js @@ -41,7 +41,7 @@ export default class Markdown { } _copyTokens() { - // copy tokens (the parser modifies it's input arg) + // copy tokens (the parser modifies its input arg) const tokens_copy = this.tokens.slice(); // it also has a 'links' property, because this is javascript // and why wouldn't you have an array that also has properties? @@ -94,7 +94,7 @@ export default class Markdown { const real_renderer = new marked.Renderer(); real_renderer.link = function(href, title, text) { // prevent marked from turning plain URLs - // into links, because tits algorithm is fairly + // into links, because its algorithm is fairly // poor. Let's send plain URLs rather than // badly linkified ones (the linkifier Vector // uses on message display is way better, eg.