Mercurial > wikked
diff static/js/pagedown/Markdown.Sanitizer.js @ 60:8250c977bc50
Moved static files to the root directory.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 05 Feb 2013 14:49:34 -0800 |
parents | wikked/static/js/pagedown/Markdown.Sanitizer.js@59cad6ce1a1c |
children |
line wrap: on
line diff
// Markdown.Sanitizer: adds getSanitizingConverter() to the Markdown namespace
// (or to `exports` under CommonJS). The converter it returns post-processes
// its HTML output with a tag whitelist followed by a tag-balancing pass.
(function () {
    var output, Converter;
    if (typeof exports === "object" && typeof require === "function") { // we're in a CommonJS (e.g. Node.js) module
        output = exports;
        Converter = require("./Markdown.Converter").Converter;
    } else {
        output = window.Markdown;
        Converter = output.Converter;
    }

    // Builds a new Converter and chains sanitizeHtml, then balanceTags, onto
    // its "postConversion" hook (in that order), and returns the converter.
    output.getSanitizingConverter = function () {
        var converter = new Converter();
        converter.hooks.chain("postConversion", sanitizeHtml);
        converter.hooks.chain("postConversion", balanceTags);
        return converter;
    };

    // Runs every tag-like span ("<" up to the next ">", or up to the end of
    // the input when no ">" follows) through sanitizeTag and substitutes the
    // result.
    function sanitizeHtml(html) {
        return html.replace(/<[^>]*>?/gi, sanitizeTag);
    }

    // (tags that can be opened/closed) | (tags that stand alone)
    var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
    // <a href="url..." optional title>|</a>
    var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;

    // <img src="url..." optional width optional height optional alt optional title
    var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

    // Returns the tag unchanged when it matches one of the three whitelists
    // above; otherwise returns "" so the tag is dropped from the output.
    function sanitizeTag(tag) {
        if (basic_tag_whitelist.test(tag) || a_white.test(tag) || img_white.test(tag)) {
            return tag;
        }
        return "";
    }

    /// <summary>
    /// attempt to balance HTML tags in the html string
    /// by removing any unmatched opening or closing tags
    /// IMPORTANT: we *assume* HTML has *already* been
    /// sanitized and is safe/sane before balancing!
    ///
    /// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
    /// </summary>
    function balanceTags(html) {

        if (html === "") {
            return "";
        }

        var re = /<\/?\w+[^>]*(\s|$|>)/g;
        // convert everything to lower case; this makes
        // our case insensitive comparisons easier
        var tags = html.toLowerCase().match(re);

        // no HTML tags present? nothing to do; exit now
        // (a global match yields null rather than an empty array)
        if (!tags) {
            return html;
        }

        var tagcount = tags.length;
        // tags in this list are never balanced; assume they're self-closed
        var ignoredtags = "<p><img><br><li><hr>";
        // Anchored name extractor. Only the leading word is the tag name;
        // the previous `.*`-based replace stopped at a line break, so a tag
        // such as "<a\nhref=...>" produced a name containing the rest of the
        // tag, and the link was then stripped as unbalanced.
        var nameRe = /^<\/?(\w+)/;
        var tagpaired = [];
        var tagremove = [];
        var needsRemoval = false;
        var tag, tagname, match, ctag, ntag;

        // loop through matched tags in forward order
        for (ctag = 0; ctag < tagcount; ctag++) {
            tag = tags[ctag];
            // every element of `tags` starts with "<", optional "/", and a
            // word (guaranteed by `re`), so exec() cannot return null here
            tagname = nameRe.exec(tag)[1];

            // skip any already paired tags
            // and skip tags in our ignore list.
            // indexOf is a literal substring test; String.prototype.search
            // would compile its argument as a regular expression.
            if (tagpaired[ctag] || ignoredtags.indexOf("<" + tagname + ">") > -1) {
                continue;
            }

            match = -1;

            if (!/^<\//.test(tag)) {
                // this is an opening tag
                // search forwards (next tags), look for closing tags
                for (ntag = ctag + 1; ntag < tagcount; ntag++) {
                    if (!tagpaired[ntag] && tags[ntag] === "</" + tagname + ">") {
                        match = ntag;
                        break;
                    }
                }
            }

            if (match === -1) {
                // unmatched opening tag, or a closing tag that no earlier
                // opening tag claimed: mark for removal
                needsRemoval = tagremove[ctag] = true;
            } else {
                tagpaired[match] = true; // mark paired
            }
        }

        if (!needsRemoval) {
            return html;
        }

        // delete all orphaned tags from the string; the replace callback is
        // invoked once per match of `re`, in the same order as `tags`
        var index = 0;
        return html.replace(re, function (matched) {
            var res = tagremove[index] ? "" : matched;
            index++;
            return res;
        });
    }
})();