From ed1d212ae2daea5e4bd043417610177093e99f19 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak <alec@alec.pl> Date: Sat, 16 Jan 2016 03:03:51 -0500 Subject: [PATCH] Improved SVG cleanup code --- program/lib/Roundcube/rcube_html2text.php | 64 +++++++++++++++++++++++--------- 1 files changed, 46 insertions(+), 18 deletions(-) diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php index 3d32fe7..d20d7b7 100644 --- a/program/lib/Roundcube/rcube_html2text.php +++ b/program/lib/Roundcube/rcube_html2text.php @@ -136,13 +136,16 @@ * @see $replace */ protected $search = array( - "/\r/", // Non-legal carriage return - "/[\n\t]+/", // Newlines and tabs + '/\r/', // Non-legal carriage return + '/^.*<body[^>]*>\n*/i', // Anything before <body> '/<head[^>]*>.*?<\/head>/i', // <head> - '/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with - '/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with - '/<p[^>]*>/i', // <P> - '/<br[^>]*>/i', // <br> + '/<script[^>]*>.*?<\/script>/i', // <script> + '/<style[^>]*>.*?<\/style>/i', // <style> + '/[\n\t]+/', // Newlines and tabs + '/<p[^>]*>/i', // <p> + '/<\/p>[\s\n\t]*<div[^>]*>/i', // </p> before <div> + '/<br[^>]*>[\s\n\t]*<div[^>]*>/i', // <br> before <div> + '/<br[^>]*>\s*/i', // <br> '/<i[^>]*>(.*?)<\/i>/i', // <i> '/<em[^>]*>(.*?)<\/em>/i', // <em> '/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul> @@ -164,11 +167,14 @@ */ protected $replace = array( '', // Non-legal carriage return - ' ', // Newlines and tabs + '', // Anything before <body> '', // <head> - '', // <script>s -- which strip_tags supposedly has problems with - '', // <style>s -- which strip_tags supposedly has problems with - "\n\n", // <P> + '', // <script> + '', // <style> + ' ', // Newlines and tabs + "\n\n", // <p> + "\n<div>", // </p> before <div> + '<div>', // <br> before <div> "\n", // <br> '_\\1_', // <i> '_\\1_', // <em> @@ -216,7 +222,7 @@ * 
@see $ent_search */ protected $ent_replace = array( - ' ', // Non-breaking space + "\xC2\xA0", // Non-breaking space '"', // Double quotes "'", // Single quotes '>', @@ -423,7 +429,7 @@ // Variables used for building the link list $this->_link_list = array(); - $text = trim(stripslashes($this->html)); + $text = $this->html; // Convert HTML to TXT $this->_converter($text); @@ -473,6 +479,9 @@ // Replace known html entities $text = html_entity_decode($text, ENT_QUOTES, $this->charset); + // Replace unicode nbsp to regular spaces + $text = preg_replace('/\xC2\xA0/', ' ', $text); + // Remove unknown/unhandled entities (this cannot be done in search-and-replace block) $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); @@ -506,7 +515,7 @@ * @param string $link URL of the link * @param string $display Part of the text to associate number with */ - protected function _build_link_list( $link, $display ) + protected function _build_link_list($link, $display) { if (!$this->_do_links || empty($link)) { return $display; @@ -514,6 +523,11 @@ // Ignored link types if (preg_match('!^(javascript:|mailto:|#)!i', $link)) { + return $display; + } + + // skip links with href == content (#1490434) + if ($link === $display) { return $display; } @@ -597,28 +611,42 @@ // get blockquote content $body = trim(substr($text, $startpos, $end - $startpos)); + // adjust text wrapping width + $p_width = $this->width; + if ($this->width > 0) $this->width -= 2; + // replace content with inner blockquotes $this->_converter($body); + // restore text width + $this->width = $p_width; + // Add citation markers and create <pre> block - $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body)); + $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_callback'), trim($body)); $body = '<pre>' . htmlspecialchars($body) . '</pre>'; - $text = substr($text, 0, $start) . $body . "\n" . 
substr($text, $end + 13); + $text = substr_replace($text, $body . "\n", $start, $end + 13 - $start); $offset = 0; + break; } - } while ($end || $next); + // abort on invalid tag structure (e.g. no closing tag found) + else { + break; + } + } + while ($end || $next); } } /** * Callback function to correctly add citation markers for blockquote contents */ - public function blockquote_citation_ballback($m) + public function blockquote_citation_callback($m) { - $line = ltrim($m[2]); + $line = ltrim($m[2]); $space = $line[0] == '>' ? '' : ' '; + return $m[1] . '>' . $space . $line; } -- Gitblit v1.9.1