From 25c8fe4eeb1e9c1a3055bb27984f1274f99fc1d7 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak <alec@alec.pl> Date: Wed, 17 Jun 2015 08:03:13 -0400 Subject: [PATCH] Fix handling of non-break spaces in html to text conversion (#1490436) --- program/lib/Roundcube/rcube_html2text.php | 20 ++++++++++++++++---- 1 files changed, 16 insertions(+), 4 deletions(-) diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php index 01362e6..a2f6288 100644 --- a/program/lib/Roundcube/rcube_html2text.php +++ b/program/lib/Roundcube/rcube_html2text.php @@ -142,7 +142,7 @@ '/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with '/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with '/<p[^>]*>/i', // <P> - '/<br[^>]*>/i', // <br> + '/<br[^>]*>\s*/i', // <br> '/<i[^>]*>(.*?)<\/i>/i', // <i> '/<em[^>]*>(.*?)<\/em>/i', // <em> '/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul> @@ -216,7 +216,7 @@ * @see $ent_search */ protected $ent_replace = array( - ' ', // Non-breaking space + "\xC2\xA0", // Non-breaking space '"', // Double quotes "'", // Single quotes '>', @@ -423,7 +423,7 @@ // Variables used for building the link list $this->_link_list = array(); - $text = trim(stripslashes($this->html)); + $text = $this->html; // Convert HTML to TXT $this->_converter($text); @@ -473,6 +473,9 @@ // Replace known html entities $text = html_entity_decode($text, ENT_QUOTES, $this->charset); + // Replace unicode nbsp to regular spaces + $text = preg_replace('/\xC2\xA0/', ' ', $text); + // Remove unknown/unhandled entities (this cannot be done in search-and-replace block) $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); @@ -506,7 +509,7 @@ * @param string $link URL of the link * @param string $display Part of the text to associate number with */ - protected function _build_link_list( $link, $display ) + protected function _build_link_list($link, $display) { if (!$this->_do_links || empty($link)) { return $display; @@ -514,6 +517,11 @@ // Ignored link types if (preg_match('!^(javascript:|mailto:|#)!i', $link)) { + return $display; + } + + // skip links with href == content (#1490434) + if ($link === $display) { return $display; } @@ -616,6 +624,10 @@ break; } + // abort on invalid tag structure (e.g. no closing tag found) + else { + break; + } } while ($end || $next); } -- Gitblit v1.9.1