From 25c8fe4eeb1e9c1a3055bb27984f1274f99fc1d7 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Wed, 17 Jun 2015 08:03:13 -0400
Subject: [PATCH] Fix handling of non-break spaces in html to text conversion (#1490436)

---
 program/lib/Roundcube/rcube_html2text.php |   20 ++++++++++++++++----
 1 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php
index 01362e6..a2f6288 100644
--- a/program/lib/Roundcube/rcube_html2text.php
+++ b/program/lib/Roundcube/rcube_html2text.php
@@ -142,7 +142,7 @@
         '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
         '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
         '/<p[^>]*>/i',                           // <P>
-        '/<br[^>]*>/i',                          // <br>
+        '/<br[^>]*>\s*/i',                       // <br>
         '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
         '/<em[^>]*>(.*?)<\/em>/i',               // <em>
         '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
@@ -216,7 +216,7 @@
      * @see $ent_search
      */
     protected $ent_replace = array(
-        ' ',                                    // Non-breaking space
+        "\xC2\xA0",                             // Non-breaking space
         '"',                                    // Double quotes
         "'",                                    // Single quotes
         '>',
@@ -423,7 +423,7 @@
         // Variables used for building the link list
         $this->_link_list = array();
 
-        $text = trim(stripslashes($this->html));
+        $text = $this->html;
 
         // Convert HTML to TXT
         $this->_converter($text);
@@ -473,6 +473,9 @@
         // Replace known html entities
         $text = html_entity_decode($text, ENT_QUOTES, $this->charset);
 
+        // Replace Unicode non-breaking spaces (UTF-8 bytes C2 A0) with regular spaces
+        $text = preg_replace('/\xC2\xA0/', ' ', $text);
+
         // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
         $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
 
@@ -506,7 +509,7 @@
      * @param string $link URL of the link
      * @param string $display Part of the text to associate number with
      */
-    protected function _build_link_list( $link, $display )
+    protected function _build_link_list($link, $display)
     {
         if (!$this->_do_links || empty($link)) {
             return $display;
@@ -514,6 +517,11 @@
 
         // Ignored link types
         if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
+            return $display;
+        }
+
+        // skip links with href == content (#1490434)
+        if ($link === $display) {
             return $display;
         }
 
@@ -616,6 +624,10 @@
 
                     break;
                 }
+                // abort on invalid tag structure (e.g. no closing tag found)
+                else {
+                    break;
+                }
             }
             while ($end || $next);
         }

--
Gitblit v1.9.1