From 8e7f32fddc7eb5752c30c97005c71cf102cb5021 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Fri, 30 Oct 2015 15:26:12 -0400
Subject: [PATCH] Small improvements in HTML to text conversion.

---
 program/lib/Roundcube/rcube_html2text.php |   33 ++++++++++++++++++++++-----------
 1 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php
index 00a59a7..8072c3e 100644
--- a/program/lib/Roundcube/rcube_html2text.php
+++ b/program/lib/Roundcube/rcube_html2text.php
@@ -136,12 +136,15 @@
      * @see $replace
      */
     protected $search = array(
-        "/\r/",                                  // Non-legal carriage return
-        "/[\n\t]+/",                             // Newlines and tabs
+        '/\r/',                                  // Non-legal carriage return
+        '/^.*<body[^>]*>\n*/i',                  // Anything before <body>
         '/<head[^>]*>.*?<\/head>/i',             // <head>
-        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
-        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
-        '/<p[^>]*>/i',                           // <P>
+        '/<script[^>]*>.*?<\/script>/i',         // <script>
+        '/<style[^>]*>.*?<\/style>/i',           // <style>
+        '/[\n\t]+/',                             // Newlines and tabs
+        '/<p[^>]*>/i',                           // <p>
+        '/<\/p>[\s\n\t]*<div[^>]*>/i',           // </p> before <div>
+        '/<br[^>]*>[\s\n\t]*<div[^>]*>/i',       // <br> before <div>
         '/<br[^>]*>\s*/i',                       // <br>
         '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
         '/<em[^>]*>(.*?)<\/em>/i',               // <em>
@@ -164,11 +167,14 @@
      */
     protected $replace = array(
         '',                                     // Non-legal carriage return
-        ' ',                                    // Newlines and tabs
+        '',                                     // Anything before <body>
         '',                                     // <head>
-        '',                                     // <script>s -- which strip_tags supposedly has problems with
-        '',                                     // <style>s -- which strip_tags supposedly has problems with
-        "\n\n",                                 // <P>
+        '',                                     // <script>
+        '',                                     // <style>
+        ' ',                                    // Newlines and tabs
+        "\n\n",                                 // <p>
+        "\n<div>",                              // </p> before <div>
+        '<div>',                                // <br> before <div>
         "\n",                                   // <br>
         '_\\1_',                                // <i>
         '_\\1_',                                // <em>
@@ -216,7 +222,7 @@
      * @see $ent_search
      */
     protected $ent_replace = array(
-        ' ',                                    // Non-breaking space
+        "\xC2\xA0",                             // Non-breaking space
         '"',                                    // Double quotes
         "'",                                    // Single quotes
         '>',
@@ -509,7 +515,7 @@
      * @param string $link URL of the link
      * @param string $display Part of the text to associate number with
      */
-    protected function _build_link_list( $link, $display )
+    protected function _build_link_list($link, $display)
     {
         if (!$this->_do_links || empty($link)) {
             return $display;
@@ -520,6 +526,11 @@
             return $display;
         }
 
+        // skip links with href == content (#1490434)
+        if ($link === $display) {
+            return $display;
+        }
+
         if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
             $url = $link;
         }

--
Gitblit v1.9.1