Aleksander Machniak
2016-05-22 77b5d7ee304a688a2eb115ce04b460b43c0dd700
program/lib/Roundcube/rcube_html2text.php
@@ -136,13 +136,16 @@
     * @see $replace
     */
    protected $search = array(
        "/\r/",                                  // Non-legal carriage return
        "/[\n\t]+/",                             // Newlines and tabs
        '/<head[^>]*>.*?<\/head>/i',             // <head>
        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
        '/<p[^>]*>/i',                           // <P>
        '/<br[^>]*>/i',                          // <br>
        '/\r/',                                  // Non-legal carriage return
        '/^.*<body[^>]*>\n*/is',                 // Anything before <body>
        '/<head[^>]*>.*?<\/head>/is',            // <head>
        '/<script[^>]*>.*?<\/script>/is',        // <script>
        '/<style[^>]*>.*?<\/style>/is',          // <style>
        '/[\n\t]+/',                             // Newlines and tabs
        '/<p[^>]*>/i',                           // <p>
        '/<\/p>[\s\n\t]*<div[^>]*>/i',           // </p> before <div>
        '/<br[^>]*>[\s\n\t]*<div[^>]*>/i',       // <br> before <div>
        '/<br[^>]*>\s*/i',                       // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
        '/<em[^>]*>(.*?)<\/em>/i',               // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
@@ -164,11 +167,14 @@
     */
    protected $replace = array(
        '',                                     // Non-legal carriage return
        ' ',                                    // Newlines and tabs
        '',                                     // Anything before <body>
        '',                                     // <head>
        '',                                     // <script>s -- which strip_tags supposedly has problems with
        '',                                     // <style>s -- which strip_tags supposedly has problems with
        "\n\n",                                 // <P>
        '',                                     // <script>
        '',                                     // <style>
        ' ',                                    // Newlines and tabs
        "\n\n",                                 // <p>
        "\n<div>",                              // </p> before <div>
        '<div>',                                // <br> before <div>
        "\n",                                   // <br>
        '_\\1_',                                // <i>
        '_\\1_',                                // <em>
@@ -216,7 +222,7 @@
     * @see $ent_search
     */
    protected $ent_replace = array(
        ' ',                                    // Non-breaking space
        "\xC2\xA0",                             // Non-breaking space
        '"',                                    // Double quotes
        "'",                                    // Single quotes
        '>',
@@ -423,7 +429,7 @@
        // Variables used for building the link list
        $this->_link_list = array();
        $text = trim(stripslashes($this->html));
        $text = $this->html;
        // Convert HTML to TXT
        $this->_converter($text);
@@ -473,6 +479,9 @@
        // Replace known html entities
        $text = html_entity_decode($text, ENT_QUOTES, $this->charset);
        // Replace Unicode non-breaking spaces (U+00A0, UTF-8 bytes C2 A0) with regular spaces
        $text = preg_replace('/\xC2\xA0/', ' ', $text);
        // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
        $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@@ -506,7 +515,7 @@
     * @param string $link URL of the link
     * @param string $display Part of the text to associate number with
     */
    protected function _build_link_list( $link, $display )
    protected function _build_link_list($link, $display)
    {
        if (!$this->_do_links || empty($link)) {
            return $display;
@@ -514,6 +523,11 @@
        // Ignored link types
        if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
            return $display;
        }
        // skip links with href == content (#1490434)
        if ($link === $display) {
            return $display;
        }
@@ -616,6 +630,10 @@
                    break;
                }
                // abort on invalid tag structure (e.g. no closing tag found)
                else {
                    break;
                }
            }
            while ($end || $next);
        }