Aleksander Machniak
2016-01-16 ed1d212ae2daea5e4bd043417610177093e99f19
program/lib/Roundcube/rcube_html2text.php
@@ -136,13 +136,16 @@
     * @see $replace
     */
    protected $search = array(
        "/\r/",                                  // Non-legal carriage return
        "/[\n\t]+/",                             // Newlines and tabs
        '/\r/',                                  // Non-legal carriage return
        '/^.*<body[^>]*>\n*/i',                  // Anything before <body>
        '/<head[^>]*>.*?<\/head>/i',             // <head>
        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
        '/<p[^>]*>/i',                           // <P>
        '/<br[^>]*>/i',                          // <br>
        '/<script[^>]*>.*?<\/script>/i',         // <script>
        '/<style[^>]*>.*?<\/style>/i',           // <style>
        '/[\n\t]+/',                             // Newlines and tabs
        '/<p[^>]*>/i',                           // <p>
        '/<\/p>[\s\n\t]*<div[^>]*>/i',           // </p> before <div>
        '/<br[^>]*>[\s\n\t]*<div[^>]*>/i',       // <br> before <div>
        '/<br[^>]*>\s*/i',                       // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
        '/<em[^>]*>(.*?)<\/em>/i',               // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
@@ -164,11 +167,14 @@
     */
    protected $replace = array(
        '',                                     // Non-legal carriage return
        ' ',                                    // Newlines and tabs
        '',                                     // Anything before <body>
        '',                                     // <head>
        '',                                     // <script>s -- which strip_tags supposedly has problems with
        '',                                     // <style>s -- which strip_tags supposedly has problems with
        "\n\n",                                 // <P>
        '',                                     // <script>
        '',                                     // <style>
        ' ',                                    // Newlines and tabs
        "\n\n",                                 // <p>
        "\n<div>",                              // </p> before <div>
        '<div>',                                // <br> before <div>
        "\n",                                   // <br>
        '_\\1_',                                // <i>
        '_\\1_',                                // <em>
@@ -216,7 +222,7 @@
     * @see $ent_search
     */
    protected $ent_replace = array(
        ' ',                                    // Non-breaking space
        "\xC2\xA0",                             // Non-breaking space
        '"',                                    // Double quotes
        "'",                                    // Single quotes
        '>',
@@ -423,7 +429,7 @@
        // Variables used for building the link list
        $this->_link_list = array();
        $text = trim(stripslashes($this->html));
        $text = $this->html;
        // Convert HTML to TXT
        $this->_converter($text);
@@ -473,6 +479,9 @@
        // Replace known html entities
        $text = html_entity_decode($text, ENT_QUOTES, $this->charset);
        // Replace Unicode non-breaking spaces with regular spaces
        $text = preg_replace('/\xC2\xA0/', ' ', $text);
        // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
        $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@@ -506,7 +515,7 @@
     * @param string $link URL of the link
     * @param string $display Part of the text to associate number with
     */
    protected function _build_link_list( $link, $display )
    protected function _build_link_list($link, $display)
    {
        if (!$this->_do_links || empty($link)) {
            return $display;
@@ -514,6 +523,11 @@
        // Ignored link types
        if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
            return $display;
        }
        // skip links with href == content (#1490434)
        if ($link === $display) {
            return $display;
        }
@@ -597,28 +611,42 @@
                    // get blockquote content
                    $body = trim(substr($text, $startpos, $end - $startpos));
                    // adjust text wrapping width
                    $p_width = $this->width;
                    if ($this->width > 0) $this->width -= 2;
                    // replace content with inner blockquotes
                    $this->_converter($body);
                    // restore text width
                    $this->width = $p_width;
                    // Add citation markers and create <pre> block
                    $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body));
                    $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_callback'), trim($body));
                    $body = '<pre>' . htmlspecialchars($body) . '</pre>';
                    $text = substr($text, 0, $start) . $body . "\n" . substr($text, $end + 13);
                    $text = substr_replace($text, $body . "\n", $start, $end + 13 - $start);
                    $offset = 0;
                    break;
                }
            } while ($end || $next);
                // abort on invalid tag structure (e.g. no closing tag found)
                else {
                    break;
                }
            }
            while ($end || $next);
        }
    }
    /**
     * Callback function to correctly add citation markers for blockquote contents
     */
    public function blockquote_citation_ballback($m)
    public function blockquote_citation_callback($m)
    {
        $line = ltrim($m[2]);
        $line  = ltrim($m[2]);
        $space = $line[0] == '>' ? '' : ' ';
        return $m[1] . '>' . $space . $line;
    }