From 63d6e6dfc35e6d82c4a64f37c408794c163becd4 Mon Sep 17 00:00:00 2001 From: thomascube <thomas@roundcube.net> Date: Wed, 28 Sep 2011 15:16:41 -0400 Subject: [PATCH] Bump versions to 0.6 stable --- program/lib/html2text.php | 47 ++++++++++++++++++++++++++++++++++++----------- 1 files changed, 36 insertions(+), 11 deletions(-) diff --git a/program/lib/html2text.php b/program/lib/html2text.php index aa5df0e..1ab1605 100644 --- a/program/lib/html2text.php +++ b/program/lib/html2text.php @@ -167,7 +167,6 @@ '/&(apos|rsquo|lsquo|#8216|#8217);/i', // Single quotes '/>/i', // Greater-than '/</i', // Less-than - '/&(amp|#38);/i', // Ampersand '/&(copy|#169);/i', // Copyright '/&(trade|#8482|#153);/i', // Trademark '/&(reg|#174);/i', // Registered @@ -176,6 +175,7 @@ '/&(bull|#149|#8226);/i', // Bullet '/&(pound|#163);/i', // Pound sign '/&(euro|#8364);/i', // Euro sign + '/&(amp|#38);/i', // Ampersand: see _converter() '/[ ]{2,}/' // Runs of spaces, post-handling ); @@ -210,7 +210,6 @@ "'", // Single quotes '>', '<', - '&', '(c)', '(tm)', '(R)', @@ -219,6 +218,7 @@ '*', '£', 'EUR', // Euro sign. � ? + '|+|amp|+|', // Ampersand: see _converter() ' ' // Runs of spaces, post-handling ); @@ -502,7 +502,11 @@ $text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text); // Remove unknown/unhandled entities (this cannot be done in search-and-replace block) - $text = preg_replace('/&#?[a-z0-9]{2,7};/i', '', $text); + $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); + + // Convert "|+|amp|+|" into "&", need to be done after handling of unknown entities + // This properly handles situation of "&quot;" in input string + $text = str_replace('|+|amp|+|', '&', $text); // Strip any other HTML tags $text = strip_tags($text, $this->allowed_tags); @@ -510,6 +514,9 @@ // Bring down number of empty lines to 2 max $text = preg_replace("/\n\s+\n/", "\n\n", $text); $text = preg_replace("/[\n]{3,}/", "\n\n", $text); + + // remove leading empty lines (can be produced by eg. P tag on the beginning) + $text = preg_replace('/^\n+/', '', $text); // Wrap the text to a readable format // for PHP versions >= 4.0.2. Default width is 75 @@ -568,9 +575,16 @@ */ function _convert_pre(&$text) { + // get the content of PRE element while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) { - $result = preg_replace($this->pre_search, $this->pre_replace, $matches[1]); - $text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', '<div><br>' . $result . '<br></div>', $text, 1); + // convert the content + $this->pre_content = sprintf('<div><br>%s<br></div>', + preg_replace($this->pre_search, $this->pre_replace, $matches[1])); + // replace the content (use callback because content can contain $0 variable) + $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU', + array('html2text', '_preg_pre_callback'), $text, 1); + // free memory + $this->pre_content = ''; } } @@ -635,9 +649,8 @@ * * @param array PREG matches * @return string - * @access private */ - function _preg_callback($matches) + private function _preg_callback($matches) { switch($matches[1]) { case 'b': @@ -648,18 +661,30 @@ case 'h': return $this->_strtoupper("\n\n". $matches[2] ."\n\n"); case 'a': - return $this->_build_link_list($matches[3], $matches[4]); + // Remove spaces in URL (#1487805) + $url = str_replace(' ', '', $matches[3]); + return $this->_build_link_list($url, $matches[4]); } } - + + /** + * Callback function for preg_replace_callback use in PRE content handler. + * + * @param array PREG matches + * @return string + */ + private function _preg_pre_callback($matches) + { + return $this->pre_content; + } + /** * Strtoupper multibyte wrapper function * * @param string * @return string - * @access private */ - function _strtoupper($str) + private function _strtoupper($str) { if (function_exists('mb_strtoupper')) return mb_strtoupper($str); -- Gitblit v1.9.1