| | |
| | | * out that extra spaces should be compressed--a problem addressed with |
| | | * Marcus Bointon's fixes but that I had not yet incorporated. |
| | | * |
| | | * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for |
| | | * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for |
| | | * suggesting a valuable fix with <a> tag handling. |
| | | * |
| | | * Thanks to Wojciech Bajon (again!) for suggesting fixes and additions, |
| | |
| | | var $ent_search = array( |
| | | '/&(nbsp|#160);/i', // Non-breaking space |
| | | '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i', |
| | | // Double quotes |
| | | // Double quotes |
| | | '/&(apos|rsquo|lsquo|#8216|#8217);/i', // Single quotes |
| | | '/>/i', // Greater-than |
| | | '/</i', // Less-than |
| | |
| | | * @access public |
| | | */ |
| | | var $callback_search = array( |
| | | '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', |
| | | // <a href=""> |
| | | '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H6 |
| | | '/<(b)[^>]*>(.*?)<\/b>/i', // <b> |
| | | '/<(strong)[^>]*>(.*?)<\/strong>/i', // <strong> |
| | | '/<(th)[^>]*>(.*?)<\/th>/i', // <th> and </th> |
| | | '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href=""> |
| | | '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6 |
| | | '/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b> |
| | | '/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong> |
| | | '/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th> |
| | | ); |
| | | |
| | | /** |
| | |
| | | function set_html( $source, $from_file = false ) |
| | | { |
| | | if ( $from_file && file_exists($source) ) { |
| | | $this->html = file_get_contents($source); |
| | | $this->html = file_get_contents($source); |
| | | } |
| | | else |
| | | $this->html = $source; |
| | |
| | | function set_base_url( $url = '' ) |
| | | { |
| | | if ( empty($url) ) { |
| | | if ( !empty($_SERVER['HTTP_HOST']) ) { |
| | | $this->url = 'http://' . $_SERVER['HTTP_HOST']; |
| | | } else { |
| | | $this->url = ''; |
| | | } |
| | | if ( !empty($_SERVER['HTTP_HOST']) ) { |
| | | $this->url = 'http://' . $_SERVER['HTTP_HOST']; |
| | | } else { |
| | | $this->url = ''; |
| | | } |
| | | } else { |
| | | // Strip any trailing slashes for consistency (relative |
| | | // URLs may already start with a slash like "/file.html") |
| | |
| | | $text = preg_replace($this->ent_search, $this->ent_replace, $text); |
| | | |
| | | // Replace known html entities |
| | | $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8'); |
| | | $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8'); |
| | | |
| | | // Remove unknown/unhandled entities (this cannot be done in search-and-replace block) |
| | | $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); |
| | |
| | | // for PHP versions >= 4.0.2. Default width is 75 |
| | | // If width is 0 or less, don't wrap the text. |
| | | if ( $this->width > 0 ) { |
| | | $text = wordwrap($text, $this->width); |
| | | $text = wordwrap($text, $this->width); |
| | | } |
| | | } |
| | | |
| | |
| | | */ |
| | | function _build_link_list( $link, $display ) |
| | | { |
| | | if (!$this->_do_links || empty($link)) { |
| | | return $display; |
| | | } |
| | | |
| | | // Ignored link types |
| | | if (preg_match('!^(javascript|mailto|#):!i', $link)) { |
| | | return $display; |
| | | if (!$this->_do_links || empty($link)) { |
| | | return $display; |
| | | } |
| | | |
| | | if (preg_match('!^(https?://)!i', $link)) { |
| | | // Ignored link types |
| | | if (preg_match('!^(javascript:|mailto:|#)!i', $link)) { |
| | | return $display; |
| | | } |
| | | |
| | | if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) { |
| | | $url = $link; |
| | | } |
| | | else { |
| | |
| | | } |
| | | |
| | | if (($index = array_search($url, $this->_link_list)) === false) { |
| | | $this->_link_list[] = $url; |
| | | $index = count($this->_link_list); |
| | | $this->_link_list[] = $url; |
| | | } |
| | | |
| | | return $display . ' [' . ($index+1) . ']'; |
| | |
| | | { |
| | | // get the content of PRE element |
| | | while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) { |
| | | $this->pre_content = $matches[1]; |
| | | |
| | | // Run our defined tags search-and-replace with callback |
| | | $this->pre_content = preg_replace_callback($this->callback_search, |
| | | array('html2text', '_preg_callback'), $this->pre_content); |
| | | |
| | | // convert the content |
| | | $this->pre_content = sprintf('<div><br>%s<br></div>', |
| | | preg_replace($this->pre_search, $this->pre_replace, $matches[1])); |
| | | preg_replace($this->pre_search, $this->pre_replace, $this->pre_content)); |
| | | |
| | | // replace the content (use callback because content can contain $0 variable) |
| | | $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU', |
| | | $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU', |
| | | array('html2text', '_preg_pre_callback'), $text, 1); |
| | | |
| | | // free memory |
| | | $this->pre_content = ''; |
| | | } |
| | |
| | | */ |
| | | private function _preg_callback($matches) |
| | | { |
| | | switch($matches[1]) { |
| | | switch (strtolower($matches[1])) { |
| | | case 'b': |
| | | case 'strong': |
| | | return $this->_toupper($matches[2]); |
| | | return $this->_toupper($matches[3]); |
| | | case 'th': |
| | | return $this->_toupper("\t\t". $matches[2] ."\n"); |
| | | return $this->_toupper("\t\t". $matches[3] ."\n"); |
| | | case 'h': |
| | | return $this->_toupper("\n\n". $matches[2] ."\n\n"); |
| | | return $this->_toupper("\n\n". $matches[3] ."\n\n"); |
| | | case 'a': |
| | | // Remove spaces in URL (#1487805) |
| | | $url = str_replace(' ', '', $matches[3]); |