| | |
| | | * @see $replace |
| | | */ |
| | | protected $search = array( |
| | | "/\r/", // Non-legal carriage return |
| | | "/[\n\t]+/", // Newlines and tabs |
| | | '/\r/', // Non-legal carriage return |
| | | '/^.*<body[^>]*>\n*/i', // Anything before <body> |
| | | '/<head[^>]*>.*?<\/head>/i', // <head> |
| | | '/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with |
| | | '/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with |
| | | '/<p[^>]*>/i', // <P> |
| | | '/<br[^>]*>/i', // <br> |
| | | '/<script[^>]*>.*?<\/script>/i', // <script> |
| | | '/<style[^>]*>.*?<\/style>/i', // <style> |
| | | '/[\n\t]+/', // Newlines and tabs |
| | | '/<p[^>]*>/i', // <p> |
| | | '/<\/p>[\s\n\t]*<div[^>]*>/i', // </p> before <div> |
| | | '/<br[^>]*>[\s\n\t]*<div[^>]*>/i', // <br> before <div> |
| | | '/<br[^>]*>\s*/i', // <br> |
| | | '/<i[^>]*>(.*?)<\/i>/i', // <i> |
| | | '/<em[^>]*>(.*?)<\/em>/i', // <em> |
| | | '/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul> |
| | |
| | | */ |
| | | protected $replace = array( |
| | | '', // Non-legal carriage return |
| | | ' ', // Newlines and tabs |
| | | '', // Anything before <body> |
| | | '', // <head> |
| | | '', // <script>s -- which strip_tags supposedly has problems with |
| | | '', // <style>s -- which strip_tags supposedly has problems with |
| | | "\n\n", // <P> |
| | | '', // <script> |
| | | '', // <style> |
| | | ' ', // Newlines and tabs |
| | | "\n\n", // <p> |
| | | "\n<div>", // </p> before <div> |
| | | '<div>', // <br> before <div> |
| | | "\n", // <br> |
| | | '_\\1_', // <i> |
| | | '_\\1_', // <em> |
| | |
| | | * @see $ent_search |
| | | */ |
| | | protected $ent_replace = array( |
| | | ' ', // Non-breaking space |
| | | "\xC2\xA0", // Non-breaking space |
| | | '"', // Double quotes |
| | | "'", // Single quotes |
| | | '>', |
| | |
| | | // Variables used for building the link list |
| | | $this->_link_list = array(); |
| | | |
| | | $text = trim(stripslashes($this->html)); |
| | | $text = $this->html; |
| | | |
| | | // Convert HTML to TXT |
| | | $this->_converter($text); |
| | |
| | | // Replace known html entities |
| | | $text = html_entity_decode($text, ENT_QUOTES, $this->charset); |
| | | |
| | | // Replace unicode nbsp to regular spaces |
| | | $text = preg_replace('/\xC2\xA0/', ' ', $text); |
| | | |
| | | // Remove unknown/unhandled entities (this cannot be done in search-and-replace block) |
| | | $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); |
| | | |
| | |
| | | * @param string $link URL of the link |
| | | * @param string $display Part of the text to associate number with |
| | | */ |
| | | protected function _build_link_list( $link, $display ) |
| | | protected function _build_link_list($link, $display) |
| | | { |
| | | if (!$this->_do_links || empty($link)) { |
| | | return $display; |
| | |
| | | |
| | | // Ignored link types |
| | | if (preg_match('!^(javascript:|mailto:|#)!i', $link)) { |
| | | return $display; |
| | | } |
| | | |
| | | // skip links with href == content (#1490434) |
| | | if ($link === $display) { |
| | | return $display; |
| | | } |
| | | |
| | |
| | | $this->width = $p_width; |
| | | |
| | | // Add citation markers and create <pre> block |
| | | $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body)); |
| | | $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_callback'), trim($body)); |
| | | $body = '<pre>' . htmlspecialchars($body) . '</pre>'; |
| | | |
| | | $text = substr_replace($text, $body . "\n", $start, $end + 13 - $start); |
| | | $offset = 0; |
| | | |
| | | break; |
| | | } |
| | | // abort on invalid tag structure (e.g. no closing tag found) |
| | | else { |
| | | break; |
| | | } |
| | | } |
| | |
| | | /** |
| | | * Callback function to correctly add citation markers for blockquote contents |
| | | */ |
| | | public function blockquote_citation_ballback($m) |
| | | public function blockquote_citation_callback($m) |
| | | { |
| | | $line = ltrim($m[2]); |
| | | $space = $line[0] == '>' ? '' : ' '; |