| | |
| | | { |
| | | // special replacements (not properly handled by washtml class) |
| | | $html_search = array( |
| | | '/(<\/nobr>)(\s+)(<nobr>)/i', // space(s) between <NOBR> |
| | | '/<title[^>]*>[^<]*<\/title>/i', // PHP bug #32547 workaround: remove title tag |
| | | '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', // byte-order mark (only outlook?) |
| | | '/<html\s[^>]+>/i', // washtml/DOMDocument cannot handle xml namespaces |
| | | // space(s) between <NOBR> |
| | | '/(<\/nobr>)(\s+)(<nobr>)/i', |
| | | // PHP bug #32547 workaround: remove title tag |
| | | '/<title[^>]*>[^<]*<\/title>/i', |
| | | // remove <!doctype> before BOM (#1490291) |
| | | '/<\!doctype[^>]+>[^<]*/im', |
| | | // byte-order mark (only outlook?) |
| | | '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', |
| | | // washtml/DOMDocument cannot handle xml namespaces |
| | | '/<html\s[^>]+>/i', |
| | | ); |
| | | |
| | | $html_replace = array( |
| | | '\\1'.' '.'\\3', |
| | | '', |
| | | '', |
| | | '', |
| | | '<html>', |
| | | ); |
| | | $html = preg_replace($html_search, $html_replace, trim($html)); |