From 759566fe99fe1fcf2857afc51d0b343b67aa4d44 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Fri, 27 Feb 2015 03:13:39 -0500
Subject: [PATCH] Fix parsing invalid HTML messages with BOM after <!DOCTYPE> (#1490291)

---
 program/lib/Roundcube/rcube_washtml.php |   15 +++++++++++----
 1 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php
index e0cce68..b042f5f 100644
--- a/program/lib/Roundcube/rcube_washtml.php
+++ b/program/lib/Roundcube/rcube_washtml.php
@@ -403,16 +403,23 @@
     {
         // special replacements (not properly handled by washtml class)
         $html_search = array(
-            '/(<\/nobr>)(\s+)(<nobr>)/i', // space(s) between <NOBR>
-            '/<title[^>]*>[^<]*<\/title>/i', // PHP bug #32547 workaround: remove title tag
-            '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', // byte-order mark (only outlook?)
-            '/<html\s[^>]+>/i', // washtml/DOMDocument cannot handle xml namespaces
+            // space(s) between <NOBR>
+            '/(<\/nobr>)(\s+)(<nobr>)/i',
+            // PHP bug #32547 workaround: remove title tag
+            '/<title[^>]*>[^<]*<\/title>/i',
+            // remove <!doctype> before BOM (#1490291)
+            '/<\!doctype[^>]+>[^<]*/im',
+            // byte-order mark (only outlook?)
+            '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/',
+            // washtml/DOMDocument cannot handle xml namespaces
+            '/<html\s[^>]+>/i',
         );
         $html_replace = array(
             '\\1'.' '.'\\3',
             '',
             '',
+            '',
             '<html>',
         );
         $html = preg_replace($html_search, $html_replace, trim($html));
 
-- 
Gitblit v1.9.1