githubFork/roundcubemail.git

			@@ -184,7 +184,7 @@
			'\|rgb$\s[0-9]+\s,\s[0-9]+\s,\s[0-9]+\s$'.
			'\|-?[0-9.]+\s*(em\|ex\|px\|cm\|mm\|in\|pt\|pc\|deg\|rad\|grad\|ms\|s\|hz\|khz\|%)?'.
			'\|#[0-9a-f]{3,6}'.
			'\|[a-z0-9", -]+'.
			'\|[a-z0-9"\', -]+'.
			')\s*/i', $str, $match)
			) {
			if ($match[2]) {
			@@ -283,10 +283,12 @@

			/**
			* The main loop that recurses on a node tree.
			* It outputs only allowed tags with allowed attributes
			* and allowed inline styles
			* It outputs only allowed tags with allowed attributes and allowed inline styles
			*
			* @param DOMNode $node HTML element
			* @param int $level Recurrence level (safe initial value found empirically)
			*/
			private function dumpHtml($node, $level = 0)
			private function dumpHtml($node, $level = 20)
			{
			if (!$node->hasChildNodes()) {
			return '';
			@@ -418,7 +420,7 @@
			$html = preg_replace($html_search, $html_replace, trim($html));

			//-> Replace all of those weird MS Word quotes and other high characters
			$badwordchars=array(
			$badwordchars = array(
			"\xe2\x80\x98", // left single quote
			"\xe2\x80\x99", // right single quote
			"\xe2\x80\x9c", // left double quote
			@@ -426,7 +428,7 @@
			"\xe2\x80\x94", // em dash
			"\xe2\x80\xa6" // elipses
			);
			$fixedwordchars=array(
			$fixedwordchars = array(
			"'",
			"'",
			'"',
			@@ -434,7 +436,7 @@
			'—',
			'...'
			);
			$html = str_replace($badwordchars,$fixedwordchars, $html);
			$html = str_replace($badwordchars, $fixedwordchars, $html);

			// PCRE errors handling (#1486856), should we use something like for every preg_* use?
			if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) {
			@@ -455,12 +457,15 @@
			}

			// fix (unknown/malformed) HTML tags before "wash"
			$html = preg_replace_callback('/(<(?!\!)[\/]*)([^\s>]+)/', array($this, 'html_tag_callback'), $html);
			$html = preg_replace_callback('/(<(?!\!)[\/])([^\s>]+)([^>])/', array($this, 'html_tag_callback'), $html);

			// Remove invalid HTML comments (#1487759)
			// Don't remove valid conditional comments
			// Don't remove MSOutlook (<!-->) conditional comments (#1489004)
			$html = preg_replace('/<!--[^->\[\n]+>/', '', $html);

			// fix broken nested lists
			self::fix_broken_lists($html);

			// turn relative into absolute urls
			$html = self::resolve_base($html);
			@@ -479,7 +484,12 @@
			'/[^a-z0-9_\[\]\!-]/i', // forbidden characters
			), '', $tagname);

			return $matches[1] . $tagname;
			// fix invalid closing tags - remove any attributes (#1489446)
			if ($matches[1] == '</') {
			$matches[3] = '';
			}

			return $matches[1] . $tagname . $matches[3];
			}

			/**
			@@ -495,5 +505,77 @@

			return $body;
			}
			}

			/**
			 * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768)
			 *
			 * Walks every <ol>/<ul> element. When another <ol>/<ul> is found as a
			 * direct child of the list (i.e. after a closed <li> rather than inside
			 * one), that nested element is cut out and re-inserted just before the
			 * closing </li> of the preceding list item.
			 *
			 * @param string &$html HTML content; modified in place
			 */
			public static function fix_broken_lists(&$html)
			{
			    // characters that may legally follow a tag name
			    $delims = array(' ', '>');

			    // do two rounds, one for <ol>, one for <ul>
			    foreach (array('ol', 'ul') as $tag) {
			        $pos = 0;
			        while (($pos = stripos($html, '<' . $tag, $pos)) !== false) {
			            $pos++;

			            // make sure this is an ol/ul tag; isset() protects against
			            // a truncated tag sitting at the very end of the input
			            if (!isset($html[$pos + 2]) || !in_array($html[$pos + 2], $delims, true)) {
			                continue;
			            }

			            $p      = $pos;
			            $in_li  = false;
			            $li_pos = 0;

			            while (($p = strpos($html, '<', $p)) !== false) {
			                $tt = strtolower(substr($html, $p, 4));

			                // true when the 4-char prefix is followed by ' ' or '>'
			                // (safe even if the prefix ends the string)
			                $terminated = isset($html[$p + 4]) && in_array($html[$p + 4], $delims, true);

			                // li open tag
			                if ($tt === '<li>' || $tt === '<li ') {
			                    $in_li = true;
			                    $p += 4;
			                }
			                // li close tag
			                else if ($tt === '</li' && $terminated) {
			                    $li_pos = $p;
			                    $p += 4;
			                    $in_li = false;
			                }
			                // ul/ol closing tag
			                else if ($tt === '</' . $tag && $terminated) {
			                    break;
			                }
			                // nested ol/ul element out of li
			                else if (!$in_li && $li_pos
			                    && ($tt === '<ol>' || $tt === '<ol ' || $tt === '<ul>' || $tt === '<ul ')
			                ) {
			                    // find closing tag of this ul/ol element
			                    $element = substr($tt, 1, 2);
			                    $cpos    = $p;
			                    do {
			                        $tpos = stripos($html, '<' . $element, $cpos + 1);
			                        $cpos = stripos($html, '</' . $element, $cpos + 1);
			                    }
			                    while ($tpos !== false && $cpos !== false && $cpos > $tpos);

			                    // not found, this is invalid HTML, skip it
			                    if ($cpos === false) {
			                        break;
			                    }

			                    // the closing tag itself is unterminated (no '>'):
			                    // without this check $len would go negative and
			                    // substr_replace() would corrupt the document
			                    $end = strpos($html, '>', $cpos);
			                    if ($end === false) {
			                        break;
			                    }

			                    // get element content
			                    $len     = $end - $p + 1;
			                    $element = substr($html, $p, $len);

			                    // move element to the end of the last li
			                    $html = substr_replace($html, '', $p, $len);
			                    $html = substr_replace($html, $element, $li_pos, 0);

			                    // keep pointing at the </li> so that a following
			                    // misplaced list is appended after this one and
			                    // the original document order is preserved
			                    $li_pos += $len;

			                    // total length is unchanged, so $end is still valid
			                    $p = $end;
			                }
			                else {
			                    $p++;
			                }
			            }
			        }
			    }
			}
			}

			@@ -184,7 +184,7 @@
			'\|rgb\(\s[0-9]+\s,\s[0-9]+\s,\s[0-9]+\s\)'.
			'\|-?[0-9.]+\s*(em\|ex\|px\|cm\|mm\|in\|pt\|pc\|deg\|rad\|grad\|ms\|s\|hz\|khz\|%)?'.
			'\|#[0-9a-f]{3,6}'.
			'\|[a-z0-9", -]+'.
			'\|[a-z0-9"\', -]+'.
			')\s*/i', $str, $match)
			) {
			if ($match[2]) {
			@@ -283,10 +283,12 @@

			/**
			* The main loop that recurses on a node tree.
			* It outputs only allowed tags with allowed attributes
			* and allowed inline styles
			* It outputs only allowed tags with allowed attributes and allowed inline styles
			*
			* @param DOMNode $node HTML element
			* @param int $level Recurrence level (safe initial value found empirically)
			*/
			private function dumpHtml($node, $level = 0)
			private function dumpHtml($node, $level = 20)
			{
			if (!$node->hasChildNodes()) {
			return '';
			@@ -418,7 +420,7 @@
			$html = preg_replace($html_search, $html_replace, trim($html));

			//-> Replace all of those weird MS Word quotes and other high characters
			$badwordchars=array(
			$badwordchars = array(
			"\xe2\x80\x98", // left single quote
			"\xe2\x80\x99", // right single quote
			"\xe2\x80\x9c", // left double quote
			@@ -426,7 +428,7 @@
			"\xe2\x80\x94", // em dash
			"\xe2\x80\xa6" // elipses
			);
			$fixedwordchars=array(
			$fixedwordchars = array(
			"'",
			"'",
			'"',
			@@ -434,7 +436,7 @@
			'—',
			'...'
			);
			$html = str_replace($badwordchars,$fixedwordchars, $html);
			$html = str_replace($badwordchars, $fixedwordchars, $html);

			// PCRE errors handling (#1486856), should we use something like for every preg_* use?
			if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) {
			@@ -455,12 +457,15 @@
			}

			// fix (unknown/malformed) HTML tags before "wash"
			$html = preg_replace_callback('/(<(?!\!)[\/]*)([^\s>]+)/', array($this, 'html_tag_callback'), $html);
			$html = preg_replace_callback('/(<(?!\!)[\/])([^\s>]+)([^>])/', array($this, 'html_tag_callback'), $html);

			// Remove invalid HTML comments (#1487759)
			// Don't remove valid conditional comments
			// Don't remove MSOutlook (<!-->) conditional comments (#1489004)
			$html = preg_replace('/<!--[^->\[\n]+>/', '', $html);

			// fix broken nested lists
			self::fix_broken_lists($html);

			// turn relative into absolute urls
			$html = self::resolve_base($html);
			@@ -479,7 +484,12 @@
			'/[^a-z0-9_\[\]\!-]/i', // forbidden characters
			), '', $tagname);

			return $matches[1] . $tagname;
			// fix invalid closing tags - remove any attributes (#1489446)
			if ($matches[1] == '</') {
			$matches[3] = '';
			}

			return $matches[1] . $tagname . $matches[3];
			}

			/**
			@@ -495,5 +505,77 @@

			return $body;
			}
			}

			/**
			 * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768)
			 *
			 * Walks every <ol>/<ul> element. When another <ol>/<ul> is found as a
			 * direct child of the list (i.e. after a closed <li> rather than inside
			 * one), that nested element is cut out and re-inserted just before the
			 * closing </li> of the preceding list item.
			 *
			 * @param string &$html HTML content; modified in place
			 */
			public static function fix_broken_lists(&$html)
			{
			    // characters that may legally follow a tag name
			    $delims = array(' ', '>');

			    // do two rounds, one for <ol>, one for <ul>
			    foreach (array('ol', 'ul') as $tag) {
			        $pos = 0;
			        while (($pos = stripos($html, '<' . $tag, $pos)) !== false) {
			            $pos++;

			            // make sure this is an ol/ul tag; isset() protects against
			            // a truncated tag sitting at the very end of the input
			            if (!isset($html[$pos + 2]) || !in_array($html[$pos + 2], $delims, true)) {
			                continue;
			            }

			            $p      = $pos;
			            $in_li  = false;
			            $li_pos = 0;

			            while (($p = strpos($html, '<', $p)) !== false) {
			                $tt = strtolower(substr($html, $p, 4));

			                // true when the 4-char prefix is followed by ' ' or '>'
			                // (safe even if the prefix ends the string)
			                $terminated = isset($html[$p + 4]) && in_array($html[$p + 4], $delims, true);

			                // li open tag
			                if ($tt === '<li>' || $tt === '<li ') {
			                    $in_li = true;
			                    $p += 4;
			                }
			                // li close tag
			                else if ($tt === '</li' && $terminated) {
			                    $li_pos = $p;
			                    $p += 4;
			                    $in_li = false;
			                }
			                // ul/ol closing tag
			                else if ($tt === '</' . $tag && $terminated) {
			                    break;
			                }
			                // nested ol/ul element out of li
			                else if (!$in_li && $li_pos
			                    && ($tt === '<ol>' || $tt === '<ol ' || $tt === '<ul>' || $tt === '<ul ')
			                ) {
			                    // find closing tag of this ul/ol element
			                    $element = substr($tt, 1, 2);
			                    $cpos    = $p;
			                    do {
			                        $tpos = stripos($html, '<' . $element, $cpos + 1);
			                        $cpos = stripos($html, '</' . $element, $cpos + 1);
			                    }
			                    while ($tpos !== false && $cpos !== false && $cpos > $tpos);

			                    // not found, this is invalid HTML, skip it
			                    if ($cpos === false) {
			                        break;
			                    }

			                    // the closing tag itself is unterminated (no '>'):
			                    // without this check $len would go negative and
			                    // substr_replace() would corrupt the document
			                    $end = strpos($html, '>', $cpos);
			                    if ($end === false) {
			                        break;
			                    }

			                    // get element content
			                    $len     = $end - $p + 1;
			                    $element = substr($html, $p, $len);

			                    // move element to the end of the last li
			                    $html = substr_replace($html, '', $p, $len);
			                    $html = substr_replace($html, $element, $li_pos, 0);

			                    // keep pointing at the </li> so that a following
			                    // misplaced list is appended after this one and
			                    // the original document order is preserved
			                    $li_pos += $len;

			                    // total length is unchanged, so $end is still valid
			                    $p = $end;
			                }
			                else {
			                    $p++;
			                }
			            }
			        }
			    }
			}
			}