From 2f8b1036da42ec3d15a51c6b17a473f9f4df71d3 Mon Sep 17 00:00:00 2001
From: Thomas Bruederli <bruederli@kolabsys.com>
Date: Sat, 07 Feb 2015 12:33:24 -0500
Subject: [PATCH] Bump version and copyright year

---
 program/lib/Roundcube/rcube_utils.php |  325 ++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 241 insertions(+), 84 deletions(-)

diff --git a/program/lib/Roundcube/rcube_utils.php b/program/lib/Roundcube/rcube_utils.php
index 2540f77..f4c0e90 100644
--- a/program/lib/Roundcube/rcube_utils.php
+++ b/program/lib/Roundcube/rcube_utils.php
@@ -103,13 +103,14 @@
             }
 
             foreach ($domain_array as $part) {
-                if (!preg_match('/^(([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|([A-Za-z0-9]))$/', $part)) {
+                if (!preg_match('/^((xn--)?([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|([A-Za-z0-9]))$/', $part)) {
                     return false;
                 }
             }
 
             // last domain part
-            if (preg_match('/[^a-zA-Z]/', array_pop($domain_array))) {
+            $last_part = array_pop($domain_array);
+            if (strpos($last_part, 'xn--') !== 0 && preg_match('/[^a-zA-Z]/', $last_part)) {
                 return false;
             }
 
@@ -117,17 +118,6 @@
 
             if (!$dns_check || !$rcube->config->get('email_dns_check')) {
                 return true;
-            }
-
-            if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN' && version_compare(PHP_VERSION, '5.3.0', '<')) {
-                $lookup = array();
-                @exec("nslookup -type=MX " . escapeshellarg($domain_part) . " 2>&1", $lookup);
-                foreach ($lookup as $line) {
-                    if (strpos($line, 'MX preference')) {
-                        return true;
-                    }
-                }
-                return false;
             }
 
             // find MX record(s)
@@ -390,12 +380,13 @@
      * Convert array of request parameters (prefixed with _)
      * to a regular array with non-prefixed keys.
      *
-     * @param int    $mode   Source to get value from (GPC)
-     * @param string $ignore PCRE expression to skip parameters by name
+     * @param int     $mode       Source to get value from (GPC)
+     * @param string  $ignore     PCRE expression to skip parameters by name
+     * @param boolean $allow_html Allow HTML tags in field value
      *
      * @return array Hash array with all request parameters
      */
-    public static function request2param($mode = null, $ignore = 'task|action')
+    public static function request2param($mode = null, $ignore = 'task|action', $allow_html = false)
     {
         $out = array();
         $src = $mode == self::INPUT_GET ? $_GET : ($mode == self::INPUT_POST ? $_POST : $_REQUEST);
@@ -403,7 +394,7 @@
         foreach (array_keys($src) as $key) {
             $fname = $key[0] == '_' ? substr($key, 1) : $key;
             if ($ignore && !preg_match('/^(' . $ignore . ')$/', $fname)) {
-                $out[$fname] = self::get_input_value($key, $mode);
+                $out[$fname] = self::get_input_value($key, $mode, $allow_html);
             }
         }
 
@@ -444,41 +435,48 @@
         $source   = self::xss_entity_decode($source);
         $stripped = preg_replace('/[^a-z\(:;]/i', '', $source);
         $evilexpr = 'expression|behavior|javascript:|import[^a]' . (!$allow_remote ? '|url\(' : '');
+
         if (preg_match("/$evilexpr/i", $stripped)) {
             return '/* evil! */';
         }
 
+        $strict_url_regexp = '!url\s*\([ "\'](https?:)//[a-z0-9/._+-]+["\' ]\)!Uims';
+
         // cut out all contents between { and }
         while (($pos = strpos($source, '{', $last_pos)) && ($pos2 = strpos($source, '}', $pos))) {
-            $styles = substr($source, $pos+1, $pos2-($pos+1));
+            $nested = strpos($source, '{', $pos+1);
+            if ($nested && $nested < $pos2)  // when dealing with nested blocks (e.g. @media), take the inner one
+                $pos = $nested;
+            $length = $pos2 - $pos - 1;
+            $styles = substr($source, $pos+1, $length);
 
             // check every line of a style block...
             if ($allow_remote) {
                 $a_styles = preg_split('/;[\r\n]*/', $styles, -1, PREG_SPLIT_NO_EMPTY);
+
                 foreach ($a_styles as $line) {
                     $stripped = preg_replace('/[^a-z\(:;]/i', '', $line);
                     // ... and only allow strict url() values
-                    $regexp = '!url\s*\([ "\'](https?:)//[a-z0-9/._+-]+["\' ]\)!Uims';
-                    if (stripos($stripped, 'url(') && !preg_match($regexp, $line)) {
+                    if (stripos($stripped, 'url(') && !preg_match($strict_url_regexp, $line)) {
                         $a_styles = array('/* evil! */');
                         break;
                     }
                 }
+
                 $styles = join(";\n", $a_styles);
             }
 
-            $key = $replacements->add($styles);
-            $source = substr($source, 0, $pos+1)
-                . $replacements->get_replacement($key)
-                . substr($source, $pos2, strlen($source)-$pos2);
-            $last_pos = $pos+2;
+            $key      = $replacements->add($styles);
+            $repl     = $replacements->get_replacement($key);
+            $source   = substr_replace($source, $repl, $pos+1, $length);
+            $last_pos = $pos2 - ($length - strlen($repl));
         }
 
         // remove html comments and add #container to each tag selector.
         // also replace body definition because we also stripped off the <body> tag
-        $styles = preg_replace(
+        $source = preg_replace(
             array(
-                '/(^\s*<!--)|(-->\s*$)/',
+                '/(^\s*<\!--)|(-->\s*$)/m',
                 '/(^\s*|,\s*|\}\s*)([a-z0-9\._#\*][a-z0-9\.\-_]*)/im',
                 '/'.preg_quote($container_id, '/').'\s+body/i',
             ),
@@ -490,9 +488,9 @@
             $source);
 
         // put block contents back in
-        $styles = $replacements->resolve($styles);
+        $source = $replacements->resolve($source);
 
-        return $styles;
+        return $source;
     }
 
 
@@ -585,18 +583,18 @@
      */
     public static function https_check($port=null, $use_https=true)
     {
-        global $RCMAIL;
-
         if (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) != 'off') {
             return true;
         }
-        if (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) && strtolower($_SERVER['HTTP_X_FORWARDED_PROTO']) == 'https') {
+        if (!empty($_SERVER['HTTP_X_FORWARDED_PROTO'])
+            && strtolower($_SERVER['HTTP_X_FORWARDED_PROTO']) == 'https'
+            && in_array($_SERVER['REMOTE_ADDR'], rcube::get_instance()->config->get('proxy_whitelist', array()))) {
             return true;
         }
         if ($port && $_SERVER['SERVER_PORT'] == $port) {
             return true;
         }
-        if ($use_https && isset($RCMAIL) && $RCMAIL->config->get('use_https')) {
+        if ($use_https && rcube::get_instance()->config->get('use_https')) {
             return true;
         }
 
@@ -614,6 +612,10 @@
      */
     public static function parse_host($name, $host = '')
     {
+        if (!is_string($name)) {
+            return $name;
+        }
+
         // %n - host
         $n = preg_replace('/:\d+$/', '', $_SERVER['SERVER_NAME']);
         // %t - host name without first part, e.g. %n=mail.domain.tld, %t=domain.tld
@@ -634,8 +636,7 @@
             }
         }
 
-        $name = str_replace(array('%n', '%t', '%d', '%h', '%z', '%s'), array($n, $t, $d, $h, $z, $s[2]), $name);
-        return $name;
+        return str_replace(array('%n', '%t', '%d', '%h', '%z', '%s'), array($n, $t, $d, $h, $z, $s[2]), $name);
     }
 
 
@@ -672,9 +673,26 @@
      */
     public static function remote_addr()
     {
-        foreach (array('HTTP_X_FORWARDED_FOR','HTTP_X_REAL_IP','REMOTE_ADDR') as $prop) {
-            if (!empty($_SERVER[$prop]))
-                return $_SERVER[$prop];
+        // Check if any of the headers are set first to improve performance
+        if (!empty($_SERVER['HTTP_X_FORWARDED_FOR']) || !empty($_SERVER['HTTP_X_REAL_IP'])) {
+            $proxy_whitelist = rcube::get_instance()->config->get('proxy_whitelist', array());
+            if (in_array($_SERVER['REMOTE_ADDR'], $proxy_whitelist)) {
+                if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
+                    foreach(array_reverse(explode(',', $_SERVER['HTTP_X_FORWARDED_FOR'])) as $forwarded_ip) {
+                        if (!in_array($forwarded_ip, $proxy_whitelist)) {
+                            return $forwarded_ip;
+                        }
+                    }
+                }
+
+                if (!empty($_SERVER['HTTP_X_REAL_IP'])) {
+                    return $_SERVER['HTTP_X_REAL_IP'];
+                }
+            }
+        }
+
+        if (!empty($_SERVER['REMOTE_ADDR'])) {
+            return $_SERVER['REMOTE_ADDR'];
         }
 
         return '';
@@ -734,48 +752,23 @@
      * Improved equivalent to strtotime()
      *
      * @param string $date  Date string
+     * @param object DateTimeZone to use for DateTime object
      *
      * @return int Unix timestamp
      */
-    public static function strtotime($date)
+    public static function strtotime($date, $timezone = null)
     {
-        $date = trim($date);
-
-        // check for MS Outlook vCard date format YYYYMMDD
-        if (preg_match('/^([12][90]\d\d)([01]\d)([0123]\d)$/', $date, $m)) {
-            return mktime(0,0,0, intval($m[2]), intval($m[3]), intval($m[1]));
-        }
-
-        // common little-endian formats, e.g. dd/mm/yyyy (not all are supported by strtotime)
-        if (preg_match('/^(\d{1,2})[.\/-](\d{1,2})[.\/-](\d{4})$/', $date, $m)
-            && $m[1] > 0 && $m[1] <= 31 && $m[2] > 0 && $m[2] <= 12 && $m[3] >= 1970
-        ) {
-            return mktime(0,0,0, intval($m[2]), intval($m[1]), intval($m[3]));
-        }
+        $date = self::clean_datestr($date);
+        $tzname = $timezone ? ' ' . $timezone->getName() : '';
 
         // unix timestamp
         if (is_numeric($date)) {
             return (int) $date;
         }
 
-        // Clean malformed data
-        $date = preg_replace(
-            array(
-                '/GMT\s*([+-][0-9]+)/',                     // support non-standard "GMTXXXX" literal
-                '/[^a-z0-9\x20\x09:+-]/i',                  // remove any invalid characters
-                '/\s*(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s*/i',   // remove weekday names
-            ),
-            array(
-                '\\1',
-                '',
-                '',
-            ), $date);
-
-        $date = trim($date);
-
         // if date parsing fails, we have a date in non-rfc format.
         // remove token from the end and try again
-        while ((($ts = @strtotime($date)) === false) || ($ts < 0)) {
+        while ((($ts = @strtotime($date . $tzname)) === false) || ($ts < 0)) {
             $d = explode(' ', $date);
             array_pop($d);
             if (!$d) {
@@ -787,6 +780,94 @@
         return (int) $ts;
     }
 
+    /**
+     * Date parsing function that turns the given value into a DateTime object
+     *
+     * @param string $date  Date string
+     * @param object DateTimeZone to use for DateTime object
+     *
+     * @return object DateTime instance or false on failure
+     */
+    public static function anytodatetime($date, $timezone = null)
+    {
+        if (is_object($date) && is_a($date, 'DateTime')) {
+            return $date;
+        }
+
+        $dt   = false;
+        $date = self::clean_datestr($date);
+
+        // try to parse string with DateTime first
+        if (!empty($date)) {
+            try {
+                $dt = $timezone ? new DateTime($date, $timezone) : new DateTime($date);
+            }
+            catch (Exception $e) {
+                // ignore
+            }
+        }
+
+        // try our advanced strtotime() method
+        if (!$dt && ($timestamp = self::strtotime($date, $timezone))) {
+            try {
+                $dt = new DateTime("@".$timestamp);
+                if ($timezone) {
+                    $dt->setTimezone($timezone);
+                }
+            }
+            catch (Exception $e) {
+                // ignore
+            }
+        }
+
+        return $dt;
+    }
+
+    /**
+     * Clean up date string for strtotime() input
+     *
+     * @param string $date Date string
+     *
+     * @return string Date string
+     */
+    public static function clean_datestr($date)
+    {
+        $date = trim($date);
+
+        // check for MS Outlook vCard date format YYYYMMDD
+        if (preg_match('/^([12][90]\d\d)([01]\d)([0123]\d)$/', $date, $m)) {
+            return sprintf('%04d-%02d-%02d 00:00:00', intval($m[1]), intval($m[2]), intval($m[3]));
+        }
+
+        // Clean malformed data
+        $date = preg_replace(
+            array(
+                '/GMT\s*([+-][0-9]+)/',                     // support non-standard "GMTXXXX" literal
+                '/[^a-z0-9\x20\x09:+-\/]/i',                  // remove any invalid characters
+                '/\s*(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s*/i',   // remove weekday names
+            ),
+            array(
+                '\\1',
+                '',
+                '',
+            ), $date);
+
+        $date = trim($date);
+
+        // try to fix dd/mm vs. mm/dd discrepancy, we can't do more here
+        if (preg_match('/^(\d{1,2})[.\/-](\d{1,2})[.\/-](\d{4})$/', $date, $m)) {
+            $mdy   = $m[2] > 12 && $m[1] <= 12;
+            $day   = $mdy ? $m[2] : $m[1];
+            $month = $mdy ? $m[1] : $m[2];
+            $date  = sprintf('%04d-%02d-%02d 00:00:00', intval($m[3]), $month, $day);
+        }
+        // I've found that YYYY.MM.DD is recognized wrong, so here's a fix
+        else if (preg_match('/^(\d{4})\.(\d{1,2})\.(\d{1,2})$/', $date)) {
+            $date = str_replace('.', '-', $date) . ' 00:00:00';
+        }
+
+        return $date;
+    }
 
     /*
      * Idn_to_ascii wrapper.
@@ -831,38 +912,73 @@
      * Split the given string into word tokens
      *
      * @param string Input to tokenize
+     * @param integer Minimum length of a single token
      * @return array List of tokens
      */
-    public static function tokenize_string($str)
+    public static function tokenize_string($str, $minlen = 2)
     {
-        return explode(" ", preg_replace(
-            array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u'),
-            array(' ', '\\1\\2', ' '),
-            $str));
+        $expr = array('/[\s;\/+-]+/ui', '/(\d)[-.\s]+(\d)/u');
+        $repl = array(' ', '\\1\\2');
+
+        if ($minlen > 1) {
+            $minlen--;
+            $expr[] = "/(^|\s+)\w{1,$minlen}(\s+|$)/u";
+            $repl[] = ' ';
+        }
+
+        return array_filter(explode(" ", preg_replace($expr, $repl, $str)));
     }
 
     /**
      * Normalize the given string for fulltext search.
-     * Currently only optimized for Latin-1 characters; to be extended
+     * Currently only optimized for ISO-8859-1 and ISO-8859-2 characters; to be extended
      *
      * @param string  Input string (UTF-8)
      * @param boolean True to return list of words as array
+     * @param integer Minimum length of tokens
+     *
      * @return mixed  Normalized string or a list of normalized tokens
      */
-    public static function normalize_string($str, $as_array = false)
+    public static function normalize_string($str, $as_array = false, $minlen = 2)
     {
+        // replace 4-byte unicode characters with '?' character,
+        // these are not supported in default utf-8 charset on mysql,
+        // the chance we'd need them in searching is very low
+        $str = preg_replace('/('
+            . '\xF0[\x90-\xBF][\x80-\xBF]{2}'
+            . '|[\xF1-\xF3][\x80-\xBF]{3}'
+            . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
+            . ')/', '?', $str);
+
         // split by words
-        $arr = self::tokenize_string($str);
+        $arr = self::tokenize_string($str, $minlen);
+
+        // detect character set
+        if (utf8_encode(utf8_decode($str)) == $str) {
+            // ISO-8859-1 (or ASCII)
+            preg_match_all('/./u', 'äâàåáãæçéêëèïîìíñöôòøõóüûùúýÿ', $keys);
+            preg_match_all('/./',  'aaaaaaaceeeeiiiinoooooouuuuyy', $values);
+
+            $mapping = array_combine($keys[0], $values[0]);
+            $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+        }
+        else if (rcube_charset::convert(rcube_charset::convert($str, 'UTF-8', 'ISO-8859-2'), 'ISO-8859-2', 'UTF-8') == $str) {
+            // ISO-8859-2
+            preg_match_all('/./u', 'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżý', $keys);
+            preg_match_all('/./',  'aaaaccceeeeiilllnnooorrsssttuuuuzzzy', $values);
+
+            $mapping = array_combine($keys[0], $values[0]);
+            $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+        }
 
         foreach ($arr as $i => $part) {
-            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ?
-                $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
-                    'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
-                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
-                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+            $part = mb_strtolower($part);
+
+            if (!empty($mapping)) {
+                $part = strtr($part, $mapping);
             }
-            else
-                $arr[$i] = mb_strtolower($part);
+
+            $arr[$i] = $part;
         }
 
         return $as_array ? $arr : join(" ", $arr);
@@ -944,7 +1060,6 @@
         }
     }
 
-
     /**
      * Find out if the string content means true or false
      *
@@ -959,4 +1074,46 @@
         return !in_array($str, array('false', '0', 'no', 'off', 'nein', ''), true);
     }
 
+    /**
+     * OS-dependent absolute path detection
+     */
+    public static function is_absolute_path($path)
+    {
+        if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
+            return (bool) preg_match('!^[a-z]:[\\\\/]!i', $path);
+        }
+        else {
+            return $path[0] == '/';
+        }
+    }
+
+    /**
+     * Resolve relative URL
+     *
+     * @param string $url Relative URL
+     *
+     * @return string Absolute URL
+     */
+    public static function resolve_url($url)
+    {
+        // prepend protocol://hostname:port
+        if (!preg_match('|^https?://|', $url)) {
+            $schema       = 'http';
+            $default_port = 80;
+
+            if (self::https_check()) {
+                $schema       = 'https';
+                $default_port = 443;
+            }
+
+            $prefix = $schema . '://' . preg_replace('/:\d+$/', '', $_SERVER['HTTP_HOST']);
+            if ($_SERVER['SERVER_PORT'] != $default_port) {
+                $prefix .= ':' . $_SERVER['SERVER_PORT'];
+            }
+
+            $url = $prefix . ($url[0] == '/' ? '' : '/') . $url;
+        }
+
+        return $url;
+    }
 }

--
Gitblit v1.9.1