| | |
| | | |
| | | /* |
| | | +-----------------------------------------------------------------------+ |
| | | | program/include/rcube_spellchecker.php | |
| | | | | |
| | | | This file is part of the Roundcube Webmail client | |
| | | | Copyright (C) 2011, Kolab Systems AG | |
| | | | Copyright (C) 2008-2011, The Roundcube Dev Team | |
| | | | Copyright (C) 2011-2013, Kolab Systems AG | |
| | | | Copyright (C) 2008-2013, The Roundcube Dev Team | |
| | | | | |
| | | | Licensed under the GNU General Public License version 3 or | |
| | | | any later version with exceptions for skins & plugins. | |
| | |
| | | | | |
| | | | PURPOSE: | |
| | | | Spellchecking using different backends | |
| | | | | |
| | | +-----------------------------------------------------------------------+ |
| | | | Author: Aleksander Machniak <machniak@kolabsys.com> | |
| | | | Author: Thomas Bruederli <roundcube@gmail.com> | |
| | | +-----------------------------------------------------------------------+ |
| | | */ |
| | | |
| | | |
| | | /** |
| | | * Helper class for spellchecking with Googielspell and PSpell support. |
| | |
| | | { |
| | | private $matches = array(); |
| | | private $engine; |
| | | private $backend; |
| | | private $lang; |
| | | private $rc; |
| | | private $error; |
| | | private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/'; |
| | | private $options = array(); |
| | | private $dict; |
| | | private $have_dict; |
| | | |
| | | |
| | | // default settings |
| | | const GOOGLE_HOST = 'ssl://www.google.com'; |
| | | const GOOGLE_PORT = 443; |
| | | const MAX_SUGGESTIONS = 10; |
| | | |
| | | |
| | | /** |
| | |
| | | 'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'), |
| | | 'dictionary' => $this->rc->config->get('spellcheck_dictionary'), |
| | | ); |
| | | |
| | | $cls = 'rcube_spellcheck_' . $this->engine; |
| | | if (class_exists($cls)) { |
| | | $this->backend = new $cls($this, $this->lang); |
| | | $this->backend->options = $this->options; |
| | | } |
| | | else { |
| | | $this->error = "Unknown spellcheck engine '$this->engine'"; |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Return a list of supported languages |
| | | */ |
| | | function languages() |
| | | { |
| | | // trust configuration |
| | | $configured = $this->rc->config->get('spellcheck_languages'); |
| | | if (!empty($configured) && is_array($configured) && !$configured[0]) { |
| | | return $configured; |
| | | } |
| | | else if (!empty($configured)) { |
| | | $langs = (array)$configured; |
| | | } |
| | | else if ($this->backend) { |
| | | $langs = $this->backend->languages(); |
| | | } |
| | | |
| | | // load index |
| | | @include(RCUBE_LOCALIZATION_DIR . 'index.inc'); |
| | | |
| | | // add correct labels |
| | | $languages = array(); |
| | | foreach ($langs as $lang) { |
| | | $langc = strtolower(substr($lang, 0, 2)); |
| | | $alias = $rcube_language_aliases[$langc]; |
| | | if (!$alias) { |
| | | $alias = $langc.'_'.strtoupper($langc); |
| | | } |
| | | if ($rcube_languages[$lang]) { |
| | | $languages[$lang] = $rcube_languages[$lang]; |
| | | } |
| | | else if ($rcube_languages[$alias]) { |
| | | $languages[$lang] = $rcube_languages[$alias]; |
| | | } |
| | | else { |
| | | $languages[$lang] = ucfirst($lang); |
| | | } |
| | | } |
| | | |
| | | // remove possible duplicates (#1489395) |
| | | $languages = array_unique($languages); |
| | | |
| | | asort($languages); |
| | | |
| | | return $languages; |
| | | } |
| | | |
| | | /** |
| | | * Set content and check spelling |
| | |
| | | $this->content = $text; |
| | | } |
| | | |
| | | if ($this->engine == 'pspell') { |
| | | $this->matches = $this->_pspell_check($this->content); |
| | | } |
| | | else { |
| | | $this->matches = $this->_googie_check($this->content); |
| | | if ($this->backend) { |
| | | $this->matches = $this->backend->check($this->content); |
| | | } |
| | | |
| | | return $this->found() == 0; |
| | |
| | | */ |
| | | function get_suggestions($word) |
| | | { |
| | | if ($this->engine == 'pspell') { |
| | | return $this->_pspell_suggestions($word); |
| | | if ($this->backend) { |
| | | return $this->backend->get_suggestions($word); |
| | | } |
| | | |
| | | return $this->_googie_suggestions($word); |
| | | return array(); |
| | | } |
| | | |
| | | |
| | |
| | | */ |
| | | function get_words($text = null, $is_html=false) |
| | | { |
| | | if ($this->engine == 'pspell') { |
| | | return $this->_pspell_words($text, $is_html); |
| | | if ($is_html) { |
| | | $text = $this->html2text($text); |
| | | } |
| | | |
| | | return $this->_googie_words($text, $is_html); |
| | | if ($this->backend) { |
| | | return $this->backend->get_words($text); |
| | | } |
| | | |
| | | return array(); |
| | | } |
| | | |
| | | |
| | |
| | | function get_xml() |
| | | { |
| | | // send output |
| | | $out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">'; |
| | | $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">'; |
| | | |
| | | foreach ($this->matches as $item) { |
| | | foreach ((array)$this->matches as $item) { |
| | | $out .= '<c o="'.$item[1].'" l="'.$item[2].'">'; |
| | | $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; |
| | | $out .= '</c>'; |
| | |
| | | { |
| | | $result = array(); |
| | | |
| | | foreach ($this->matches as $item) { |
| | | foreach ((array)$this->matches as $item) { |
| | | if ($this->engine == 'pspell') { |
| | | $word = $item[0]; |
| | | } |
| | | else { |
| | | $word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET); |
| | | $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET); |
| | | } |
| | | $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; |
| | | |
| | | if (is_array($item[4])) { |
| | | $suggestions = $item[4]; |
| | | } |
| | | else if (empty($item[4])) { |
| | | $suggestions = array(); |
| | | } |
| | | else { |
| | | $suggestions = explode("\t", $item[4]); |
| | | } |
| | | |
| | | $result[$word] = $suggestions; |
| | | } |
| | | |
| | | return $result; |
| | |
| | | */ |
| | | function error() |
| | | { |
| | | return $this->error; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Checks the text using pspell |
| | | * |
| | | * @param string $text Text content for spellchecking |
| | | */ |
| | | private function _pspell_check($text) |
| | | { |
| | | // init spellchecker |
| | | $this->_pspell_init(); |
| | | |
| | | if (!$this->plink) { |
| | | return array(); |
| | | } |
| | | |
| | | // tokenize |
| | | $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); |
| | | |
| | | $diff = 0; |
| | | $matches = array(); |
| | | |
| | | foreach ($text as $w) { |
| | | $word = trim($w[0]); |
| | | $pos = $w[1] - $diff; |
| | | $len = mb_strlen($word); |
| | | |
| | | // skip exceptions |
| | | if ($this->is_exception($word)) { |
| | | } |
| | | else if (!pspell_check($this->plink, $word)) { |
| | | $suggestions = pspell_suggest($this->plink, $word); |
| | | |
| | | if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { |
| | | $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); |
| | | } |
| | | |
| | | $matches[] = array($word, $pos, $len, null, $suggestions); |
| | | } |
| | | |
| | | $diff += (strlen($word) - $len); |
| | | } |
| | | |
| | | return $matches; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Returns the misspelled words |
| | | */ |
| | | private function _pspell_words($text = null, $is_html=false) |
| | | { |
| | | $result = array(); |
| | | |
| | | if ($text) { |
| | | // init spellchecker |
| | | $this->_pspell_init(); |
| | | |
| | | if (!$this->plink) { |
| | | return array(); |
| | | } |
| | | |
| | | // With PSpell we don't need to get suggestions to return misspelled words |
| | | if ($is_html) { |
| | | $text = $this->html2text($text); |
| | | } |
| | | |
| | | $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); |
| | | |
| | | foreach ($text as $w) { |
| | | $word = trim($w[0]); |
| | | |
| | | // skip exceptions |
| | | if ($this->is_exception($word)) { |
| | | continue; |
| | | } |
| | | |
| | | if (!pspell_check($this->plink, $word)) { |
| | | $result[] = $word; |
| | | } |
| | | } |
| | | |
| | | return $result; |
| | | } |
| | | |
| | | foreach ($this->matches as $m) { |
| | | $result[] = $m[0]; |
| | | } |
| | | |
| | | return $result; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Returns suggestions for misspelled word |
| | | */ |
| | | private function _pspell_suggestions($word) |
| | | { |
| | | // init spellchecker |
| | | $this->_pspell_init(); |
| | | |
| | | if (!$this->plink) { |
| | | return array(); |
| | | } |
| | | |
| | | $suggestions = pspell_suggest($this->plink, $word); |
| | | |
| | | if (sizeof($suggestions) > self::MAX_SUGGESTIONS) |
| | | $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); |
| | | |
| | | return is_array($suggestions) ? $suggestions : array(); |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Initializes PSpell dictionary |
| | | */ |
| | | private function _pspell_init() |
| | | { |
| | | if (!$this->plink) { |
| | | if (!extension_loaded('pspell')) { |
| | | $this->error = "Pspell extension not available"; |
| | | rcube::raise_error(array( |
| | | 'code' => 500, 'type' => 'php', |
| | | 'file' => __FILE__, 'line' => __LINE__, |
| | | 'message' => $this->error), true, false); |
| | | |
| | | return; |
| | | } |
| | | |
| | | $this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST); |
| | | } |
| | | |
| | | if (!$this->plink) { |
| | | $this->error = "Unable to load Pspell engine for selected language"; |
| | | } |
| | | } |
| | | |
| | | |
| | | private function _googie_check($text) |
| | | { |
| | | // spell check uri is configured |
| | | $url = $this->rc->config->get('spellcheck_uri'); |
| | | |
| | | if ($url) { |
| | | $a_uri = parse_url($url); |
| | | $ssl = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl'); |
| | | $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80); |
| | | $host = ($ssl ? 'ssl://' : '') . $a_uri['host']; |
| | | $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang; |
| | | } |
| | | else { |
| | | $host = self::GOOGLE_HOST; |
| | | $port = self::GOOGLE_PORT; |
| | | $path = '/tbproxy/spell?lang=' . $this->lang; |
| | | } |
| | | |
| | | // Google has some problem with spaces, use \n instead |
| | | $gtext = str_replace(' ', "\n", $text); |
| | | |
| | | $gtext = '<?xml version="1.0" encoding="utf-8" ?>' |
| | | .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">' |
| | | .'<text>' . $gtext . '</text>' |
| | | .'</spellrequest>'; |
| | | |
| | | $store = ''; |
| | | if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) { |
| | | $out = "POST $path HTTP/1.0\r\n"; |
| | | $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n"; |
| | | $out .= "Content-Length: " . strlen($gtext) . "\r\n"; |
| | | $out .= "Content-Type: application/x-www-form-urlencoded\r\n"; |
| | | $out .= "Connection: Close\r\n\r\n"; |
| | | $out .= $gtext; |
| | | fwrite($fp, $out); |
| | | |
| | | while (!feof($fp)) |
| | | $store .= fgets($fp, 128); |
| | | fclose($fp); |
| | | } |
| | | |
| | | if (!$store) { |
| | | $this->error = "Empty result from spelling engine"; |
| | | } |
| | | |
| | | preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER); |
| | | |
| | | // skip exceptions (if appropriate options are enabled) |
| | | if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums']) |
| | | || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary']) |
| | | ) { |
| | | foreach ($matches as $idx => $m) { |
| | | $word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET); |
| | | // skip exceptions |
| | | if ($this->is_exception($word)) { |
| | | unset($matches[$idx]); |
| | | } |
| | | } |
| | | } |
| | | |
| | | return $matches; |
| | | } |
| | | |
| | | |
| | | private function _googie_words($text = null, $is_html=false) |
| | | { |
| | | if ($text) { |
| | | if ($is_html) { |
| | | $text = $this->html2text($text); |
| | | } |
| | | |
| | | $matches = $this->_googie_check($text); |
| | | } |
| | | else { |
| | | $matches = $this->matches; |
| | | $text = $this->content; |
| | | } |
| | | |
| | | $result = array(); |
| | | |
| | | foreach ($matches as $m) { |
| | | $result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET); |
| | | } |
| | | |
| | | return $result; |
| | | } |
| | | |
| | | |
| | | private function _googie_suggestions($word) |
| | | { |
| | | if ($word) { |
| | | $matches = $this->_googie_check($word); |
| | | } |
| | | else { |
| | | $matches = $this->matches; |
| | | } |
| | | |
| | | if ($matches[0][4]) { |
| | | $suggestions = explode("\t", $matches[0][4]); |
| | | if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { |
| | | $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); |
| | | } |
| | | |
| | | return $suggestions; |
| | | } |
| | | |
| | | return array(); |
| | | return $this->error ? $this->error : ($this->backend ? $this->backend->error() : false); |
| | | } |
| | | |
| | | |
| | | private function html2text($text) |
| | | { |
| | | $h2t = new html2text($text, false, true, 0); |
| | | $h2t = new rcube_html2text($text, false, true, 0); |
| | | return $h2t->get_text(); |
| | | } |
| | | |
| | |
| | | public function is_exception($word) |
| | | { |
| | | // Contain only symbols (e.g. "+9,0", "2:2") |
| | | if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word)) |
| | | if (!$word || preg_match('/^[0-9@#$%^&_+~*<>=:;?!,.-]+$/', $word)) |
| | | return true; |
| | | |
| | | // Contain symbols (e.g. "g@@gle"), all symbols excluding separators |
| | |
| | | "UPDATE ".$this->rc->db->table_name('dictionary') |
| | | ." SET data = ?" |
| | | ." WHERE user_id " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") |
| | | ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", |
| | | ." AND " . $this->rc->db->quote_identifier('language') . " = ?", |
| | | implode(' ', $plugin['dictionary']), $plugin['language']); |
| | | } |
| | | // don't store empty dict |
| | |
| | | $this->rc->db->query( |
| | | "DELETE FROM " . $this->rc->db->table_name('dictionary') |
| | | ." WHERE user_id " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") |
| | | ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", |
| | | ." AND " . $this->rc->db->quote_identifier('language') . " = ?", |
| | | $plugin['language']); |
| | | } |
| | | } |
| | | else if (!empty($this->dict)) { |
| | | $this->rc->db->query( |
| | | "INSERT INTO " .$this->rc->db->table_name('dictionary') |
| | | ." (user_id, " . $this->rc->db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)", |
| | | ." (user_id, " . $this->rc->db->quote_identifier('language') . ", data) VALUES (?, ?, ?)", |
| | | $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary'])); |
| | | } |
| | | } |
| | |
| | | |
| | | if (empty($plugin['abort'])) { |
| | | $dict = array(); |
| | | $this->rc->db->query( |
| | | $sql_result = $this->rc->db->query( |
| | | "SELECT data FROM ".$this->rc->db->table_name('dictionary') |
| | | ." WHERE user_id ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") |
| | | ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", |
| | | ." AND " . $this->rc->db->quote_identifier('language') . " = ?", |
| | | $plugin['language']); |
| | | |
| | | if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) { |