[ Index ] |
PHP Cross Reference of phpBB 3.0 Beta 3 |
[Summary view] [Print] [Text view]
<?php
/**
*
* @package search
* @version $Id: fulltext_native.php,v 1.35 2006/11/10 14:55:49 acydburn Exp $
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
*
*/

/**
*/
if (!defined('IN_PHPBB'))
{
	exit;
}

/**
* @ignore
*/
include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);

/**
* fulltext_native
* phpBB's own db driven fulltext search, version 2
* @package search
*/
class fulltext_native extends search_backend
{
	var $stats = array();
	var $word_length = array();
	var $search_query;
	var $common_words = array();

	var $must_contain_ids = array();
	var $must_not_contain_ids = array();
	var $must_exclude_one_ids = array();

	/**
	* Constructor: reads the configured minimum/maximum word length and loads
	* the UTF-8 normalizer class if it has not been loaded yet.
	*
	* @param	boolean|string	&$error	passed by reference; this backend always sets it to false (success)
	*
	* @access	public
	*/
	function fulltext_native(&$error)
	{
		global $phpbb_root_path, $phpEx, $config;

		$min_chars = $config['fulltext_native_min_chars'];
		$max_chars = $config['fulltext_native_max_chars'];
		$this->word_length = array('min' => $min_chars, 'max' => $max_chars);

		// Pull in the UTF tools only when nobody has loaded them before us
		$normalizer_loaded = class_exists('utf_normalizer');
		if ($normalizer_loaded === false)
		{
			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
		}

		// Nothing above reports a failure, so signal success to the caller
		$error = false;
	}

	/**
	* This function fills $this->search_query with the cleaned user search query.
	*
	* If $terms is 'any' then the words will be extracted from the search query
	* and combined with | inside brackets. They will afterwards be treated like
	* a standard search query.
	*
	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
*
	* A bracket group the user forgot to close is closed automatically, and a
	* prefix (+ or -) that is not followed by a word is ignored.
	*
	* @param	string	$keywords	contains the search query string as entered by the user
	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post')
	*								or 'any' (find all posts containing at least one of the given words)
	* @return	boolean				false if no valid keywords were found (this includes queries made of
	*								negative terms only) and otherwise true
	*
	* @access	public
	*/
	function split_keywords($keywords, $terms)
	{
		global $db, $config, $user;

		$keywords = trim($this->cleanup($keywords, '+-|()*'));

		// allow word|word|word without brackets
		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
		{
			$keywords = '(' . $keywords . ')';
		}

		// Normalise the query character by character: inside a bracket every
		// separator becomes '|', outside a bracket stray ')' and '|' become spaces
		// and a space following a +/- prefix is replaced by that prefix.
		$open_bracket = $space = false;
		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
		{
			if ($open_bracket !== false)
			{
				switch ($keywords[$i])
				{
					case ')':
						// "()" - an empty group, turn both brackets into separators
						if ($open_bracket + 1 == $i)
						{
							$keywords[$i - 1] = '|';
							$keywords[$i] = '|';
						}
						$open_bracket = false;
					break;
					case '(':
						$keywords[$i] = '|';
					break;
					case '+':
					case '-':
					case ' ':
						$keywords[$i] = '|';
					break;
				}
			}
			else
			{
				switch ($keywords[$i])
				{
					case ')':
						$keywords[$i] = ' ';
					break;
					case '(':
						$open_bracket = $i;
					break;
					case '|':
						$keywords[$i] = ' ';
					break;
					case '-':
					case '+':
						$space = $keywords[$i];
					break;
					case ' ':
						if ($space !== false)
						{
							$keywords[$i] = $space;
						}
					break;
					default:
						$space = false;
				}
			}
		}

		// Close a bracket the user left open. The position has to be compared
		// against false explicitly: a bracket opened at offset 0 is falsy and
		// would otherwise stay unclosed, which made the substr(..., 1, -1)
		// below cut off the last character of the group.
		if ($open_bracket !== false)
		{
			$keywords .= ')';
		}

		$match = array(
			'# +#',
			'#\|\|+#',
			'#(\+|\-)(?:\+|\-)+#',
			'#\(\|#',
			'#\|\)#',
		);
		$replace = array(
			' ',
			'|',
			'$1',
			'(',
			')',
		);

		$keywords = preg_replace($match, $replace, $keywords);

		// $keywords input format: each word separated by a space, words in a bracket are not separated

		// the user wants to search for any word, convert the search query
		if ($terms == 'any')
		{
			$words = array();

			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#', $keywords, $words);
			if (sizeof($words[1]))
			{
				$keywords = '(' . implode('|', $words[1]) . ')';
			}
		}

		// set the search_query which is shown to the user
		$this->search_query = $keywords;

		$exact_words = array();
		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#', $keywords, $exact_words);
		$exact_words = $exact_words[1];

		// Map of word_text => word_id. Start from an empty array: for 'any'
		// searches $words still holds the preg_match_all() result whose integer
		// keys 0 and 1 would be mistaken for the ids of the words "0" and "1".
		$words = array();

		if (sizeof($exact_words))
		{
			$sql = 'SELECT word_id, word_text, word_common
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $exact_words);
			$result = $db->sql_query($sql);

			// store an array of words and ids, remove common words
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['word_common'])
				{
					$this->common_words[] = $row['word_text'];
					continue;
				}

				$words[$row['word_text']] = (int) $row['word_id'];
			}
			$db->sql_freeresult($result);
		}
		unset($exact_words);

		// now analyse the search query, first split it using the spaces
		$query = explode(' ', $keywords);

		$this->must_contain_ids = array();
		$this->must_not_contain_ids = array();
		$this->must_exclude_one_ids = array();

		$mode = '';
		$ignore_no_id = true;

		foreach ($query as $word)
		{
			if (empty($word))
			{
				continue;
			}

			// words which should not be included
			if ($word[0] == '-')
			{
				$word = substr($word, 1);

				// a lone '-' leaves nothing to look at (substr() yields '' or false)
				if ($word === '' || $word === false)
				{
					continue;
				}

				// a group of which at least one may not be in the resulting posts
				if ($word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
					$mode = 'must_exclude_one';
				}
				// one word which should not be in the resulting posts
				else
				{
					$mode = 'must_not_contain';
				}
				$ignore_no_id = true;
			}
			// words which have to be included
			else
			{
				// no prefix is the same as a +prefix
				if ($word[0] == '+')
				{
					$word = substr($word, 1);

					// a lone '+' leaves nothing to look at
					if ($word === '' || $word === false)
					{
						continue;
					}
				}

				// a group of words of which at least one word should be in every resulting post
				if ($word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
				}
				$ignore_no_id = false;
				$mode = 'must_contain';
			}

			if (empty($word))
			{
				continue;
			}

			// if this is an array of words then retrieve an id for each
			if (is_array($word))
			{
				$id_words = array();
				foreach ($word as $i => $word_part)
				{
					// wildcard parts are stored as quoted LIKE patterns (strings), plain words as integer ids
					if (strpos($word_part, '*') !== false)
					{
						$id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
					}
					if (isset($words[$word_part]))
					{
						$id_words[] = $words[$word_part];
					}
				}
				if (sizeof($id_words))
				{
					sort($id_words);
					if (sizeof($id_words) > 1)
					{
						$this->{$mode . '_ids'}[] = $id_words;
					}
					else
					{
						// a group with a single usable word is the same as that word on its own
						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
						$this->{$mode . '_ids'}[] = $id_words[0];
					}
				}
				// throw an error if we shall not ignore nonexistent words
				else if (!$ignore_no_id)
				{
					trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $word)));
				}
			}
			// else we only need one id
			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
			{
				if ($wildcard)
				{
					$this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
				}
				else
				{
					$this->{$mode . '_ids'}[] = $words[$word];
				}
			}
			// throw an error if we shall not ignore nonexistent words
			else if (!$ignore_no_id)
			{
				trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
			}
		}

		// we can't search for negatives only
		if (!sizeof($this->must_contain_ids))
		{
			return false;
		}

		sort($this->must_contain_ids);
		sort($this->must_not_contain_ids);
		sort($this->must_exclude_one_ids);

		if (!empty($this->search_query))
		{
			return true;
		}
		return false;
	}

	/**
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		$sort_dir			is either a or d representing ASC and DESC
	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
	* @param	array		$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		$author_ary			an array of author ids if the
author should be ignored during the search the array is empty
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			$start				indicates the first index of the page
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results, or false if there is no search query or nothing was found
	*
	* @access	public
	*/
	function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($this->must_contain_ids),
			serialize($this->must_not_contain_ids),
			serialize($this->must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		$group_by = false;
		// counters used to generate unique aliases: m0, m1, ... for the
		// wordmatch table and w0, w1, ... for the wordlist table
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
				POSTS_TABLE				=> 'p'
			),
			'LEFT_JOIN' => array()
		);
		$sql_where[] = 'm0.post_id = p.post_id';

		$title_match = '';
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$sql_array['FROM'][TOPICS_TABLE] = 't';
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			if (!isset($sql_array['FROM'][TOPICS_TABLE]))
			{
				$sql_array['FROM'][TOPICS_TABLE] = 't';
				$sql_where[] = 'p.topic_id = t.topic_id';
			}
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// One wordmatch alias per required term. Integer entries are word ids,
		// string entries are quoted LIKE patterns (wildcard searches), arrays
		// are groups of which at least one entry has to match.
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				$sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		// wildcard exclusions are resolved to word ids through a joined wordlist alias
		foreach ($this->must_not_contain_ids as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				$this->must_not_contain_ids[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		// excluded words: left join their matches and keep only posts without one
		if (sizeof($this->must_not_contain_ids))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}

		// groups of which at least one word has to be missing from the post
		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		if (!sizeof($m_approve_fid_ary))
		{
			$sql_where[] = 'p.post_approved = 1';
		}
		else if ($m_approve_fid_ary !== array(-1))
		{
			$sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		if ($topic_id)
		{
			// cast before concatenating into SQL, as author_search() does
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			$sql_where[] = $db->sql_in_set('p.poster_id', $author_ary);
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					$is_mysql = true;

				break;

				case 'sqlite':
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					// for sqlite $sql has already been built above and is kept as is
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					$sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort expression is the alias of the table it needs
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				if (!isset($sql_array['FROM'][TOPICS_TABLE]))
				{
					$sql_array['FROM'][TOPICS_TABLE] = 't';
					$sql_where[] = 'p.topic_id = t.topic_id';
				}
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		$sql = $db->sql_build_query('SELECT', $sql_array);
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$db->sql_freeresult($result);

		if (!sizeof($id_ary))
		{
			return false;
		}

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}

	/**
	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		$sort_dir			is either a or d representing ASC and DESC
	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
	* @param	array		$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		$author_ary			an array of author ids
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			$start				indicates the first index of the
page
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results; 0 if no author was given, false if nothing was found
	*
	* @access	public
	*/
	function author_search($type, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No author? No posts.
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// generate a search_key from all the options to identify the results
		// (the empty strings take the place of the keyword specific parts used by keyword_search())
		$search_key = md5(implode('#', array(
			'',
			$type,
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// Create some display specific sql strings
		$sql_author		= $db->sql_in_set('p.poster_id', $author_ary);
		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';

		// the first character of the sort expression is the alias of the table it needs
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
			break;

			case 't':
				// the topic query below always selects from the topics table itself
				$sql_sort_table	= ($type == 'posts') ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join	= ($type == 'posts') ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		if (!sizeof($m_approve_fid_ary))
		{
			$m_approve_fid_sql = ' AND p.post_approved = 1';
		}
		else if ($m_approve_fid_ary == array(-1))
		{
			$m_approve_fid_sql = '';
		}
		else
		{
			$m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$is_mysql = false;

		// If the cache was completely empty count the results
		if (!$total_results)
		{
			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':
					// counted after the main query through FOUND_ROWS()
					$select = 'SQL_CALC_FOUND_ROWS ' . $select;
					$is_mysql = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$m_approve_fid_sql
								$sql_fora
								$sql_time";
					}
					else
					{
						if ($db->sql_layer == 'sqlite')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$m_approve_fid_sql
								$sql_fora
								AND t.topic_id = p.topic_id
								$sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
					}
					$result = $db->sql_query($sql);

					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			// The topic restriction is applied through p.topic_id, so the topics
			// table is only needed for sorting (see $sql_sort_table). Adding it a
			// second time here without a join condition produced a cartesian
			// product (duplicate ids, inflated FOUND_ROWS()) and a duplicate
			// alias "t" when sorting by a topic column.
			$sql = "SELECT $select
				FROM " . $sql_sort_table . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$m_approve_fid_sql
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$m_approve_fid_sql
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $sql_sort;
			$field = 'topic_id';
		}

		// Only read one block of posts from the db and then cache it
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[$field];
		}
		$db->sql_freeresult($result);

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		if (sizeof($id_ary))
		{
			// cache the whole block, hand back only the ids of the requested page
			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, $per_page);

			return $total_results;
		}
		return false;
	}

	/**
	* Split a text into words of a given length
	*
	* The text is converted to UTF-8, cleaned up, and split. Then, words that
	* conform to the defined length range are returned in an array.
917 * 918 * NOTE: duplicates are NOT removed from the return array 919 * 920 * @param string $text Text to split, encoded in UTF-8 921 * @return array Array of UTF-8 words 922 * 923 * @access private 924 */ 925 function split_message($text) 926 { 927 global $phpbb_root_path, $phpEx; 928 global $config, $user; 929 930 $match = $words = array(); 931 932 /** 933 * Taken from the original code 934 */ 935 // Do not index code 936 $match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is'; 937 // BBcode 938 $match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#'; 939 940 $min = $config['fulltext_native_min_chars']; 941 $max = $config['fulltext_native_max_chars']; 942 943 $isset_min = $min - 1; 944 945 /** 946 * Clean up the string, remove HTML tags, remove BBCodes 947 */ 948 $word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' '); 949 950 while (isset($word[0])) 951 { 952 if (isset($word[255]) 953 || !isset($word[$isset_min])) 954 { 955 /** 956 * Words longer than 255 bytes are ignored. This will have to be 957 * changed whenever we change the length of search_wordlist.word_text 958 * 959 * Words shorter than $isset_min bytes are ignored, too 960 */ 961 $word = strtok(' '); 962 continue; 963 } 964 965 $len = utf8_strlen($word); 966 967 /** 968 * Test whether the word is too short to be indexed. 969 * 970 * Note that this limit does NOT apply to CJK and Hangul 971 */ 972 if ($len < $min) 973 { 974 /** 975 * Note: this could be optimized. 
If the codepoint is lower than Hangul's range 976 * we know that it will also be lower than CJK ranges 977 */ 978 if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0) 979 && (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0) 980 && (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0)) 981 { 982 $word = strtok(' '); 983 continue; 984 } 985 } 986 987 $words[] = $word; 988 $word = strtok(' '); 989 } 990 991 return $words; 992 } 993 994 /** 995 * Updates wordlist and wordmatch tables when a message is posted or changed 996 * 997 * @param string $mode Contains the post mode: edit, post, reply, quote 998 * @param int $post_id The id of the post which is modified/created 999 * @param string $message New or updated post content 1000 * @param string $subject New or updated post subject 1001 * @param int $poster_id Post author's user id 1002 * @param int $forum_id The id of the forum in which the post is located 1003 * 1004 * @access public 1005 */ 1006 function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id) 1007 { 1008 global $config, $db, $user; 1009 1010 if (!$config['fulltext_native_load_upd']) 1011 { 1012 /** 1013 * The search indexer is disabled, return 1014 */ 1015 return; 1016 } 1017 1018 // Split old and new post/subject to obtain array of 'words' 1019 $split_text = $this->split_message($message); 1020 $split_title = $this->split_message($subject); 1021 1022 $cur_words = array('post' => array(), 'title' => array()); 1023 1024 $words = array(); 1025 if ($mode == 'edit') 1026 { 1027 $words['add']['post'] = array(); 1028 $words['add']['title'] = array(); 1029 $words['del']['post'] = array(); 1030 $words['del']['title'] = array(); 1031 1032 $sql = 'SELECT w.word_id, w.word_text, m.title_match 1033 FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . 
" m 1034 WHERE m.post_id = $post_id 1035 AND w.word_id = m.word_id"; 1036 $result = $db->sql_query($sql); 1037 1038 while ($row = $db->sql_fetchrow($result)) 1039 { 1040 $which = ($row['title_match']) ? 'title' : 'post'; 1041 $cur_words[$which][$row['word_text']] = $row['word_id']; 1042 } 1043 $db->sql_freeresult($result); 1044 1045 $words['add']['post'] = array_diff($split_text, array_keys($cur_words['post'])); 1046 $words['add']['title'] = array_diff($split_title, array_keys($cur_words['title'])); 1047 $words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text); 1048 $words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title); 1049 } 1050 else 1051 { 1052 $words['add']['post'] = $split_text; 1053 $words['add']['title'] = $split_title; 1054 $words['del']['post'] = array(); 1055 $words['del']['title'] = array(); 1056 } 1057 unset($split_text); 1058 unset($split_title); 1059 1060 // Get unique words from the above arrays 1061 $unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title'])); 1062 1063 // We now have unique arrays of all words to be added and removed and 1064 // individual arrays of added and removed words for text and title. What 1065 // we need to do now is add the new words (if they don't already exist) 1066 // and then add (or remove) matches between the words and this post 1067 if (sizeof($unique_add_words)) 1068 { 1069 $sql = 'SELECT word_id, word_text 1070 FROM ' . SEARCH_WORDLIST_TABLE . ' 1071 WHERE ' . 
$db->sql_in_set('word_text', $unique_add_words); 1072 $result = $db->sql_query($sql); 1073 1074 $word_ids = array(); 1075 while ($row = $db->sql_fetchrow($result)) 1076 { 1077 $word_ids[$row['word_text']] = $row['word_id']; 1078 } 1079 $db->sql_freeresult($result); 1080 1081 $new_words = array_diff($unique_add_words, array_keys($word_ids)); 1082 1083 if (sizeof($new_words)) 1084 { 1085 $sql_ary = array(); 1086 1087 foreach ($new_words as $word) 1088 { 1089 $sql_ary[] = array('word_text' => $word); 1090 } 1091 1092 $db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary); 1093 } 1094 unset($new_words, $sql_ary); 1095 } 1096 1097 // now update the search match table, remove links to removed words and add links to new words 1098 foreach ($words['del'] as $word_in => $word_ary) 1099 { 1100 $title_match = ($word_in == 'title') ? 1 : 0; 1101 1102 if (sizeof($word_ary)) 1103 { 1104 $sql_in = array(); 1105 foreach ($word_ary as $word) 1106 { 1107 $sql_in[] = $cur_words[$word_in][$word]; 1108 } 1109 1110 $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . ' 1111 WHERE ' . $db->sql_in_set('word_id', $sql_in) . ' 1112 AND post_id = ' . intval($post_id) . " 1113 AND title_match = $title_match"; 1114 $db->sql_query($sql); 1115 unset($sql_in); 1116 } 1117 } 1118 1119 foreach ($words['add'] as $word_in => $word_ary) 1120 { 1121 $title_match = ($word_in == 'title') ? 1 : 0; 1122 1123 if (sizeof($word_ary)) 1124 { 1125 $sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . " (post_id, word_id, title_match) 1126 SELECT $post_id, word_id, $title_match 1127 FROM " . SEARCH_WORDLIST_TABLE . ' 1128 WHERE ' . 
$db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);
			}
		}

		// Destroy cached search results containing any of the words removed or added.
		// Words deleted from the title have to be part of that list as well, otherwise
		// results cached for a word that was only removed from a title stay around.
		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));

		unset($unique_add_words);
		unset($words);
		unset($cur_words);
	}

	/**
	* Removes entries from the wordmatch table for the specified post_ids
	*
	* Only the word/post links are deleted here, the wordlist table itself is
	* not touched by this method.
	*
	* @param array $post_ids	Ids of the posts whose word matches are deleted
	* @param array $author_ids	Passed on to destroy_cache() as the list of authors
	* @param array $forum_ids	Not used by this backend
	*/
	function index_remove($post_ids, $author_ids, $forum_ids)
	{
		global $db;

		if (sizeof($post_ids))
		{
			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $db->sql_in_set('post_id', $post_ids);
			$db->sql_query($sql);
		}

		// SEARCH_WORDLIST_TABLE will be updated by tidy()
		$this->destroy_cache(array(), $author_ids);
	}

	/**
	* Tidy up indexes: Tag 'common words'
	*
	* Words which are linked to more than 20% of all posts are flagged as common
	* in the wordlist table and their matches are deleted. This only happens on
	* boards with at least 100 posts. The method does not delete wordlist rows.
	*
	* The search_last_gc config value is set to the current time in every case,
	* including the one where index updating is disabled.
	*/
	function tidy()
	{
		global $db, $config;

		// Is the fulltext indexer disabled? If yes then we need not
		// carry on ... it's okay ... I know when I'm not wanted boo hoo
		if (!$config['fulltext_native_load_upd'])
		{
			set_config('search_last_gc', time(), true);
			return;
		}

		$destroy_cache_words = array();

		// Remove common (> 20% of posts ) words
		if ($config['num_posts'] >= 100)
		{
			// First, get the IDs of common words
			$sql = 'SELECT word_id
				FROM ' . SEARCH_WORDMATCH_TABLE . '
				GROUP BY word_id
				HAVING COUNT(word_id) > ' . floor($config['num_posts'] * 0.2);
			$result = $db->sql_query($sql);

			$sql_in = array();
			while ($row = $db->sql_fetchrow($result))
			{
				$sql_in[] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			if (sizeof($sql_in))
			{
				// Get the text of those new common words, cached results
				// containing them are destroyed further down
				$sql = 'SELECT word_text
					FROM ' . SEARCH_WORDLIST_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $sql_in);
				$result = $db->sql_query($sql);

				while ($row = $db->sql_fetchrow($result))
				{
					$destroy_cache_words[] = $row['word_text'];
				}
				$db->sql_freeresult($result);

				// Flag the words
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_common = 1
					WHERE ' . $db->sql_in_set('word_id', $sql_in);
				$db->sql_query($sql);

				// Delete the matches
				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $sql_in);
				$db->sql_query($sql);
			}
			unset($sql_in);
		}

		// destroy cached search results containing any of the words that are now common
		$this->destroy_cache(array_unique($destroy_cache_words));

		set_config('search_last_gc', time(), true);
	}

	/**
	* Deletes all words from the index
	*
	* Empties the wordlist, wordmatch and search results tables. TRUNCATE TABLE
	* is used unless the database layer is sqlite, in which case DELETE FROM is
	* issued instead.
	*
	* @param mixed $acp_module	Not used by this backend
	* @param mixed $u_action	Not used by this backend
	*/
	function delete_index($acp_module, $u_action)
	{
		global $db;

		// The statement prefix is the same for all three tables
		$empty_table = ($db->sql_layer != 'sqlite') ? 'TRUNCATE TABLE ' : 'DELETE FROM ';

		$db->sql_query($empty_table . SEARCH_WORDLIST_TABLE);
		$db->sql_query($empty_table . SEARCH_WORDMATCH_TABLE);
		$db->sql_query($empty_table . SEARCH_RESULTS_TABLE);
	}

	/**
	* Returns true if the index contains data
	*
	* The index counts as created when both the wordlist and the wordmatch
	* table hold at least one row. The counts are taken from $this->stats,
	* which is filled through get_stats() if it is still empty.
	*
	* @return boolean
	*/
	function index_created()
	{
		if (!sizeof($this->stats))
		{
			$this->get_stats();
		}

		return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
	}

	/**
	* Returns an associative array containing information about the indexes
	*
	* @return array Language string => value pairs for the total number of words and matches
	*/
	function index_stats()
	{
		global $user;

		if (!sizeof($this->stats))
		{
			$this->get_stats();
		}

		return array(
			$user->lang['TOTAL_WORDS']		=> $this->stats['total_words'],
			$user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']);
	}

	/**
	* Fills $this->stats with the row counts of the wordlist table
	* ('total_words') and the wordmatch table ('total_matches')
	*/
	function get_stats()
	{
		global $db;

		$sql = 'SELECT COUNT(*) as total_words
			FROM ' . SEARCH_WORDLIST_TABLE;
		$result = $db->sql_query($sql);
		$this->stats['total_words'] = (int) $db->sql_fetchfield('total_words');
		$db->sql_freeresult($result);

		$sql = 'SELECT COUNT(*) as total_matches
			FROM ' . SEARCH_WORDMATCH_TABLE;
		$result = $db->sql_query($sql);
		$this->stats['total_matches'] = (int) $db->sql_fetchfield('total_matches');
		$db->sql_freeresult($result);
	}

	/**
	* Clean up a text to remove non-alphanumeric characters
	*
	* This method receives a UTF-8 string, normalizes and validates it, replaces all
	* non-alphanumeric characters with spaces then returns the result.
	*
	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
	*
	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
	* @param	string	$allowed_chars	String of special chars to allow
	* @param	string	$encoding		Text encoding, anything but utf-8 is recoded with utf8_recode()
	* @return	string					Cleaned up text, only alphanumeric chars are left
	*/
	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
	{
		global $phpbb_root_path, $phpEx;
		static $conv = array(), $conv_loaded = array();
		$allow = array();

		/**
		* Convert the text to UTF-8
		*/
		$encoding = strtolower($encoding);
		if ($encoding != 'utf-8')
		{
			$text = utf8_recode($text, $encoding);
		}

		/**
		* Length in bytes of a UTF-8 sequence, indexed by the high nibble of
		* its first byte
		*/
		$utf_len_mask = array(
			"\xC0"	=>	2,
			"\xD0"	=>	2,
			"\xE0"	=>	3,
			"\xF0"	=>	4
		);

		/**
		* Replace HTML entities and NCRs
		*/
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

		/**
		* Load the UTF-8 normalizer
		*
		* If we use it more widely, an instance of that class should be held in a
		* a global variable instead
		*/
		$text = utf_normalizer::nfc($text);

		/**
		* The first thing we do is:
		*
		* - convert ASCII-7 letters to lowercase
		* - remove the ASCII-7 non-alpha characters
		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
		*   0xC1 and 0xF5-0xFF
		*
		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
		*/
		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";

		/**
		* strtr() ignores the extra characters of the longer argument, so the
		* replacement has to be exactly as long as $sb_match: the 26 lowercase
		* letters followed by one space for every remaining byte
		*/
		$sb_lower	= 'istcpamelrdojbnhfgvwuqkyxz';
		$sb_replace	= $sb_lower . str_repeat(' ', strlen($sb_match) - strlen($sb_lower));

		/**
		* This is the list of legal ASCII chars, it is automatically extended
		* with ASCII chars from $allowed_chars
		*/
		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';

		/**
		* Prepare an array containing the extra chars to allow
		*/
		if (isset($allowed_chars[0]))
		{
			$pos = 0;
			$len = strlen($allowed_chars);
			do
			{
				$c = $allowed_chars[$pos];

				if ($c < "\x80")
				{
					/**
					* ASCII char
					*/
					$sb_pos = strpos($sb_match, $c);
					if (is_int($sb_pos))
					{
						/**
						* Remove the char from $sb_match and its corresponding
						* replacement in $sb_replace
						*/
						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
						$legal_ascii .= $c;
					}

					++$pos;
				}
				else
				{
					/**
					* UTF-8 char
					*
					* NOTE(review): $allowed_chars is expected to be well-formed
					* UTF-8 here, there is no entry in $utf_len_mask for a
					* continuation byte
					*/
					$utf_len = $utf_len_mask[$c & "\xF0"];
					$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
					$pos += $utf_len;
				}
			}
			while ($pos < $len);
		}

		$text = strtr($text, $sb_match, $sb_replace);
		$ret = '';

		$pos = 0;
		$len = strlen($text);

		do
		{
			/**
			* Do all consecutive ASCII chars at once
			*/
			if ($spn = strspn($text, $legal_ascii, $pos))
			{
				$ret .= substr($text, $pos, $spn);
				$pos += $spn;
			}

			if ($pos >= $len)
			{
				return $ret;
			}

			/**
			* A byte which is neither a legal ASCII char nor the first byte of
			* a UTF-8 sequence (0x7F or a stray continuation byte for instance)
			* has no known length. Without this check $pos would never move
			* forward, so the byte is replaced by a space and skipped.
			*/
			$lead = $text[$pos] & "\xF0";
			if (!isset($utf_len_mask[$lead]))
			{
				$ret .= ' ';
				++$pos;
				continue;
			}

			/**
			* Capture the UTF char
			*/
			$utf_len = $utf_len_mask[$lead];
			$utf_char = substr($text, $pos, $utf_len);
			$pos += $utf_len;

			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
			 || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
			 || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
			{
				/**
				* All characters within these ranges are valid
				*
				* We separate them with a space in order to index each character
				* individually
				*/
				$ret .= ' ' . $utf_char . ' ';
				continue;
			}

			if (isset($allow[$utf_char]))
			{
				/**
				* The char is explicitly allowed
				*/
				$ret .= $utf_char;
				continue;
			}

			if (isset($conv[$utf_char]))
			{
				/**
				* The char is mapped to something, maybe to itself actually
				*/
				$ret .= $conv[$utf_char];
				continue;
			}

			/**
			* The char isn't mapped, but did we load its conversion table?
			*
			* The search indexer table is split into blocks. The block number of
			* each char is equal to its codepoint right-shifted for 11 bits. It
			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
			* all UTF chars encoded in 2 bytes are in the same first block.
			*/
			if (isset($utf_char[2]))
			{
				if (isset($utf_char[3]))
				{
					/**
					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0011 1111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
				}
				else
				{
					/**
					* 1110 nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0010 0000
					*
					* NOTE(review): the mask keeps 3 of the 4 payload bits of
					* the first byte. Left as it is because the numbering of
					* the data files has to match, confirm against the script
					* generating includes/utf/data/search_indexer_*
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
				}
			}
			else
			{
				/**
				* 110x xxxx 10xx xxxx
				* 0000 0000 0000 0000
				*/
				$idx = 0;
			}

			/**
			* Check if the required conv table has been loaded already
			*/
			if (!isset($conv_loaded[$idx]))
			{
				$conv_loaded[$idx] = 1;
				$file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;

				if (file_exists($file))
				{
					$conv += include($file);
				}
			}

			if (isset($conv[$utf_char]))
			{
				$ret .= $conv[$utf_char];
			}
			else
			{
				/**
				* We add an entry to the conversion table so that we
				* don't have to convert to codepoint and perform the checks
				* that are above this block
				*/
				$conv[$utf_char] = ' ';
				$ret .= ' ';
			}
		}
		while (1);

		// Never reached, the loop above returns once the end of the text is hit
		return $ret;
	}

	/**
	* Returns a list of options for the ACP to display
	*
	* @return array 'tpl' holds the HTML of the form fields, 'config' maps the
	*				config keys of those fields to their validation types
	*/
	function acp()
	{
		global $user, $config;

		/**
		* Settings of this backend: whether the index is updated and the
		* min/max number of chars of an indexed word
		*/
		$tpl = '
		<dl>
			<dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
			<dd><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . ' <input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
		</dl>
		';

		// These are fields required in the config table
		return array(
			'tpl'		=> $tpl,
			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255')
		);
	}
}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Nov 22 00:35:05 2006 | Cross-referenced by PHPXref 0.6 |