[ Index ]

PHP Cross Reference of phpBB 3.0 Beta 3

title

Body

[close]

/includes/search/ -> fulltext_native.php (source)

   1  <?php
   2  /** 
   3  *
   4  * @package search
   5  * @version $Id: fulltext_native.php,v 1.35 2006/11/10 14:55:49 acydburn Exp $
   6  * @copyright (c) 2005 phpBB Group 
   7  * @license http://opensource.org/licenses/gpl-license.php GNU Public License 
   8  *
   9  */
  10  
  11  /**
  12  */
  13  if (!defined('IN_PHPBB'))
  14  {
  15      exit;
  16  }
  17  
  18  /**
  19  * @ignore
  20  */
  21  include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
  22  
  23  /**
  24  * fulltext_native
  25  * phpBB's own db driven fulltext search, version 2
  26  * @package search
  27  */
  28  class fulltext_native extends search_backend
  29  {
  30      var $stats = array();
  31      var $word_length = array();
  32      var $search_query;
  33      var $common_words = array();
  34  
  35      var $must_contain_ids = array();
  36      var $must_not_contain_ids = array();
  37      var $must_exclude_one_ids = array();
  38  
  39      /**
  40      * Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
  41      *
  42      * @param    boolean|string    $error    is passed by reference and should either be set to false on success or an error message on failure.
  43      *
  44      * @access    public
  45      */
  46  	function fulltext_native(&$error)
  47      {
  48          global $phpbb_root_path, $phpEx, $config;
  49  
  50          $this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);
  51  
  52          /**
  53          * Load the UTF tools
  54          */
  55          if (!class_exists('utf_normalizer'))
  56          {
  57              include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
  58          }
  59  
  60  
  61          $error = false;
  62      }
  63  
  64      /**
  65      * This function fills $this->search_query with the cleaned user search query.
  66      *
  67      * If $terms is 'any' then the words will be extracted from the search query
  68      * and combined with | inside brackets. They will afterwards be treated like
  69      * an standard search query.
  70      *
  71      * Then it analyses the query and fills the internal arrays $must_not_contain_ids,
  72      * $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
  73      *
  74      * @param    string    $keywords    contains the search query string as entered by the user
  75      * @param    string    $terms        is either 'all' (use search query as entered, default words to 'must be contained in post')
  76      *     or 'any' (find all posts containing at least one of the given words)
  77      * @return    boolean                false if no valid keywords were found and otherwise true
  78      *
  79      * @access    public
  80      */
  81  	function split_keywords($keywords, $terms)
  82      {
  83          global $db, $config, $user;
  84  
  85          $keywords = trim($this->cleanup($keywords, '+-|()*'));
  86  
  87          // allow word|word|word without brackets
  88          if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
  89          {
  90              $keywords = '(' . $keywords . ')';
  91          }
  92  
  93          $open_bracket = $space = false;
  94          for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
  95          {
  96              if ($open_bracket !== false)
  97              {
  98                  switch ($keywords[$i])
  99                  {
 100                      case ')':
 101                          if ($open_bracket + 1 == $i)
 102                          {
 103                              $keywords[$i - 1] = '|';
 104                              $keywords[$i] = '|';
 105                          }
 106                          $open_bracket = false;
 107                      break;
 108                      case '(':
 109                          $keywords[$i] = '|';
 110                      break;
 111                      case '+':
 112                      case '-':
 113                      case ' ':
 114                          $keywords[$i] = '|';
 115                      break;
 116                  }
 117              }
 118              else
 119              {
 120                  switch ($keywords[$i])
 121                  {
 122                      case ')':
 123                          $keywords[$i] = ' ';
 124                      break;
 125                      case '(':
 126                          $open_bracket = $i;
 127                      break;
 128                      case '|':
 129                          $keywords[$i] = ' ';
 130                      break;
 131                      case '-':
 132                      case '+':
 133                          $space = $keywords[$i];
 134                      break;
 135                      case ' ':
 136                          if ($space !== false)
 137                          {
 138                              $keywords[$i] = $space;
 139                          }
 140                      break;
 141                      default:
 142                          $space = false;
 143                  }
 144              }
 145          }
 146  
 147          if ($open_bracket)
 148          {
 149              $keywords .= ')';
 150          }
 151  
 152          $match = array(
 153              '#  +#',
 154              '#\|\|+#',
 155              '#(\+|\-)(?:\+|\-)+#',
 156              '#\(\|#',
 157              '#\|\)#',
 158          );
 159          $replace = array(
 160              ' ',
 161              '|',
 162              '$1',
 163              '(',
 164              ')',
 165          );
 166  
 167          $keywords = preg_replace($match, $replace, $keywords);
 168  
 169          // $keywords input format: each word seperated by a space, words in a bracket are not seperated
 170  
 171          // the user wants to search for any word, convert the search query
 172          if ($terms == 'any')
 173          {
 174              $words = array();
 175  
 176              preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#', $keywords, $words);
 177              if (sizeof($words[1]))
 178              {
 179                  $keywords = '(' . implode('|', $words[1]) . ')';
 180              }
 181          }
 182  
 183          // set the search_query which is shown to the user
 184          $this->search_query = $keywords;
 185  
 186          $exact_words = array();
 187          preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#', $keywords, $exact_words);
 188          $exact_words = $exact_words[1];
 189  
 190          if (sizeof($exact_words))
 191          {
 192              $sql = 'SELECT word_id, word_text, word_common
 193                  FROM ' . SEARCH_WORDLIST_TABLE . '
 194                  WHERE ' . $db->sql_in_set('word_text', $exact_words);
 195              $result = $db->sql_query($sql);
 196      
 197              // store an array of words and ids, remove common words
 198              while ($row = $db->sql_fetchrow($result))
 199              {
 200                  if ($row['word_common'])
 201                  {
 202                      $this->common_words[] = $row['word_text'];
 203                      continue;
 204                  }
 205  
 206                  $words[$row['word_text']] = (int) $row['word_id'];
 207              }
 208              $db->sql_freeresult($result);
 209          }
 210          unset($exact_words);
 211  
 212          // now analyse the search query, first split it using the spaces
 213          $query = explode(' ', $keywords);
 214  
 215          $this->must_contain_ids = array();
 216          $this->must_not_contain_ids = array();
 217          $this->must_exclude_one_ids = array();
 218  
 219          $mode = '';
 220          $ignore_no_id = true;
 221  
 222          foreach ($query as $word)
 223          {
 224              if (empty($word))
 225              {
 226                  continue;
 227              }
 228  
 229              // words which should not be included
 230              if ($word[0] == '-')
 231              {
 232                  $word = substr($word, 1);
 233  
 234                  // a group of which at least one may not be in the resulting posts
 235                  if ($word[0] == '(')
 236                  {
 237                      $word = array_unique(explode('|', substr($word, 1, -1)));
 238                      $mode = 'must_exclude_one';
 239                  }
 240                  // one word which should not be in the resulting posts
 241                  else
 242                  {
 243                      $mode = 'must_not_contain';
 244                  }
 245                  $ignore_no_id = true;
 246              }
 247              // words which have to be included
 248              else
 249              {
 250                  // no prefix is the same as a +prefix
 251                  if ($word[0] == '+')
 252                  {
 253                      $word = substr($word, 1);
 254                  }
 255  
 256                  // a group of words of which at least one word should be in every resulting post
 257                  if ($word[0] == '(')
 258                  {
 259                      $word = array_unique(explode('|', substr($word, 1, -1)));
 260                  }
 261                  $ignore_no_id = false;
 262                  $mode = 'must_contain';
 263              }
 264  
 265              if (empty($word))
 266              {
 267                  continue;
 268              }
 269  
 270              // if this is an array of words then retrieve an id for each
 271              if (is_array($word))
 272              {
 273                  $id_words = array();
 274                  foreach ($word as $i => $word_part)
 275                  {
 276                      if (strpos($word_part, '*') !== false)
 277                      {
 278                          $id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
 279                      }
 280                      if (isset($words[$word_part]))
 281                      {
 282                          $id_words[] = $words[$word_part];
 283                      }
 284                  }
 285                  if (sizeof($id_words))
 286                  {
 287                      sort($id_words);
 288                      if (sizeof($id_words) > 1)
 289                      {
 290                          $this->{$mode . '_ids'}[] = $id_words;
 291                      }
 292                      else
 293                      {
 294                          $mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
 295                          $this->{$mode . '_ids'}[] = $id_words[0];
 296                      }
 297                  }
 298                  // throw an error if we shall not ignore unexistant words
 299                  else if (!$ignore_no_id)
 300                  {
 301                      trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $word)));
 302                  }
 303              }
 304              // else we only need one id
 305              else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
 306              {
 307                  if ($wildcard)
 308                  {
 309                      $this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
 310                  }
 311                  else
 312                  {
 313                      $this->{$mode . '_ids'}[] = $words[$word];
 314                  }
 315              }
 316              // throw an error if we shall not ignore unexistant words
 317              else if (!$ignore_no_id)
 318              {
 319                  trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
 320              }
 321          }
 322  
 323          // we can't search for negatives only
 324          if (!sizeof($this->must_contain_ids))
 325          {
 326              return false;
 327          }
 328  
 329          sort($this->must_contain_ids);
 330          sort($this->must_not_contain_ids);
 331          sort($this->must_exclude_one_ids);
 332  
 333          if (!empty($this->search_query))
 334          {
 335              return true;
 336          }
 337          return false;
 338      }
 339  
 340      /**
 341      * Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
 342      *
 343      * @param    string        $type                contains either posts or topics depending on what should be searched for
 344      * @param    string        $fields                contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
 345      * @param    string        $terms                is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
 346      * @param    array        $sort_by_sql        contains SQL code for the ORDER BY part of a query
 347      * @param    string        $sort_key            is the key of $sort_by_sql for the selected sorting
 348      * @param    string        $sort_dir            is either a or d representing ASC and DESC
 349      * @param    string        $sort_days            specifies the maximum amount of days a post may be old
 350      * @param    array        $ex_fid_ary            specifies an array of forum ids which should not be searched
 351      * @param    array        $m_approve_fid_ary    specifies an array of forum ids in which the searcher is allowed to view unapproved posts
 352      * @param    int            $topic_id            is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 353      * @param    array        $author_ary            an array of author ids if the author should be ignored during the search the array is empty
 354      * @param    array        $id_ary                passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 355      * @param    int            $start                indicates the first index of the page
 356      * @param    int            $per_page            number of ids each page is supposed to contain
 357      * @return    boolean|int                        total number of results
 358      *
 359      * @access    public
 360      */
 361  	function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
 362      {
 363          global $config, $db;
 364  
 365          // No keywords? No posts.
 366          if (empty($this->search_query))
 367          {
 368              return false;
 369          }
 370  
 371          // generate a search_key from all the options to identify the results
 372          $search_key = md5(implode('#', array(
 373              serialize($this->must_contain_ids),
 374              serialize($this->must_not_contain_ids),
 375              serialize($this->must_exclude_one_ids),
 376              $type,
 377              $fields,
 378              $terms,
 379              $sort_days,
 380              $sort_key,
 381              $topic_id,
 382              implode(',', $ex_fid_ary),
 383              implode(',', $m_approve_fid_ary),
 384              implode(',', $author_ary)
 385          )));
 386  
 387          // try reading the results from cache
 388          $total_results = 0;
 389          if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
 390          {
 391              return $total_results;
 392          }
 393  
 394          $id_ary = array();
 395  
 396          $sql_where = array();
 397          $group_by = false;
 398          $m_num = 0;
 399          $w_num = 0;
 400  
 401          $sql_array = array(
 402              'SELECT'    => ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
 403              'FROM'        => array(
 404                  SEARCH_WORDMATCH_TABLE    => array(),
 405                  SEARCH_WORDLIST_TABLE    => array(),
 406                  POSTS_TABLE                => 'p'
 407              ),
 408              'LEFT_JOIN'    => array()
 409          );
 410          $sql_where[] = 'm0.post_id = p.post_id';
 411  
 412          $title_match = '';
 413          $group_by = true;
 414          // Build some display specific sql strings
 415          switch ($fields)
 416          {
 417              case 'titleonly':
 418                  $title_match = 'title_match = 1';
 419                  $group_by = false;
 420              // no break
 421              case 'firstpost':
 422                  $sql_array['FROM'][TOPICS_TABLE] = 't';
 423                  $sql_where[] = 'p.post_id = t.topic_first_post_id';
 424              break;
 425  
 426              case 'msgonly':
 427                  $title_match = 'title_match = 0';
 428                  $group_by = false;
 429              break;
 430          }
 431  
 432          if ($type == 'topics')
 433          {
 434              if (!isset($sql_array['FROM'][TOPICS_TABLE]))
 435              {
 436                  $sql_array['FROM'][TOPICS_TABLE] = 't';
 437                  $sql_where[] = 'p.topic_id = t.topic_id';
 438              }
 439              $group_by = true;
 440          }
 441  
 442          /**
 443          * @todo Add a query optimizer (handle stuff like "+(4|3) +4")
 444          */
 445  
 446          foreach ($this->must_contain_ids as $subquery)
 447          {
 448              if (is_array($subquery))
 449              {
 450                  $group_by = true;
 451  
 452                  $word_id_sql = array();
 453                  $word_ids = array();
 454                  foreach ($subquery as $id)
 455                  {
 456                      if (is_string($id))
 457                      {
 458                          $sql_array['LEFT_JOIN'][] = array(
 459                              'FROM'    => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
 460                              'ON'    => "w$w_num.word_text LIKE $id"
 461                          );
 462                          $word_ids[] = "w$w_num.word_id";
 463          
 464                          $w_num++;
 465                      }
 466                      else
 467                      {
 468                          $word_ids[] = $id;
 469                      }
 470                  }
 471  
 472                  $sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);
 473  
 474                  unset($word_id_sql);
 475                  unset($word_ids);
 476              }
 477              else if (is_string($subquery))
 478              {
 479                  $sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;
 480  
 481                  $sql_where[] = "w$w_num.word_text LIKE $subquery";
 482                  $sql_where[] = "m$m_num.word_id = w$w_num.word_id";
 483  
 484                  $group_by = true;
 485                  $w_num++;
 486              }
 487              else
 488              {
 489                  $sql_where[] = "m$m_num.word_id = $subquery";
 490              }
 491      
 492              $sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;
 493  
 494              if ($title_match)
 495              {
 496                  $sql_where[] = "m$m_num.$title_match";
 497              }
 498  
 499              if ($m_num != 0)
 500              {
 501                  $sql_where[] = "m$m_num.post_id = m0.post_id";
 502              }
 503              $m_num++;
 504          }
 505  
 506          foreach ($this->must_not_contain_ids as $key => $subquery)
 507          {
 508              if (is_string($subquery))
 509              {
 510                  $sql_array['LEFT_JOIN'][] = array(
 511                      'FROM'    => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
 512                      'ON'    => "w$w_num.word_text LIKE $subquery"
 513                  );
 514  
 515                  $this->must_not_contain_ids[$key] = "w$w_num.word_id";
 516  
 517                  $group_by = true;
 518                  $w_num++;
 519              }
 520          }
 521  
 522          if (sizeof($this->must_not_contain_ids))
 523          {
 524              $sql_array['LEFT_JOIN'][] = array(
 525                  'FROM'    => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
 526                  'ON'    => $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
 527              );
 528  
 529              $sql_where[] = "m$m_num.word_id IS NULL";
 530              $m_num++;
 531          }
 532  
 533          foreach ($this->must_exclude_one_ids as $ids)
 534          {
 535              $is_null_joins = array();
 536              foreach ($ids as $id)
 537              {
 538                  if (is_string($id))
 539                  {
 540                      $sql_array['LEFT_JOIN'][] = array(
 541                          'FROM'    => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
 542                          'ON'    => "w$w_num.word_text LIKE $id"
 543                      );
 544                      $id = "w$w_num.word_id";
 545  
 546                      $group_by = true;
 547                      $w_num++;
 548                  }
 549  
 550                  $sql_array['LEFT_JOIN'][] = array(
 551                      'FROM'    => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
 552                      'ON'    => "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
 553                  );
 554                  $is_null_joins[] = "m$m_num.word_id IS NULL";
 555  
 556                  $m_num++;
 557              }
 558              $sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
 559          }
 560  
 561          if (!sizeof($m_approve_fid_ary))
 562          {
 563              $sql_where[] = 'p.post_approved = 1';
 564          }
 565          else if ($m_approve_fid_ary !== array(-1))
 566          {
 567              $sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
 568          }
 569  
 570          if ($topic_id)
 571          {
 572              $sql_where[] = 'p.topic_id = ' . $topic_id;
 573          }
 574  
 575          if (sizeof($author_ary))
 576          {
 577              $sql_where[] = $db->sql_in_set('p.poster_id', $author_ary);
 578          }
 579  
 580          if (sizeof($ex_fid_ary))
 581          {
 582              $sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
 583          }
 584  
 585          if ($sort_days)
 586          {
 587              $sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
 588          }
 589  
 590          $sql_array['WHERE'] = implode(' AND ', $sql_where);
 591  
 592          $is_mysql = false;
 593          // if the total result count is not cached yet, retrieve it from the db
 594          if (!$total_results)
 595          {
 596              $sql = '';
 597              $sql_array_count = $sql_array;
 598  
 599              switch ($db->sql_layer)
 600              {
 601                  case 'mysql4':
 602                  case 'mysqli':
 603  
 604                      // 3.x does not support SQL_CALC_FOUND_ROWS
 605                      $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
 606                      $is_mysql = true;
 607  
 608                  break;
 609  
 610                  case 'sqlite':
 611                      $sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
 612                      $sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
 613                              FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';
 614  
 615                  // no break
 616  
 617                  default:
 618                      $sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
 619                      $sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;
 620          
 621                      $result = $db->sql_query($sql);
 622                      $total_results = (int) $db->sql_fetchfield('total_results');
 623                      $db->sql_freeresult($result);
 624          
 625                      if (!$total_results)
 626                      {
 627                          return false;
 628                      }
 629                  break;
 630              }
 631  
 632              unset($sql_array_count, $sql);
 633          }
 634  
 635          // Build sql strings for sorting
 636          $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
 637  
 638          switch ($sql_sort[0])
 639          {
 640              case 'u':
 641                  $sql_array['FROM'][USERS_TABLE] = 'u';
 642                  $sql_where[] = 'u.user_id = p.poster_id ';
 643              break;
 644  
 645              case 't':
 646                  if (!isset($sql_array['FROM'][TOPICS_TABLE]))
 647                  {
 648                      $sql_array['FROM'][TOPICS_TABLE] = 't';
 649                      $sql_where[] = 'p.topic_id = t.topic_id';
 650                  }
 651              break;
 652  
 653              case 'f':
 654                  $sql_array['FROM'][FORUMS_TABLE] = 'f';
 655                  $sql_where[] = 'f.forum_id = p.forum_id';
 656              break;
 657          }
 658  
 659          $sql_array['WHERE'] = implode(' AND ', $sql_where);
 660          $sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
 661          $sql_array['ORDER_BY'] = $sql_sort;
 662  
 663          unset($sql_where, $sql_sort, $group_by);
 664  
 665          $sql = $db->sql_build_query('SELECT', $sql_array);
 666          $result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
 667  
 668          while ($row = $db->sql_fetchrow($result))
 669          {
 670              $id_ary[] = $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
 671          }
 672          $db->sql_freeresult($result);
 673  
 674          if (!sizeof($id_ary))
 675          {
 676              return false;
 677          }
 678  
 679          // if we use mysql and the total result count is not cached yet, retrieve it from the db
 680          if (!$total_results && $is_mysql)
 681          {
 682              $sql = 'SELECT FOUND_ROWS() as total_results';
 683              $result = $db->sql_query($sql);
 684              $total_results = (int) $db->sql_fetchfield('total_results');
 685              $db->sql_freeresult($result);
 686  
 687              if (!$total_results)
 688              {
 689                  return false;
 690              }
 691          }
 692  
 693          // store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
 694          $this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
 695          $id_ary = array_slice($id_ary, 0, (int) $per_page);
 696  
 697          return $total_results;
 698      }
 699  
 700      /**
 701      * Performs a search on an author's posts without caring about message contents. Depends on display specific params
 702      *
 703      * @param    string        $type                contains either posts or topics depending on what should be searched for
 704      * @param    array        $sort_by_sql        contains SQL code for the ORDER BY part of a query
 705      * @param    string        $sort_key            is the key of $sort_by_sql for the selected sorting
 706      * @param    string        $sort_dir            is either a or d representing ASC and DESC
 707      * @param    string        $sort_days            specifies the maximum amount of days a post may be old
 708      * @param    array        $ex_fid_ary            specifies an array of forum ids which should not be searched
 709      * @param    array        $m_approve_fid_ary    specifies an array of forum ids in which the searcher is allowed to view unapproved posts
 710      * @param    int            $topic_id            is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 711      * @param    array        $author_ary            an array of author ids
 712      * @param    array        $id_ary                passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 713      * @param    int            $start                indicates the first index of the page
 714      * @param    int            $per_page            number of ids each page is supposed to contain
 715      * @return    boolean|int                        total number of results
 716      *
 717      * @access    public
 718      */
 719  	function author_search($type, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
 720      {
 721          global $config, $db;
 722  
 723          // No author? No posts.
 724          if (!sizeof($author_ary))
 725          {
 726              return 0;
 727          }
 728  
 729          // generate a search_key from all the options to identify the results
 730          $search_key = md5(implode('#', array(
 731              '',
 732              $type,
 733              '',
 734              '',
 735              $sort_days,
 736              $sort_key,
 737              $topic_id,
 738              implode(',', $ex_fid_ary),
 739              implode(',', $m_approve_fid_ary),
 740              implode(',', $author_ary)
 741          )));
 742  
 743          // try reading the results from cache
 744          $total_results = 0;
 745          if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
 746          {
 747              return $total_results;
 748          }
 749  
 750          $id_ary = array();
 751  
 752          // Create some display specific sql strings
 753          $sql_author        = $db->sql_in_set('p.poster_id', $author_ary);
 754          $sql_fora        = (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
 755          $sql_time        = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
 756          $sql_topic_id    = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
 757  
 758          // Build sql strings for sorting
 759          $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
 760          $sql_sort_table = $sql_sort_join = '';
 761          switch ($sql_sort[0])
 762          {
 763              case 'u':
 764                  $sql_sort_table    = USERS_TABLE . ' u, ';
 765                  $sql_sort_join    = ' AND u.user_id = p.poster_id ';
 766              break;
 767  
 768              case 't':
 769                  $sql_sort_table    = ($type == 'posts') ? TOPICS_TABLE . ' t, ' : '';
 770                  $sql_sort_join    = ($type == 'posts') ? ' AND t.topic_id = p.topic_id ' : '';
 771              break;
 772  
 773              case 'f':
 774                  $sql_sort_table    = FORUMS_TABLE . ' f, ';
 775                  $sql_sort_join    = ' AND f.forum_id = p.forum_id ';
 776              break;
 777          }
 778  
 779          if (!sizeof($m_approve_fid_ary))
 780          {
 781              $m_approve_fid_sql = ' AND p.post_approved = 1';
 782          }
 783          else if ($m_approve_fid_ary == array(-1))
 784          {
 785              $m_approve_fid_sql = '';
 786          }
 787          else
 788          {
 789              $m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
 790          }
 791  
 792          $select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
 793          $is_mysql = false;
 794  
 795          // If the cache was completely empty count the results
 796          if (!$total_results)
 797          {
 798              switch ($db->sql_layer)
 799              {
 800                  case 'mysql4':
 801                  case 'mysqli':
 802                      $select = 'SQL_CALC_FOUND_ROWS ' . $select;
 803                      $is_mysql = true;
 804                  break;
 805  
 806                  default:
 807                      if ($type == 'posts')
 808                      {
 809                          $sql = 'SELECT COUNT(p.post_id) as total_results
 810                              FROM ' . POSTS_TABLE . " p
 811                              WHERE $sql_author
 812                                  $sql_topic_id
 813                                  $m_approve_fid_sql
 814                                  $sql_fora
 815                                  $sql_time";
 816                      }
 817                      else
 818                      {
 819                          if ($db->sql_layer == 'sqlite')
 820                          {
 821                              $sql = 'SELECT COUNT(topic_id) as total_results
 822                                  FROM (SELECT DISTINCT t.topic_id';
 823                          }
 824                          else
 825                          {
 826                              $sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
 827                          }
 828  
 829                          $sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
 830                              WHERE $sql_author
 831                                  $sql_topic_id
 832                                  $m_approve_fid_sql
 833                                  $sql_fora
 834                                  AND t.topic_id = p.topic_id
 835                                  $sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
 836                      }
 837                      $result = $db->sql_query($sql);
 838          
 839                      $total_results = (int) $db->sql_fetchfield('total_results');
 840                      $db->sql_freeresult($result);
 841          
 842                      if (!$total_results)
 843                      {
 844                          return false;
 845                      }
 846                  break;
 847              }
 848          }
 849  
 850          // Build the query for really selecting the post_ids
 851          if ($type == 'posts')
 852          {
 853              $sql = "SELECT $select
 854                  FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($topic_id) ? ', ' . TOPICS_TABLE . ' t' : '') . "
 855                  WHERE $sql_author
 856                      $sql_topic_id
 857                      $m_approve_fid_sql
 858                      $sql_fora
 859                      $sql_sort_join
 860                      $sql_time
 861                  ORDER BY $sql_sort";
 862              $field = 'post_id';
 863          }
 864          else
 865          {
 866              $sql = "SELECT $select
 867                  FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
 868                  WHERE $sql_author
 869                      $sql_topic_id
 870                      $m_approve_fid_sql
 871                      $sql_fora
 872                      AND t.topic_id = p.topic_id
 873                      $sql_sort_join
 874                      $sql_time
 875                  GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
 876                  ORDER BY ' . $sql_sort;
 877              $field = 'topic_id';
 878          }
 879  
 880          // Only read one block of posts from the db and then cache it
 881          $result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
 882  
 883          while ($row = $db->sql_fetchrow($result))
 884          {
 885              $id_ary[] = $row[$field];
 886          }
 887          $db->sql_freeresult($result);
 888  
 889          if (!$total_results && $is_mysql)
 890          {
 891              $sql = 'SELECT FOUND_ROWS() as total_results';
 892              $result = $db->sql_query($sql);
 893              $total_results = (int) $db->sql_fetchfield('total_results');
 894              $db->sql_freeresult($result);
 895  
 896              if (!$total_results)
 897              {
 898                  return false;
 899              }
 900          }
 901  
 902          if (sizeof($id_ary))
 903          {
 904              $this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
 905              $id_ary = array_slice($id_ary, 0, $per_page);
 906  
 907              return $total_results;
 908          }
 909          return false;
 910      }
 911  
 912      /**
 913      * Split a text into words of a given length
 914      *
 915      * The text is converted to UTF-8, cleaned up, and split. Then, words that
 916      * conform to the defined length range are returned in an array.
 917      *
 918      * NOTE: duplicates are NOT removed from the return array
 919      *
 920      * @param    string    $text    Text to split, encoded in UTF-8
 921      * @return    array            Array of UTF-8 words
 922      *
 923      * @access    private
 924      */
 925  	function split_message($text)
 926      {
 927          global $phpbb_root_path, $phpEx;
 928          global $config, $user;
 929  
 930          $match = $words = array();
 931  
 932          /**
 933          * Taken from the original code
 934          */
 935          // Do not index code
 936          $match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
 937          // BBcode
 938          $match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';
 939  
 940          $min = $config['fulltext_native_min_chars'];
 941          $max = $config['fulltext_native_max_chars'];
 942  
 943          $isset_min = $min - 1;
 944  
 945          /**
 946          * Clean up the string, remove HTML tags, remove BBCodes
 947          */
 948          $word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' ');
 949  
 950          while (isset($word[0]))
 951          {
 952              if (isset($word[255])
 953               || !isset($word[$isset_min]))
 954              {
 955                  /**
 956                  * Words longer than 255 bytes are ignored. This will have to be
 957                  * changed whenever we change the length of search_wordlist.word_text
 958                  *
 959                  * Words shorter than $isset_min bytes are ignored, too
 960                  */
 961                  $word = strtok(' ');
 962                  continue;
 963              }
 964  
 965              $len = utf8_strlen($word);
 966  
 967              /**
 968              * Test whether the word is too short to be indexed.
 969              *
 970              * Note that this limit does NOT apply to CJK and Hangul
 971              */
 972              if ($len < $min)
 973              {
 974                  /**
 975                  * Note: this could be optimized. If the codepoint is lower than Hangul's range
 976                  * we know that it will also be lower than CJK ranges
 977                  */
 978                  if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
 979                   && (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
 980                   && (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
 981                  {
 982                      $word = strtok(' ');
 983                      continue;
 984                  }
 985              }
 986  
 987              $words[] = $word;
 988              $word = strtok(' ');
 989          }
 990  
 991          return $words;
 992      }
 993  
 994      /**
 995      * Updates wordlist and wordmatch tables when a message is posted or changed
 996      *
 997      * @param    string    $mode        Contains the post mode: edit, post, reply, quote
 998      * @param    int        $post_id    The id of the post which is modified/created
 999      * @param    string    $message    New or updated post content
1000      * @param    string    $subject    New or updated post subject
1001      * @param    int        $poster_id    Post author's user id
1002      * @param    int        $forum_id    The id of the forum in which the post is located
1003      *
1004      * @access    public
1005      */
1006  	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
1007      {
1008          global $config, $db, $user;
1009  
1010          if (!$config['fulltext_native_load_upd'])
1011          {
1012              /**
1013              * The search indexer is disabled, return
1014              */
1015              return;
1016          }
1017  
1018          // Split old and new post/subject to obtain array of 'words'
1019          $split_text = $this->split_message($message);
1020          $split_title = $this->split_message($subject);
1021  
1022          $cur_words = array('post' => array(), 'title' => array());
1023  
1024          $words = array();
1025          if ($mode == 'edit')
1026          {
1027              $words['add']['post'] = array();
1028              $words['add']['title'] = array();
1029              $words['del']['post'] = array();
1030              $words['del']['title'] = array();
1031  
1032              $sql = 'SELECT w.word_id, w.word_text, m.title_match
1033                  FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
1034                  WHERE m.post_id = $post_id
1035                      AND w.word_id = m.word_id";
1036              $result = $db->sql_query($sql);
1037  
1038              while ($row = $db->sql_fetchrow($result))
1039              {
1040                  $which = ($row['title_match']) ? 'title' : 'post';
1041                  $cur_words[$which][$row['word_text']] = $row['word_id'];
1042              }
1043              $db->sql_freeresult($result);
1044  
1045              $words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
1046              $words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
1047              $words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
1048              $words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
1049          }
1050          else
1051          {
1052              $words['add']['post'] = $split_text;
1053              $words['add']['title'] = $split_title;
1054              $words['del']['post'] = array();
1055              $words['del']['title'] = array();
1056          }
1057          unset($split_text);
1058          unset($split_title);
1059  
1060          // Get unique words from the above arrays
1061          $unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
1062  
1063          // We now have unique arrays of all words to be added and removed and
1064          // individual arrays of added and removed words for text and title. What
1065          // we need to do now is add the new words (if they don't already exist)
1066          // and then add (or remove) matches between the words and this post
1067          if (sizeof($unique_add_words))
1068          {
1069              $sql = 'SELECT word_id, word_text
1070                  FROM ' . SEARCH_WORDLIST_TABLE . '
1071                  WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
1072              $result = $db->sql_query($sql);
1073  
1074              $word_ids = array();
1075              while ($row = $db->sql_fetchrow($result))
1076              {
1077                  $word_ids[$row['word_text']] = $row['word_id'];
1078              }
1079              $db->sql_freeresult($result);
1080  
1081              $new_words = array_diff($unique_add_words, array_keys($word_ids));
1082  
1083              if (sizeof($new_words))
1084              {
1085                  $sql_ary = array();
1086  
1087                  foreach ($new_words as $word)
1088                  {
1089                      $sql_ary[] = array('word_text' => $word);
1090                  }
1091  
1092                  $db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
1093              }
1094              unset($new_words, $sql_ary);
1095          }
1096  
1097          // now update the search match table, remove links to removed words and add links to new words
1098          foreach ($words['del'] as $word_in => $word_ary)
1099          {
1100              $title_match = ($word_in == 'title') ? 1 : 0;
1101  
1102              if (sizeof($word_ary))
1103              {
1104                  $sql_in = array();
1105                  foreach ($word_ary as $word)
1106                  {
1107                      $sql_in[] = $cur_words[$word_in][$word];
1108                  }
1109  
1110                  $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1111                      WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1112                          AND post_id = ' . intval($post_id) . "
1113                          AND title_match = $title_match";
1114                  $db->sql_query($sql);
1115                  unset($sql_in);
1116              }
1117          }
1118  
1119          foreach ($words['add'] as $word_in => $word_ary)
1120          {
1121              $title_match = ($word_in == 'title') ? 1 : 0;
1122  
1123              if (sizeof($word_ary))
1124              {
1125                  $sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . " (post_id, word_id, title_match)
1126                      SELECT $post_id, word_id, $title_match
1127                      FROM " . SEARCH_WORDLIST_TABLE . '
1128                      WHERE ' . $db->sql_in_set('word_text', $word_ary);
1129                  $db->sql_query($sql);
1130              }
1131          }
1132  
1133          // destroy cached search results containing any of the words removed or added
1134          $this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['post'])), array($poster_id));
1135  
1136          unset($unique_add_words);
1137          unset($words);
1138          unset($cur_words);
1139      }
1140  
1141      /**
1142      * Removes entries from the wordmatch table for the specified post_ids
1143      */
1144  	function index_remove($post_ids, $author_ids, $forum_ids)
1145      {
1146          global $db;
1147  
1148          if (sizeof($post_ids))
1149          {
1150              $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1151                  WHERE ' . $db->sql_in_set('post_id', $post_ids);
1152              $db->sql_query($sql);
1153          }
1154  
1155          // SEARCH_WORDLIST_TABLE will be updated by tidy()
1156          $this->destroy_cache(array(), $author_ids);
1157      }
1158  
1159      /**
    * Tidy up indexes: tag words that occur in more than 20% of the posts
    * as 'common words' and remove their entries from the match table
1162      */
1163  	function tidy()
1164      {
1165          global $db, $config;
1166  
1167          // Is the fulltext indexer disabled? If yes then we need not
1168          // carry on ... it's okay ... I know when I'm not wanted boo hoo
1169          if (!$config['fulltext_native_load_upd'])
1170          {
1171              set_config('search_last_gc', time(), true);
1172              return;
1173          }
1174  
1175          $destroy_cache_words = array();
1176  
1177          // Remove common (> 20% of posts ) words
1178          if ($config['num_posts'] >= 100)
1179          {
1180              // First, get the IDs of common words
1181              $sql = 'SELECT word_id
1182                  FROM ' . SEARCH_WORDMATCH_TABLE . '
1183                  GROUP BY word_id
1184                  HAVING COUNT(word_id) > ' . floor($config['num_posts'] * 0.2);
1185              $result = $db->sql_query($sql);
1186  
1187              $sql_in = array();
1188              while ($row = $db->sql_fetchrow($result))
1189              {
1190                  $sql_in[] = $row['word_id'];
1191              }
1192              $db->sql_freeresult($result);
1193  
1194              if (sizeof($sql_in))
1195              {
1196                  // Get the text of those new common words
1197                  $sql = 'SELECT word_text
1198                      FROM ' . SEARCH_WORDLIST_TABLE . '
1199                      WHERE ' . $db->sql_in_set('word_id', $sql_in);
1200                  $result = $db->sql_query($sql);
1201  
1202                  while ($row = $db->sql_fetchrow($result))
1203                  {
1204                      $destroy_cache_words[] = $row['word_text'];
1205                  }
1206                  $db->sql_freeresult($result);
1207  
1208                  // Flag the words
1209                  $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1210                      SET word_common = 1
1211                      WHERE ' . $db->sql_in_set('word_id', $sql_in);
1212                  $db->sql_query($sql);
1213  
1214                  // Delete the matches
1215                  $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1216                      WHERE ' . $db->sql_in_set('word_id', $sql_in);
1217                  $db->sql_query($sql);
1218              }
1219              unset($sql_in);
1220          }
1221  
1222          // destroy cached search results containing any of the words that are now common or were removed
1223          $this->destroy_cache(array_unique($destroy_cache_words));
1224  
1225          set_config('search_last_gc', time(), true);
1226      }
1227  
1228      /**
1229      * Deletes all words from the index
1230      */
1231  	function delete_index($acp_module, $u_action)
1232      {
1233          global $db;
1234  
1235          $db->sql_query((($db->sql_layer != 'sqlite') ? 'TRUNCATE TABLE ' : 'DELETE FROM ') . SEARCH_WORDLIST_TABLE);
1236          $db->sql_query((($db->sql_layer != 'sqlite') ? 'TRUNCATE TABLE ' : 'DELETE FROM ') . SEARCH_WORDMATCH_TABLE);
1237          $db->sql_query((($db->sql_layer != 'sqlite') ? 'TRUNCATE TABLE ' : 'DELETE FROM ') . SEARCH_RESULTS_TABLE);
1238      }
1239  
1240      /**
    * Returns true if both the word list and the word match table contain entries
1242      */
1243  	function index_created()
1244      {
1245          if (!sizeof($this->stats))
1246          {
1247              $this->get_stats();
1248          }
1249  
1250          return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
1251      }
1252  
1253      /**
1254      * Returns an associative array containing information about the indexes
1255      */
1256  	function index_stats()
1257      {
1258          global $user;
1259  
1260          if (!sizeof($this->stats))
1261          {
1262              $this->get_stats();
1263          }
1264  
1265          return array(
1266              $user->lang['TOTAL_WORDS']        => $this->stats['total_words'],
1267              $user->lang['TOTAL_MATCHES']    => $this->stats['total_matches']);
1268      }
1269  
1270  	function get_stats()
1271      {
1272          global $db;
1273  
1274          $sql = 'SELECT COUNT(*) as total_words
1275              FROM ' . SEARCH_WORDLIST_TABLE;
1276          $result = $db->sql_query($sql);
1277          $this->stats['total_words'] = (int) $db->sql_fetchfield('total_words');
1278          $db->sql_freeresult($result);
1279  
1280          $sql = 'SELECT COUNT(*) as total_matches
1281              FROM ' . SEARCH_WORDMATCH_TABLE;
1282          $result = $db->sql_query($sql);
1283          $this->stats['total_matches'] = (int) $db->sql_fetchfield('total_matches');
1284          $db->sql_freeresult($result);
1285      }
1286  
1287      /**
1288      * Clean up a text to remove non-alphanumeric characters
1289      *
    * This method receives a UTF-8 string, normalizes and validates it, replaces all
    * non-alphanumeric characters with spaces, then returns the result.
1292      *
1293      * Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1294      *
1295      * @param    string    $text            Text to split, in UTF-8 (not normalized or sanitized)
1296      * @param    string    $allowed_chars    String of special chars to allow
1297      * @param    string    $encoding        Text encoding
1298      * @return    string                    Cleaned up text, only alphanumeric chars are left
1299      */
1300  	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
1301      {
1302          global $phpbb_root_path, $phpEx;
1303          static $conv = array(), $conv_loaded = array();
1304          $words = $allow = array();
1305  
1306          /**
1307          * Convert the text to UTF-8
1308          */
1309          $encoding = strtolower($encoding);
1310          if ($encoding != 'utf-8')
1311          {
1312              $text = utf8_recode($text, $encoding);
1313          }
1314  
1315          $utf_len_mask = array(
1316              "\xC0"    =>    2,
1317              "\xD0"    =>    2,
1318              "\xE0"    =>    3,
1319              "\xF0"    =>    4
1320          );
1321  
1322          /**
1323          * Replace HTML entities and NCRs
1324          */
1325          $text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
1326  
1327          /**
1328          * Load the UTF-8 normalizer
1329          *
1330          * If we use it more widely, an instance of that class should be held in a
1331          * a global variable instead
1332          */
1333          $text = utf_normalizer::nfc($text);
1334  
1335          /**
1336          * The first thing we do is:
1337          *
1338          * - convert ASCII-7 letters to lowercase
1339          * - remove the ASCII-7 non-alpha characters
1340          * - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
1341          *   0xC1 and 0xF5-0xFF
1342          *
1343          * @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
1344          */
1345          $sb_match    = "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
1346          $sb_replace    = 'istcpamelrdojbnhfgvwuqkyxz                                                                              ';
1347  
1348          /**
1349          * This is the list of legal ASCII chars, it is automatically extended
1350          * with ASCII chars from $allowed_chars
1351          */
1352          $legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';
1353  
1354          /**
1355          * Prepare an array containing the extra chars to allow
1356          */
1357          if (isset($allowed_chars[0]))
1358          {
1359              $pos = 0;
1360              $len = strlen($allowed_chars);
1361              do
1362              {
1363                  $c = $allowed_chars[$pos];
1364  
1365                  if ($c < "\x80")
1366                  {
1367                      /**
1368                      * ASCII char
1369                      */
1370                      $sb_pos = strpos($sb_match, $c);
1371                      if (is_int($sb_pos))
1372                      {
1373                          /**
1374                          * Remove the char from $sb_match and its corresponding
1375                          * replacement in $sb_replace
1376                          */
1377                          $sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
1378                          $sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
1379                          $legal_ascii .= $c;
1380                      }
1381  
1382                      ++$pos;
1383                  }
1384                  else
1385                  {
1386                      /**
1387                      * UTF-8 char
1388                      */
1389                      $utf_len = $utf_len_mask[$c & "\xF0"];
1390                      $allow[substr($allowed_chars, $pos, $utf_len)] = 1;
1391                      $pos += $utf_len;
1392                  }
1393              }
1394              while ($pos < $len);
1395          }
1396  
1397          $text = strtr($text, $sb_match, $sb_replace);
1398          $ret = '';
1399  
1400          $pos = 0;
1401          $len = strlen($text);
1402  
1403          do
1404          {
1405              /**
1406              * Do all consecutive ASCII chars at once
1407              */
1408              if ($spn = strspn($text, $legal_ascii, $pos))
1409              {
1410                  $ret .= substr($text, $pos, $spn);
1411                  $pos += $spn;
1412              }
1413  
1414              if ($pos >= $len)
1415              {
1416                  return $ret;
1417              }
1418  
1419              /**
1420              * Capture the UTF char
1421              */
1422              $utf_len = $utf_len_mask[$text[$pos] & "\xF0"];
1423              $utf_char = substr($text, $pos, $utf_len);
1424              $pos += $utf_len;
1425  
1426              if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
1427               || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
1428               || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
1429              {
1430                  /**
1431                  * All characters within these ranges are valid
1432                  *
1433                  * We separate them with a space in order to index each character
1434                  * individually
1435                  */
1436                  $ret .= ' ' . $utf_char . ' ';
1437                  continue;
1438              }
1439  
1440              if (isset($allow[$utf_char]))
1441              {
1442                  /**
1443                  * The char is explicitly allowed
1444                  */
1445                  $ret .= $utf_char;
1446                  continue;
1447              }
1448  
1449              if (isset($conv[$utf_char]))
1450              {
1451                  /**
1452                  * The char is mapped to something, maybe to itself actually
1453                  */
1454                  $ret .= $conv[$utf_char];
1455                  continue;
1456              }
1457  
1458              /**
1459              * The char isn't mapped, but did we load its conversion table?
1460              *
1461              * The search indexer table is split into blocks. The block number of
1462              * each char is equal to its codepoint right-shifted for 11 bits. It
1463              * means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
1464              * 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
1465              * all UTF chars encoded in 2 bytes are in the same first block.
1466              */
1467              if (isset($utf_char[2]))
1468              {
1469                  if (isset($utf_char[3]))
1470                  {
1471                      /**
1472                      * 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
1473                      * 0000 0111 0011 1111 0010 0000
1474                      */
1475                      $idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
1476                  }
1477                  else
1478                  {
1479                      /**
1480                      * 1110 nnnn 10nx xxxx 10xx xxxx
1481                      * 0000 0111 0010 0000
1482                      */
1483                      $idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
1484                  }
1485              }
1486              else
1487              {
1488                  /**
1489                  * 110x xxxx 10xx xxxx
1490                  * 0000 0000 0000 0000
1491                  */
1492                  $idx = 0;
1493              }
1494  
1495              /**
1496              * Check if the required conv table has been loaded already
1497              */
1498              if (!isset($conv_loaded[$idx]))
1499              {
1500                  $conv_loaded[$idx] = 1;
1501                  $file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;
1502  
1503                  if (file_exists($file))
1504                  {
1505                      $conv += include($file);
1506                  }
1507              }
1508  
1509              if (isset($conv[$utf_char]))
1510              {
1511                  $ret .= $conv[$utf_char];
1512              }
1513              else
1514              {
1515                  /**
1516                  * We add an entry to the conversion table so that we
1517                  * don't have to convert to codepoint and perform the checks
1518                  * that are above this block
1519                  */
1520                  $conv[$utf_char] = ' ';
1521                  $ret .= ' ';
1522              }
1523          }
1524          while (1);
1525  
1526          return $ret;
1527      }
1528  
1529      /**
1530      * Returns a list of options for the ACP to display
1531      */
1532  	function acp()
1533      {
1534          global $user, $config;
1535  
1536  
1537          /**
1538          * if we need any options, copied from fulltext_native for now, will have to be adjusted or removed
1539          */
1540  
1541          $tpl = '
1542          <dl>
1543              <dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
1544              <dd><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" />&nbsp;' . $user->lang['YES'] . '&nbsp;&nbsp;<input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" />&nbsp;' . $user->lang['NO'] . '</dd>
1545          </dl>
1546          <dl>
1547              <dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1548              <dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
1549          </dl>
1550          <dl>
1551              <dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1552              <dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
1553          </dl>
1554          ';
1555  
1556          // These are fields required in the config table
1557          return array(
1558              'tpl'        => $tpl,
1559              'config'    => array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255')
1560          );
1561      }
1562  }
1563  
1564  ?>


Generated: Wed Nov 22 00:35:05 2006 Cross-referenced by PHPXref 0.6