PHPXRef 0.6 : phpBB 3.0 Beta 3 : /includes/diff/engine.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  /** 
   3  *
   4  * @package phpBB3
   5  * @version $Id: engine.php,v 1.1 2006/08/22 21:29:45 acydburn Exp $
   6  * @copyright (c) 2006 phpBB Group 
   7  * @license http://opensource.org/licenses/gpl-license.php GNU Public License 
   8  *
   9  */
  10  
  11  /**
  12  */
  13  if (!defined('IN_PHPBB'))
  14  {
  15      exit;
  16  }
  17  
  18  /**
  19  * Code from pear.php.net, Text_Diff-0.2.1 (beta) package
  20  * http://pear.php.net/package/Text_Diff/
  21  *
  22  * Modified by Acyd Burn to meet our coding standards
  23  * and being able to integrate into phpBB
  24  */
  25  
  26  /**
  27  * Class used internally by Diff to actually compute the diffs.  This class is
  28  * implemented using native PHP code.
  29  *
  30  * The algorithm used here is mostly lifted from the perl module
  31  * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
  32  * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
  33  *
  34  * More ideas are taken from:
  35  * http://www.ics.uci.edu/~eppstein/161/960229.html
  36  *
  37  * Some ideas (and a bit of code) are taken from analyze.c, of GNU
  38  * diffutils-2.7, which can be found at:
  39  * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
  40  *
  41  * Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from
  42  * Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this
  43  * code was written by him, and is used/adapted with his permission.
  44  *
  45  * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
  46  * @package phpBB3
  47  *
  48  * @access private
  49  */
  50  class diff_engine
  51  {
  52  	function diff($from_lines, $to_lines)
  53      {
  54          array_walk($from_lines, array('diff', 'trim_newlines'));
  55          array_walk($to_lines, array('diff', 'trim_newlines'));
  56  
  57          $n_from = sizeof($from_lines);
  58          $n_to = sizeof($to_lines);
  59  
  60          $this->xchanged = $this->ychanged = $this->xv = $this->yv = $this->xind = $this->yind = array();
  61          unset($this->seq, $this->in_seq, $this->lcs);
  62  
  63          // Skip leading common lines.
  64          for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++)
  65          {
  66              if ($from_lines[$skip] !== $to_lines[$skip])
  67              {
  68                  break;
  69              }
  70              $this->xchanged[$skip] = $this->ychanged[$skip] = false;
  71          }
  72  
  73          // Skip trailing common lines.
  74          $xi = $n_from;
  75          $yi = $n_to;
  76  
  77          for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++)
  78          {
  79              if ($from_lines[$xi] !== $to_lines[$yi])
  80              {
  81                  break;
  82              }
  83              $this->xchanged[$xi] = $this->ychanged[$yi] = false;
  84          }
  85  
  86          // Ignore lines which do not exist in both files.
  87          for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
  88          {
  89              $xhash[$from_lines[$xi]] = 1;
  90          }
  91  
  92          for ($yi = $skip; $yi < $n_to - $endskip; $yi++)
  93          {
  94              $line = $to_lines[$yi];
  95  
  96              if (($this->ychanged[$yi] = empty($xhash[$line])))
  97              {
  98                  continue;
  99              }
 100              $yhash[$line] = 1;
 101              $this->yv[] = $line;
 102              $this->yind[] = $yi;
 103          }
 104  
 105          for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
 106          {
 107              $line = $from_lines[$xi];
 108  
 109              if (($this->xchanged[$xi] = empty($yhash[$line])))
 110              {
 111                  continue;
 112              }
 113              $this->xv[] = $line;
 114              $this->xind[] = $xi;
 115          }
 116  
 117          // Find the LCS.
 118          $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
 119  
 120          // Merge edits when possible.
 121          $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
 122          $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
 123  
 124          // Compute the edit operations.
 125          $edits = array();
 126          $xi = $yi = 0;
 127  
 128          while ($xi < $n_from || $yi < $n_to)
 129          {
 130              // Skip matching "snake".
 131              $copy = array();
 132  
 133              while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi])
 134              {
 135                  $copy[] = $from_lines[$xi++];
 136                  $yi++;
 137              }
 138  
 139              if ($copy)
 140              {
 141                  $edits[] = &new diff_op_copy($copy);
 142              }
 143  
 144              // Find deletes & adds.
 145              $delete = array();
 146              while ($xi < $n_from && $this->xchanged[$xi])
 147              {
 148                  $delete[] = $from_lines[$xi++];
 149              }
 150  
 151              $add = array();
 152              while ($yi < $n_to && $this->ychanged[$yi])
 153              {
 154                  $add[] = $to_lines[$yi++];
 155              }
 156  
 157              if ($delete && $add)
 158              {
 159                  $edits[] = &new diff_op_change($delete, $add);
 160              }
 161              else if ($delete)
 162              {
 163                  $edits[] = &new diff_op_delete($delete);
 164              }
 165              else if ($add)
 166              {
 167                  $edits[] = &new diff_op_add($add);
 168              }
 169          }
 170  
 171          return $edits;
 172      }
 173  
 174      /**
 175      * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF,
 176      * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.
 177      *
 178      * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an array of
 179      * NCHUNKS+1 (X, Y) indexes giving the diving points between sub
 180      * sequences.  The first sub-sequence is contained in (X0, X1), (Y0, Y1),
 181      * the second in (X1, X2), (Y1, Y2) and so on.  Note that (X0, Y0) ==
 182      * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
 183      *
 184      * This function assumes that the first lines of the specified portions of
 185      * the two files do not match, and likewise that the last lines do not
 186      * match.  The caller must trim matching lines from the beginning and end
 187      * of the portions it is going to specify.
 188      */
 189  	function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
 190      {
 191          $flip = false;
 192  
 193          if ($xlim - $xoff > $ylim - $yoff)
 194          {
 195              // Things seems faster (I'm not sure I understand why) when the shortest sequence is in X.
 196              $flip = true;
 197              list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
 198          }
 199  
 200          if ($flip)
 201          {
 202              for ($i = $ylim - 1; $i >= $yoff; $i--)
 203              {
 204                  $ymatches[$this->xv[$i]][] = $i;
 205              }
 206          }
 207          else
 208          {
 209              for ($i = $ylim - 1; $i >= $yoff; $i--)
 210              {
 211                  $ymatches[$this->yv[$i]][] = $i;
 212              }
 213          }
 214  
 215          $this->lcs = 0;
 216          $this->seq[0]= $yoff - 1;
 217          $this->in_seq = array();
 218          $ymids[0] = array();
 219  
 220          $numer = $xlim - $xoff + $nchunks - 1;
 221          $x = $xoff;
 222  
 223          for ($chunk = 0; $chunk < $nchunks; $chunk++)
 224          {
 225              if ($chunk > 0)
 226              {
 227                  for ($i = 0; $i <= $this->lcs; $i++)
 228                  {
 229                      $ymids[$i][$chunk - 1] = $this->seq[$i];
 230                  }
 231              }
 232  
 233              $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
 234  
 235              for (; $x < $x1; $x++)
 236              {
 237                  $line = $flip ? $this->yv[$x] : $this->xv[$x];
 238                  if (empty($ymatches[$line]))
 239                  {
 240                      continue;
 241                  }
 242                  $matches = $ymatches[$line];
 243  
 244                  foreach ($matches as $y)
 245                  {
 246                      if (empty($this->in_seq[$y]))
 247                      {
 248                          $k = $this->_lcs_pos($y);
 249                          $ymids[$k] = $ymids[$k - 1];
 250                          break;
 251                      }
 252                  }
 253  
 254                  while (list($junk, $y) = each($matches))
 255                  {
 256                      if ($y > $this->seq[$k - 1])
 257                      {
 258                          // Optimization: this is a common case: next match is just replacing previous match.
 259                          $this->in_seq[$this->seq[$k]] = false;
 260                          $this->seq[$k] = $y;
 261                          $this->in_seq[$y] = 1;
 262                      }
 263                      else if (empty($this->in_seq[$y]))
 264                      {
 265                          $k = $this->_lcs_pos($y);
 266                          $ymids[$k] = $ymids[$k - 1];
 267                      }
 268                  }
 269              }
 270          }
 271  
 272          $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
 273          $ymid = $ymids[$this->lcs];
 274  
 275          for ($n = 0; $n < $nchunks - 1; $n++)
 276          {
 277              $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
 278              $y1 = $ymid[$n] + 1;
 279              $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
 280          }
 281          $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
 282  
 283          return array($this->lcs, $seps);
 284      }
 285  
 286  	function _lcs_pos($ypos)
 287      {
 288          $end = $this->lcs;
 289  
 290          if ($end == 0 || $ypos > $this->seq[$end])
 291          {
 292              $this->seq[++$this->lcs] = $ypos;
 293              $this->in_seq[$ypos] = 1;
 294              return $this->lcs;
 295          }
 296  
 297          $beg = 1;
 298          while ($beg < $end)
 299          {
 300              $mid = (int)(($beg + $end) / 2);
 301              if ($ypos > $this->seq[$mid])
 302              {
 303                  $beg = $mid + 1;
 304              }
 305              else
 306              {
 307                  $end = $mid;
 308              }
 309          }
 310  
 311          $this->in_seq[$this->seq[$end]] = false;
 312          $this->seq[$end] = $ypos;
 313          $this->in_seq[$ypos] = 1;
 314  
 315          return $end;
 316      }
 317  
 318      /**
 319      * Finds LCS of two sequences.
 320      *
 321      * The results are recorded in the vectors $this->{x,y}changed[], by
 322      * storing a 1 in the element for each line that is an insertion or
 323      * deletion (ie. is not in the LCS).
 324      *
 325      * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.
 326      *
 327      * Note that XLIM, YLIM are exclusive bounds.  All line numbers are
 328      * origin-0 and discarded lines are not counted.
 329      */
 330  	function _compareseq($xoff, $xlim, $yoff, $ylim)
 331      {
 332          // Slide down the bottom initial diagonal.
 333          while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff])
 334          {
 335              ++$xoff;
 336              ++$yoff;
 337          }
 338  
 339          // Slide up the top initial diagonal.
 340          while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
 341          {
 342              --$xlim;
 343              --$ylim;
 344          }
 345  
 346          if ($xoff == $xlim || $yoff == $ylim)
 347          {
 348              $lcs = 0;
 349          }
 350          else
 351          {
 352              // This is ad hoc but seems to work well.
 353              // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
 354              // $nchunks = max(2,min(8,(int)$nchunks));
 355              $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
 356              list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
 357          }
 358  
 359          if ($lcs == 0)
 360          {
 361              // X and Y sequences have no common subsequence: mark all changed.
 362              while ($yoff < $ylim)
 363              {
 364                  $this->ychanged[$this->yind[$yoff++]] = 1;
 365              }
 366  
 367              while ($xoff < $xlim)
 368              {
 369                  $this->xchanged[$this->xind[$xoff++]] = 1;
 370              }
 371          }
 372          else
 373          {
 374              // Use the partitions to split this problem into subproblems.
 375              reset($seps);
 376              $pt1 = $seps[0];
 377  
 378              while ($pt2 = next($seps))
 379              {
 380                  $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
 381                  $pt1 = $pt2;
 382              }
 383          }
 384      }
 385  
 386      /**
 387      * Adjusts inserts/deletes of identical lines to join changes as much as possible.
 388      *
 389      * We do something when a run of changed lines include a line at one end
 390      * and has an excluded, identical line at the other.  We are free to
 391      * choose which identical line is included. 'compareseq' usually chooses
 392      * the one at the beginning, but usually it is cleaner to consider the
 393      * following identical line to be the "change".
 394      *
 395      * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
 396      */
 397  	function _shift_boundaries($lines, &$changed, $other_changed)
 398      {
 399          $i = 0;
 400          $j = 0;
 401  
 402          $len = sizeof($lines);
 403          $other_len = sizeof($other_changed);
 404  
 405          while (1)
 406          {
 407              // Scan forward to find the beginning of another run of
 408              // changes. Also keep track of the corresponding point in the other file.
 409              //
 410              // Throughout this code, $i and $j are adjusted together so that
 411              // the first $i elements of $changed and the first $j elements of
 412              // $other_changed both contain the same number of zeros (unchanged lines).
 413              //
 414              // Furthermore, $j is always kept so that $j == $other_len or $other_changed[$j] == false.
 415              while ($j < $other_len && $other_changed[$j])
 416              {
 417                  $j++;
 418              }
 419  
 420              while ($i < $len && ! $changed[$i])
 421              {
 422                  $i++;
 423                  $j++;
 424  
 425                  while ($j < $other_len && $other_changed[$j])
 426                  {
 427                      $j++;
 428                  }
 429              }
 430  
 431              if ($i == $len)
 432              {
 433                  break;
 434              }
 435  
 436              $start = $i;
 437  
 438              // Find the end of this run of changes.
 439              while (++$i < $len && $changed[$i])
 440              {
 441                  continue;
 442              }
 443  
 444              do
 445              {
 446                  // Record the length of this run of changes, so that we can later determine whether the run has grown.
 447                  $runlength = $i - $start;
 448  
 449                  // Move the changed region back, so long as the previous unchanged line matches the last changed one.
 450                  // This merges with previous changed regions.
 451                  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1])
 452                  {
 453                      $changed[--$start] = 1;
 454                      $changed[--$i] = false;
 455  
 456                      while ($start > 0 && $changed[$start - 1])
 457                      {
 458                          $start--;
 459                      }
 460  
 461                      while ($other_changed[--$j])
 462                      {
 463                          continue;
 464                      }
 465                  }
 466  
 467                  // Set CORRESPONDING to the end of the changed run, at the last point where it corresponds to a changed run in the
 468                  // other file. CORRESPONDING == LEN means no such point has been found.
 469                  $corresponding = $j < $other_len ? $i : $len;
 470  
 471                  // Move the changed region forward, so long as the first changed line matches the following unchanged one.
 472                  // This merges with following changed regions.
 473                  // Do this second, so that if there are no merges, the changed region is moved forward as far as possible.
 474                  while ($i < $len && $lines[$start] == $lines[$i])
 475                  {
 476                      $changed[$start++] = false;
 477                      $changed[$i++] = 1;
 478  
 479                      while ($i < $len && $changed[$i])
 480                      {
 481                          $i++;
 482                      }
 483  
 484                      $j++;
 485                      if ($j < $other_len && $other_changed[$j])
 486                      {
 487                          $corresponding = $i;
 488                          while ($j < $other_len && $other_changed[$j])
 489                          {
 490                              $j++;
 491                          }
 492                      }
 493                  }
 494              }
 495              while ($runlength != $i - $start);
 496  
 497              // If possible, move the fully-merged run of changes back to a corresponding run in the other file.
 498              while ($corresponding < $i)
 499              {
 500                  $changed[--$start] = 1;
 501                  $changed[--$i] = 0;
 502  
 503                  while ($other_changed[--$j])
 504                  {
 505                      continue;
 506                  }
 507              }
 508          }
 509      }
 510  }
 511  
 512  ?>
PHP Cross Reference of phpBB 3.0 Beta 3

/includes/diff/ -> engine.php (source)