aboutsummaryrefslogblamecommitdiffstats
path: root/library/class.Diff.php
blob: 689abe9e71dc7f2647f9f5a78e22ef8479f5132c (plain) (tree)
































































































































































































































































































































































































                                                                                
<?php

/*

class.Diff.php

A class containing a diff implementation

Created by Stephen Morley - http://stephenmorley.org/ - and released under the
terms of the CC0 1.0 Universal legal code:

http://creativecommons.org/publicdomain/zero/1.0/legalcode

*/

// A class containing functions for computing diffs and formatting the output.
class Diff{

  // define the constants
  const UNMODIFIED = 0;
  const DELETED    = 1;
  const INSERTED   = 2;

  /* Returns the diff for two strings. The return value is an array, each of
   * whose values is an array containing two values: a line (or character, if
   * $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the
   * line or character is in both strings), DIFF::DELETED (the line or character
   * is only in the first string), and DIFF::INSERTED (the line or character is
   * only in the second string). The parameters are:
   *
   * $string1           - the first string
   * $string2           - the second string
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compare(
      $string1, $string2, $compareCharacters = false){

    // initialise the sequences and comparison start and end positions
    $start = 0;
    if ($compareCharacters){
      $sequence1 = $string1;
      $sequence2 = $string2;
      $end1 = strlen($string1) - 1;
      $end2 = strlen($string2) - 1;
    }else{
      $sequence1 = preg_split('/\R/', $string1);
      $sequence2 = preg_split('/\R/', $string2);
      $end1 = count($sequence1) - 1;
      $end2 = count($sequence2) - 1;
    }

    // skip any common prefix
    while ($start <= $end1 && $start <= $end2
        && $sequence1[$start] == $sequence2[$start]){
      $start ++;
    }

    // skip any common suffix
    while ($end1 >= $start && $end2 >= $start
        && $sequence1[$end1] == $sequence2[$end2]){
      $end1 --;
      $end2 --;
    }

    // compute the table of longest common subsequence lengths
    $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

    // generate the partial diff
    $partialDiff =
        self::generatePartialDiff($table, $sequence1, $sequence2, $start);

    // generate the full diff
    $diff = array();
    for ($index = 0; $index < $start; $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }
    while (count($partialDiff) > 0) $diff[] = array_pop($partialDiff);
    for ($index = $end1 + 1;
        $index < ($compareCharacters ? strlen($sequence1) : count($sequence1));
        $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }

    // return the diff
    return $diff;

  }

  /* Returns the diff for two files. The parameters are:
   *
   * $file1             - the path to the first file
   * $file2             - the path to the second file
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compareFiles(
      $file1, $file2, $compareCharacters = false){

    // return the diff of the files
    return self::compare(
        file_get_contents($file1),
        file_get_contents($file2),
        $compareCharacters);

  }

  /* Returns the table of longest common subsequence lengths for the specified
   * sequences. The parameters are:
   *
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   * $end1      - the ending index for the first sequence
   * $end2      - the ending index for the second sequence
   */
  private static function computeTable(
      $sequence1, $sequence2, $start, $end1, $end2){

    // determine the lengths to be compared
    $length1 = $end1 - $start + 1;
    $length2 = $end2 - $start + 1;

    // initialise the table
    $table = array(array_fill(0, $length2 + 1, 0));

    // loop over the rows
    for ($index1 = 1; $index1 <= $length1; $index1 ++){

      // create the new row
      $table[$index1] = array(0);

      // loop over the columns
      for ($index2 = 1; $index2 <= $length2; $index2 ++){

        // store the longest common subsequence length
        if ($sequence1[$index1 + $start - 1]
            == $sequence2[$index2 + $start - 1]){
          $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
        }else{
          $table[$index1][$index2] =
              max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
        }

      }
    }

    // return the table
    return $table;

  }

  /* Returns the partial diff for the specificed sequences, in reverse order.
   * The parameters are:
   *
   * $table     - the table returned by the computeTable function
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   */
  private static function generatePartialDiff(
      $table, $sequence1, $sequence2, $start){

    //  initialise the diff
    $diff = array();

    // initialise the indices
    $index1 = count($table) - 1;
    $index2 = count($table[0]) - 1;

    // loop until there are no items remaining in either sequence
    while ($index1 > 0 || $index2 > 0){

      // check what has happened to the items at these indices
      if ($index1 > 0 && $index2 > 0
          && $sequence1[$index1 + $start - 1]
              == $sequence2[$index2 + $start - 1]){

        // update the diff and the indices
        $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
        $index1 --;
        $index2 --;

      }elseif ($index2 > 0
          && $table[$index1][$index2] == $table[$index1][$index2 - 1]){

        // update the diff and the indices
        $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
        $index2 --;

      }else{

        // update the diff and the indices
        $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
        $index1 --;

      }

    }

    // return the diff
    return $diff;

  }

  /* Returns a diff as a string, where unmodified lines are prefixed by '  ',
   * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
   * parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to "\n"
   */
  public static function toString($diff, $separator = "\n"){

    // initialise the string
    $string = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // extend the string with the line
      switch ($line[1]){
        case self::UNMODIFIED : $string .= '  ' . $line[0];break;
        case self::DELETED    : $string .= '- ' . $line[0];break;
        case self::INSERTED   : $string .= '+ ' . $line[0];break;
      }

      // extend the string with the separator
      $string .= $separator;

    }

    // return the string
    return $string;

  }

  /* Returns a diff as an HTML string, where unmodified lines are contained
   * within 'span' elements, deletions are contained within 'del' elements, and
   * insertions are contained within 'ins' elements. The parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to '<br>'
   */
  public static function toHTML($diff, $separator = '<br>'){

    // initialise the HTML
    $html = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // extend the HTML with the line
      switch ($line[1]){
        case self::UNMODIFIED : $element = 'span'; break;
        case self::DELETED    : $element = 'del';  break;
        case self::INSERTED   : $element = 'ins';  break;
      }
      $html .=
          '<' . $element . '>'
          . htmlspecialchars($line[0])
          . '</' . $element . '>';

      // extend the HTML with the separator
      $html .= $separator;

    }

    // return the HTML
    return $html;

  }

  /* Returns a diff as an HTML table. The parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML; this
   *                optional parameter defaults to ''
   * $separator   - the separator between lines; this optional parameter
   *                defaults to '<br>'
   */
  public static function toTable($diff, $indentation = '', $separator = '<br>'){

    // initialise the HTML
    $html = $indentation . "<table class=\"diff\">\n";

    // loop over the lines in the diff
    $index = 0;
    while ($index < count($diff)){

      // determine the line type
      switch ($diff[$index][1]){

        // display the content on the left and right
        case self::UNMODIFIED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::UNMODIFIED);
          $rightCell = $leftCell;
          break;

        // display the deleted on the left and inserted content on the right
        case self::DELETED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::DELETED);
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

        // display the inserted content on the right
        case self::INSERTED:
          $leftCell = '';
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

      }

      // extend the HTML with the new row
      $html .=
          $indentation
          . "  <tr>\n"
          . $indentation
          . '    <td class="diff'
          . ($leftCell == $rightCell
              ? 'Unmodified'
              : ($leftCell == '' ? 'Blank' : 'Deleted'))
          . '">'
          . $leftCell
          . "</td>\n"
          . $indentation
          . '    <td class="diff'
          . ($leftCell == $rightCell
              ? 'Unmodified'
              : ($rightCell == '' ? 'Blank' : 'Inserted'))
          . '">'
          . $rightCell
          . "</td>\n"
          . $indentation
          . "  </tr>\n";

    }

    // return the HTML
    return $html . $indentation . "</table>\n";

  }

  /* Returns the content of the cell, for use in the toTable function. The
   * parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML
   * $separator   - the separator between lines
   * $index       - the current index, passes by reference
   * $type        - the type of line
   */
  private static function getCellContent(
      $diff, $indentation, $separator, &$index, $type){

    // initialise the HTML
    $html = '';

    // loop over the matching lines, adding them to the HTML
    while ($index < count($diff) && $diff[$index][1] == $type){
      $html .=
          '<span>'
          . htmlspecialchars($diff[$index][0])
          . '</span>'
          . $separator;
      $index ++;
    }

    // return the HTML
    return $html;

  }

}

?>