<?php /* class.Diff.php A class containing a diff implementation Created by Stephen Morley - http://stephenmorley.org/ - and released under the terms of the CC0 1.0 Universal legal code: http://creativecommons.org/publicdomain/zero/1.0/legalcode */ // A class containing functions for computing diffs and formatting the output. class Diff{ // define the constants const UNMODIFIED = 0; const DELETED = 1; const INSERTED = 2; /* Returns the diff for two strings. The return value is an array, each of * whose values is an array containing two values: a line (or character, if * $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the * line or character is in both strings), DIFF::DELETED (the line or character * is only in the first string), and DIFF::INSERTED (the line or character is * only in the second string). The parameters are: * * $string1 - the first string * $string2 - the second string * $compareCharacters - true to compare characters, and false to compare * lines; this optional parameter defaults to false */ public static function compare( $string1, $string2, $compareCharacters = false){ // initialise the sequences and comparison start and end positions $start = 0; if ($compareCharacters){ $sequence1 = $string1; $sequence2 = $string2; $end1 = strlen($string1) - 1; $end2 = strlen($string2) - 1; }else{ $sequence1 = preg_split('/\R/', $string1); $sequence2 = preg_split('/\R/', $string2); $end1 = count($sequence1) - 1; $end2 = count($sequence2) - 1; } // skip any common prefix while ($start <= $end1 && $start <= $end2 && $sequence1[$start] == $sequence2[$start]){ $start ++; } // skip any common suffix while ($end1 >= $start && $end2 >= $start && $sequence1[$end1] == $sequence2[$end2]){ $end1 --; $end2 --; } // compute the table of longest common subsequence lengths $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2); // generate the partial diff $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start); // generate the full diff $diff = array(); for ($index = 0; $index < $start; $index ++){ $diff[] = array($sequence1[$index], self::UNMODIFIED); } while (count($partialDiff) > 0) $diff[] = array_pop($partialDiff); for ($index = $end1 + 1; $index < ($compareCharacters ? strlen($sequence1) : count($sequence1)); $index ++){ $diff[] = array($sequence1[$index], self::UNMODIFIED); } // return the diff return $diff; } /* Returns the diff for two files. The parameters are: * * $file1 - the path to the first file * $file2 - the path to the second file * $compareCharacters - true to compare characters, and false to compare * lines; this optional parameter defaults to false */ public static function compareFiles( $file1, $file2, $compareCharacters = false){ // return the diff of the files return self::compare( file_get_contents($file1), file_get_contents($file2), $compareCharacters); } /* Returns the table of longest common subsequence lengths for the specified * sequences. The parameters are: * * $sequence1 - the first sequence * $sequence2 - the second sequence * $start - the starting index * $end1 - the ending index for the first sequence * $end2 - the ending index for the second sequence */ private static function computeTable( $sequence1, $sequence2, $start, $end1, $end2){ // determine the lengths to be compared $length1 = $end1 - $start + 1; $length2 = $end2 - $start + 1; // initialise the table $table = array(array_fill(0, $length2 + 1, 0)); // loop over the rows for ($index1 = 1; $index1 <= $length1; $index1 ++){ // create the new row $table[$index1] = array(0); // loop over the columns for ($index2 = 1; $index2 <= $length2; $index2 ++){ // store the longest common subsequence length if ($sequence1[$index1 + $start - 1] == $sequence2[$index2 + $start - 1]){ $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1; }else{ $table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]); } } } // return the table return $table; } /* Returns the partial diff for the specificed sequences, in reverse order. * The parameters are: * * $table - the table returned by the computeTable function * $sequence1 - the first sequence * $sequence2 - the second sequence * $start - the starting index */ private static function generatePartialDiff( $table, $sequence1, $sequence2, $start){ // initialise the diff $diff = array(); // initialise the indices $index1 = count($table) - 1; $index2 = count($table[0]) - 1; // loop until there are no items remaining in either sequence while ($index1 > 0 || $index2 > 0){ // check what has happened to the items at these indices if ($index1 > 0 && $index2 > 0 && $sequence1[$index1 + $start - 1] == $sequence2[$index2 + $start - 1]){ // update the diff and the indices $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED); $index1 --; $index2 --; }elseif ($index2 > 0 && $table[$index1][$index2] == $table[$index1][$index2 - 1]){ // update the diff and the indices $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED); $index2 --; }else{ // update the diff and the indices $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED); $index1 --; } } // return the diff return $diff; } /* Returns a diff as a string, where unmodified lines are prefixed by ' ', * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The * parameters are: * * $diff - the diff array * $separator - the separator between lines; this optional parameter defaults * to "\n" */ public static function toString($diff, $separator = "\n"){ // initialise the string $string = ''; // loop over the lines in the diff foreach ($diff as $line){ // extend the string with the line switch ($line[1]){ case self::UNMODIFIED : $string .= ' ' . $line[0];break; case self::DELETED : $string .= '- ' . $line[0];break; case self::INSERTED : $string .= '+ ' . $line[0];break; } // extend the string with the separator $string .= $separator; } // return the string return $string; } /* Returns a diff as an HTML string, where unmodified lines are contained * within 'span' elements, deletions are contained within 'del' elements, and * insertions are contained within 'ins' elements. The parameters are: * * $diff - the diff array * $separator - the separator between lines; this optional parameter defaults * to '<br>' */ public static function toHTML($diff, $separator = '<br>'){ // initialise the HTML $html = ''; // loop over the lines in the diff foreach ($diff as $line){ // extend the HTML with the line switch ($line[1]){ case self::UNMODIFIED : $element = 'span'; break; case self::DELETED : $element = 'del'; break; case self::INSERTED : $element = 'ins'; break; } $html .= '<' . $element . '>' . htmlspecialchars($line[0]) . '</' . $element . '>'; // extend the HTML with the separator $html .= $separator; } // return the HTML return $html; } /* Returns a diff as an HTML table. The parameters are: * * $diff - the diff array * $indentation - indentation to add to every line of the generated HTML; this * optional parameter defaults to '' * $separator - the separator between lines; this optional parameter * defaults to '<br>' */ public static function toTable($diff, $indentation = '', $separator = '<br>'){ // initialise the HTML $html = $indentation . "<table class=\"diff\">\n"; // loop over the lines in the diff $index = 0; while ($index < count($diff)){ // determine the line type switch ($diff[$index][1]){ // display the content on the left and right case self::UNMODIFIED: $leftCell = self::getCellContent( $diff, $indentation, $separator, $index, self::UNMODIFIED); $rightCell = $leftCell; break; // display the deleted on the left and inserted content on the right case self::DELETED: $leftCell = self::getCellContent( $diff, $indentation, $separator, $index, self::DELETED); $rightCell = self::getCellContent( $diff, $indentation, $separator, $index, self::INSERTED); break; // display the inserted content on the right case self::INSERTED: $leftCell = ''; $rightCell = self::getCellContent( $diff, $indentation, $separator, $index, self::INSERTED); break; } // extend the HTML with the new row $html .= $indentation . " <tr>\n" . $indentation . ' <td class="diff' . ($leftCell == $rightCell ? 'Unmodified' : ($leftCell == '' ? 'Blank' : 'Deleted')) . '">' . $leftCell . "</td>\n" . $indentation . ' <td class="diff' . ($leftCell == $rightCell ? 'Unmodified' : ($rightCell == '' ? 'Blank' : 'Inserted')) . '">' . $rightCell . "</td>\n" . $indentation . " </tr>\n"; } // return the HTML return $html . $indentation . "</table>\n"; } /* Returns the content of the cell, for use in the toTable function. The * parameters are: * * $diff - the diff array * $indentation - indentation to add to every line of the generated HTML * $separator - the separator between lines * $index - the current index, passes by reference * $type - the type of line */ private static function getCellContent( $diff, $indentation, $separator, &$index, $type){ // initialise the HTML $html = ''; // loop over the matching lines, adding them to the HTML while ($index < count($diff) && $diff[$index][1] == $type){ $html .= '<span>' . htmlspecialchars($diff[$index][0]) . '</span>' . $separator; $index ++; } // return the HTML return $html; } } ?>