aboutsummaryrefslogtreecommitdiffstats
path: root/library/class.Diff.php
blob: 689abe9e71dc7f2647f9f5a78e22ef8479f5132c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
<?php

/*

class.Diff.php

A class containing a diff implementation

Created by Stephen Morley - http://stephenmorley.org/ - and released under the
terms of the CC0 1.0 Universal legal code:

http://creativecommons.org/publicdomain/zero/1.0/legalcode

*/

// A class containing functions for computing diffs and formatting the output.
class Diff{

  // define the constants
  const UNMODIFIED = 0;
  const DELETED    = 1;
  const INSERTED   = 2;

  /* Returns the diff for two strings. The return value is an array, each of
   * whose values is an array containing two values: a line (or character, if
   * $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the
   * line or character is in both strings), DIFF::DELETED (the line or character
   * is only in the first string), and DIFF::INSERTED (the line or character is
   * only in the second string). The parameters are:
   *
   * $string1           - the first string
   * $string2           - the second string
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compare(
      $string1, $string2, $compareCharacters = false){

    // initialise the sequences and comparison start and end positions
    $start = 0;
    if ($compareCharacters){
      $sequence1 = $string1;
      $sequence2 = $string2;
      $end1 = strlen($string1) - 1;
      $end2 = strlen($string2) - 1;
    }else{
      $sequence1 = preg_split('/\R/', $string1);
      $sequence2 = preg_split('/\R/', $string2);
      $end1 = count($sequence1) - 1;
      $end2 = count($sequence2) - 1;
    }

    // skip any common prefix
    while ($start <= $end1 && $start <= $end2
        && $sequence1[$start] == $sequence2[$start]){
      $start ++;
    }

    // skip any common suffix
    while ($end1 >= $start && $end2 >= $start
        && $sequence1[$end1] == $sequence2[$end2]){
      $end1 --;
      $end2 --;
    }

    // compute the table of longest common subsequence lengths
    $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

    // generate the partial diff
    $partialDiff =
        self::generatePartialDiff($table, $sequence1, $sequence2, $start);

    // generate the full diff
    $diff = array();
    for ($index = 0; $index < $start; $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }
    while (count($partialDiff) > 0) $diff[] = array_pop($partialDiff);
    for ($index = $end1 + 1;
        $index < ($compareCharacters ? strlen($sequence1) : count($sequence1));
        $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }

    // return the diff
    return $diff;

  }

  /* Returns the diff for two files. The parameters are:
   *
   * $file1             - the path to the first file
   * $file2             - the path to the second file
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compareFiles(
      $file1, $file2, $compareCharacters = false){

    // return the diff of the files
    return self::compare(
        file_get_contents($file1),
        file_get_contents($file2),
        $compareCharacters);

  }

  /* Returns the table of longest common subsequence lengths for the specified
   * sequences. The parameters are:
   *
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   * $end1      - the ending index for the first sequence
   * $end2      - the ending index for the second sequence
   */
  private static function computeTable(
      $sequence1, $sequence2, $start, $end1, $end2){

    // determine the lengths to be compared
    $length1 = $end1 - $start + 1;
    $length2 = $end2 - $start + 1;

    // initialise the table
    $table = array(array_fill(0, $length2 + 1, 0));

    // loop over the rows
    for ($index1 = 1; $index1 <= $length1; $index1 ++){

      // create the new row
      $table[$index1] = array(0);

      // loop over the columns
      for ($index2 = 1; $index2 <= $length2; $index2 ++){

        // store the longest common subsequence length
        if ($sequence1[$index1 + $start - 1]
            == $sequence2[$index2 + $start - 1]){
          $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
        }else{
          $table[$index1][$index2] =
              max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
        }

      }
    }

    // return the table
    return $table;

  }

  /* Returns the partial diff for the specificed sequences, in reverse order.
   * The parameters are:
   *
   * $table     - the table returned by the computeTable function
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   */
  private static function generatePartialDiff(
      $table, $sequence1, $sequence2, $start){

    //  initialise the diff
    $diff = array();

    // initialise the indices
    $index1 = count($table) - 1;
    $index2 = count($table[0]) - 1;

    // loop until there are no items remaining in either sequence
    while ($index1 > 0 || $index2 > 0){

      // check what has happened to the items at these indices
      if ($index1 > 0 && $index2 > 0
          && $sequence1[$index1 + $start - 1]
              == $sequence2[$index2 + $start - 1]){

        // update the diff and the indices
        $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
        $index1 --;
        $index2 --;

      }elseif ($index2 > 0
          && $table[$index1][$index2] == $table[$index1][$index2 - 1]){

        // update the diff and the indices
        $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
        $index2 --;

      }else{

        // update the diff and the indices
        $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
        $index1 --;

      }

    }

    // return the diff
    return $diff;

  }

  /* Returns a diff as a string, where unmodified lines are prefixed by '  ',
   * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
   * parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to "\n"
   */
  public static function toString($diff, $separator = "\n"){

    // initialise the string
    $string = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // extend the string with the line
      switch ($line[1]){
        case self::UNMODIFIED : $string .= '  ' . $line[0];break;
        case self::DELETED    : $string .= '- ' . $line[0];break;
        case self::INSERTED   : $string .= '+ ' . $line[0];break;
      }

      // extend the string with the separator
      $string .= $separator;

    }

    // return the string
    return $string;

  }

  /* Returns a diff as an HTML string, where unmodified lines are contained
   * within 'span' elements, deletions are contained within 'del' elements, and
   * insertions are contained within 'ins' elements. The parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to '<br>'
   */
  public static function toHTML($diff, $separator = '<br>'){

    // initialise the HTML
    $html = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // extend the HTML with the line
      switch ($line[1]){
        case self::UNMODIFIED : $element = 'span'; break;
        case self::DELETED    : $element = 'del';  break;
        case self::INSERTED   : $element = 'ins';  break;
      }
      $html .=
          '<' . $element . '>'
          . htmlspecialchars($line[0])
          . '</' . $element . '>';

      // extend the HTML with the separator
      $html .= $separator;

    }

    // return the HTML
    return $html;

  }

  /* Returns a diff as an HTML table. The parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML; this
   *                optional parameter defaults to ''
   * $separator   - the separator between lines; this optional parameter
   *                defaults to '<br>'
   */
  public static function toTable($diff, $indentation = '', $separator = '<br>'){

    // initialise the HTML
    $html = $indentation . "<table class=\"diff\">\n";

    // loop over the lines in the diff
    $index = 0;
    while ($index < count($diff)){

      // determine the line type
      switch ($diff[$index][1]){

        // display the content on the left and right
        case self::UNMODIFIED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::UNMODIFIED);
          $rightCell = $leftCell;
          break;

        // display the deleted on the left and inserted content on the right
        case self::DELETED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::DELETED);
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

        // display the inserted content on the right
        case self::INSERTED:
          $leftCell = '';
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

      }

      // extend the HTML with the new row
      $html .=
          $indentation
          . "  <tr>\n"
          . $indentation
          . '    <td class="diff'
          . ($leftCell == $rightCell
              ? 'Unmodified'
              : ($leftCell == '' ? 'Blank' : 'Deleted'))
          . '">'
          . $leftCell
          . "</td>\n"
          . $indentation
          . '    <td class="diff'
          . ($leftCell == $rightCell
              ? 'Unmodified'
              : ($rightCell == '' ? 'Blank' : 'Inserted'))
          . '">'
          . $rightCell
          . "</td>\n"
          . $indentation
          . "  </tr>\n";

    }

    // return the HTML
    return $html . $indentation . "</table>\n";

  }

  /* Returns the content of the cell, for use in the toTable function. The
   * parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML
   * $separator   - the separator between lines
   * $index       - the current index, passes by reference
   * $type        - the type of line
   */
  private static function getCellContent(
      $diff, $indentation, $separator, &$index, $type){

    // initialise the HTML
    $html = '';

    // loop over the matching lines, adding them to the HTML
    while ($index < count($diff) && $diff[$index][1] == $type){
      $html .=
          '<span>'
          . htmlspecialchars($diff[$index][0])
          . '</span>'
          . $separator;
      $index ++;
    }

    // return the HTML
    return $html;

  }

}

?>