Create a customized Myers-diff-based system for snip content
This commit is contained in:
171
src/Service/SnipContent/MyersDiff.php
Normal file
171
src/Service/SnipContent/MyersDiff.php
Normal file
@ -0,0 +1,171 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\SnipContent;
|
||||
|
||||
class MyersDiff
{
    private const string NEWLINE = "\r\n";

    /**
     * Backtrack through the saved frontiers to extract the "snakes" that are
     * visited on the chosen "D-path".
     *
     * Starting from the end point, each step recomputes which neighbouring
     * diagonal (k + 1 or k - 1) the forward pass came from, using the same
     * condition as calculate(), and jumps to the furthest x recorded on it.
     *
     * @param array<int, array<int, int>> $v_save Frontier per edit count d (diagonal k => furthest x)
     * @param int $x End position in the source token list
     * @param int $y End position in the target token list
     *
     * @return int[][] List of [x, y] snake end points, ordered from the first step to the last
     */
    private static function extractSnakes(array $v_save, int $x, int $y): array
    {
        $reversed = [];

        // The $d >= 0 guard only protects against malformed input; for frontiers
        // produced by calculate() the walk ends at d = 0 with y becoming -1.
        for ($d = count($v_save) - 1; $d >= 0 && $x >= 0 && $y >= 0; $d--) {
            // Collected end-to-start and reversed once below, instead of an
            // array_unshift() per step.
            $reversed[] = [$x, $y];

            $v = $v_save[$d];
            $k = $x - $y;

            if ($k === -$d || ($k !== $d && $v[$k - 1] < $v[$k + 1])) {
                $k_prev = $k + 1;
            } else {
                $k_prev = $k - 1;
            }

            $x = $v[$k_prev];
            $y = $x - $k_prev;
        }

        return array_reverse($reversed);
    }

    /**
     * Turn the list of snake end points into a compact edit script.
     *
     * Each entry of the result is one of:
     *  - ['K', int]      keep that many tokens of the source,
     *  - ['D', int]      delete (skip) that many tokens of the source,
     *  - ['I', string[]] insert the given tokens.
     *
     * An insertion that directly follows another insertion is merged into it.
     *
     * @param int[][] $snakes Snake end points as returned by extractSnakes()
     * @param array   $b      Target tokens (supplies the inserted values)
     *
     * @return array[] The compact edit script
     */
    private static function formatCompact(array $snakes, array $b): array
    {
        $solution = [];
        $x = 0;
        $y = 0;

        foreach ($snakes as [$snakeX, $snakeY]) {
            $diagonal = $snakeX - $snakeY;

            // Deletions: move right until the snake's diagonal is reached.
            $deleted = 0;
            while ($diagonal > $x - $y) {
                $x++;
                $deleted++;
            }
            if ($deleted > 0) {
                $solution[] = ['D', $deleted];
            }

            // Insertions: move down until the snake's diagonal is reached.
            $values = [];
            while ($diagonal < $x - $y) {
                $values[] = $b[$y];
                $y++;
            }
            if ($values !== []) {
                // array_key_last() is null while the script is still empty, so an
                // insertion as the very first edit no longer reads index -1.
                $last = array_key_last($solution);
                if ($last !== null && $solution[$last][0] === 'I') {
                    $solution[$last][1] = array_merge($solution[$last][1], $values);
                } else {
                    $solution[] = ['I', $values];
                }
            }

            // Keeps (snake diagonals)
            $kept = 0;
            while ($x < $snakeX) {
                $x++;
                $y++;
                $kept++;
            }
            if ($kept > 0) {
                $solution[] = ['K', $kept];
            }
        }

        return $solution;
    }

    /**
     * Calculate a shortest edit script that converts $textFrom into $textTo.
     *
     * Both texts are split into tokens on "\r\n"; the Myers greedy algorithm is
     * then run on the two token lists.
     *
     * @param string    $textFrom Source text
     * @param string    $textTo   Target text
     * @param ?callable $compare  Comparison function for tokens. Signature is compare($x, $y): bool.
     *                            If null, === is used. Note that kept tokens are later copied from the
     *                            source by rebuildBFromCompact(), so a comparison looser than ===
     *                            yields a script that does not reproduce $textTo exactly.
     *
     * @return array[] Compact edit script: ['K', int] keep, ['D', int] delete, ['I', string[]] insert
     */
    public static function calculate(string $textFrom, string $textTo, ?callable $compare = null): array
    {
        // explode() already returns 0-based lists, so no re-indexing is needed.
        $a = self::explode($textFrom);
        $b = self::explode($textTo);

        $compare ??= static fn ($x, $y): bool => $x === $y;

        $n = count($a);
        $m = count($b);
        $max = $m + $n;

        $v_save = [];

        // $v maps diagonal k => furthest x reached on it; the seed makes d = 0 start at x = 0.
        $v = [1 => 0];
        for ($d = 0; $d <= $max; $d++) {
            $reachedEnd = false;

            for ($k = -$d; $k <= $d; $k += 2) {
                if ($k === -$d || ($k !== $d && $v[$k - 1] < $v[$k + 1])) {
                    $x = $v[$k + 1];
                } else {
                    $x = $v[$k - 1] + 1;
                }
                $y = $x - $k;

                // Follow the diagonal while tokens match.
                while ($x < $n && $y < $m && $compare($a[$x], $b[$y])) {
                    $x++;
                    $y++;
                }
                $v[$k] = $x;

                if ($x === $n && $y === $m) {
                    $reachedEnd = true;
                    break;
                }
            }

            // Snapshot once per step. Doing it inside the k loop made PHP copy
            // the whole frontier on every iteration (copy-on-write) for the same
            // final content.
            $v_save[$d] = $v;

            if ($reachedEnd) {
                break;
            }
        }

        return self::formatCompact(self::extractSnakes($v_save, $n, $m), $b);
    }

    /**
     * Apply a compact edit script to $textFrom and return the resulting text.
     *
     * Entries with an op other than 'K', 'D' or 'I' are ignored.
     *
     * @param string  $textFrom Source text the script was calculated against
     * @param array[] $diff     Compact edit script as returned by calculate()
     *
     * @return string The rebuilt target text, tokens joined with "\r\n"
     *
     * @throws \InvalidArgumentException If a keep/delete count is negative or runs past the end of the source
     */
    public static function rebuildBFromCompact(string $textFrom, array $diff): string
    {
        $a = self::explode($textFrom);
        $available = count($a);
        $b = [];
        $x = 0;

        foreach ($diff as [$op, $data]) {
            if ($op === 'K' || $op === 'D') {
                // A script that does not fit the source would otherwise read
                // undefined offsets and silently produce corrupted text.
                if ($data < 0 || $x + $data > $available) {
                    throw new \InvalidArgumentException(
                        'Diff does not match the source text: operation runs past the available tokens.'
                    );
                }
            }

            if ($op === 'K') {
                for ($i = 0; $i < $data; $i++) {
                    $b[] = $a[$x++];
                }
            } elseif ($op === 'D') {
                $x += $data; // skip deleted
            } elseif ($op === 'I') {
                foreach ($data as $value) {
                    $b[] = $value;
                }
            }
        }

        return self::implode($b);
    }

    /**
     * Split a text into tokens on the NEWLINE separator.
     *
     * @return string[]
     */
    private static function explode(string $text): array
    {
        return explode(self::NEWLINE, $text);
    }

    /**
     * Join tokens back into a text using the NEWLINE separator.
     *
     * @param string[] $text
     */
    private static function implode(array $text): string
    {
        return implode(self::NEWLINE, $text);
    }
}
|
Reference in New Issue
Block a user