Snips/src/Service/SnipContent/MyersDiff.php

233 lines
7.1 KiB
PHP

<?php
namespace App\Service\SnipContent;
/**
 * Line-oriented diff based on Myers' greedy shortest-edit-script algorithm.
 *
 * Diffs are produced in a "compact" form: a list of [op, payload] pairs where
 * op is a DiffTypeEnum backing value and payload is
 *  - the run length (int) for KEEP and DELETE entries,
 *  - the list of inserted tokens for INSERT entries.
 *
 * Strings are tokenised into lines by splitting on self::NEWLINE.
 */
class MyersDiff
{
    /** Separator used to split text into line tokens and to join them back. */
    private const string NEWLINE = "\r\n";

    /**
     * Backtrack through the intermediate results to extract the "snakes" that
     * are visited on the chosen "D-path".
     *
     * @param int[][] $v_save One frontier snapshot per round d, mapping diagonal k to the furthest x reached
     * @param int     $x      End position in the source sequence
     * @param int     $y      End position in the target sequence
     *
     * @return int[][] List of [x, y] snake end points, ordered from the start of the path to its end
     */
    private static function extractSnakes(array $v_save, int $x, int $y): array
    {
        $snakes = [];
        for ($d = count($v_save) - 1; $x >= 0 && $y >= 0; $d--) {
            // Collected end-to-start; reversed once below instead of paying for
            // an array_unshift() (which renumbers the whole list) per step.
            $snakes[] = [$x, $y];
            $v = $v_save[$d];
            $k = $x - $y;
            // Same decision as in the forward pass: did round $d reach diagonal
            // $k by stepping down from $k + 1 or right from $k - 1?
            if ($k === -$d || ($k !== $d && $v[$k - 1] < $v[$k + 1])) {
                $k_prev = $k + 1;
            } else {
                $k_prev = $k - 1;
            }
            $x = $v[$k_prev];
            $y = $x - $k_prev;
        }

        return array_reverse($snakes);
    }

    /**
     * Turn a list of snake end points into the compact edit script.
     *
     * Consecutive deletions and consecutive insertions are merged into a
     * single entry each.
     *
     * @param int[][] $snakes List of [x, y] snake end points as returned by extractSnakes()
     * @param array   $b      Target tokens as a zero-based list
     *
     * @return array[] List of [op, payload] pairs (see the class description)
     */
    private static function formatCompact(array $snakes, array $b): array
    {
        $solution = [];
        $x = 0;
        $y = 0;
        foreach ($snakes as [$snakeX, $snakeY]) {
            // Distance between the snake's diagonal and the current one:
            // positive means deletions are needed, negative means insertions.
            $shift = ($snakeX - $snakeY) - ($x - $y);
            $last = array_key_last($solution);

            if ($shift > 0) {
                // Deletions
                $x += $shift;
                if ($last !== null && DiffTypeEnum::DELETE->is($solution[$last][0])) {
                    $solution[$last][1] += $shift;
                } else {
                    $solution[] = [DiffTypeEnum::DELETE->value, $shift];
                }
            } elseif ($shift < 0) {
                // Insertions
                $values = array_slice($b, $y, -$shift);
                $y += -$shift;
                if ($last !== null && DiffTypeEnum::INSERT->is($solution[$last][0])) {
                    $solution[$last][1] = [...$solution[$last][1], ...$values];
                } else {
                    $solution[] = [DiffTypeEnum::INSERT->value, $values];
                }
            }

            // Keeps (snake diagonals)
            $count = $snakeX - $x;
            if ($count > 0) {
                $x += $count;
                $y += $count;
                $solution[] = [DiffTypeEnum::KEEP->value, $count];
            }
        }

        return $solution;
    }

    /**
     * Calculate the shortest edit sequence to convert $textFrom into $textTo.
     *
     * @param string|array $textFrom - source tokens; a string is split into lines on self::NEWLINE
     * @param string|array $textTo   - target tokens; a string is split into lines on self::NEWLINE
     * @param ?callable    $compare  - comparison function for tokens. Signature is compare($x, $y):bool. If null, === is used.
     *
     * @return array[] - compact edit script: list of [op, payload] pairs where payload is a
     *                   run length for KEEP/DELETE and the list of inserted tokens for INSERT
     */
    public static function calculate(string|array $textFrom, string|array $textTo, ?callable $compare = null): array
    {
        $a = array_values(is_string($textFrom) ? self::explode($textFrom) : $textFrom);
        $b = array_values(is_string($textTo) ? self::explode($textTo) : $textTo);
        $compare ??= static fn (mixed $x, mixed $y): bool => $x === $y;

        $n = count($a);
        $m = count($b);
        $max = $m + $n;

        $v_save = [];
        // $v[$k] is the furthest x reached so far on diagonal k = x - y.
        // The seed at k = 1 lets round 0 start from (0, 0).
        $v = [1 => 0];
        for ($d = 0; $d <= $max; $d++) {
            for ($k = -$d; $k <= $d; $k += 2) {
                if ($k === -$d || ($k !== $d && $v[$k - 1] < $v[$k + 1])) {
                    $x = $v[$k + 1];
                } else {
                    $x = $v[$k - 1] + 1;
                }
                $y = $x - $k;
                // Follow the diagonal for as long as the tokens match.
                while ($x < $n && $y < $m && $compare($a[$x], $b[$y])) {
                    $x++;
                    $y++;
                }
                $v[$k] = $x;
                if ($x === $n && $y === $m) {
                    $v_save[$d] = $v;
                    break 2;
                }
            }
            // One snapshot per round is enough for backtracking; snapshotting
            // inside the diagonal loop would force a copy of $v on every step.
            $v_save[$d] = $v;
        }

        return self::formatCompact(self::extractSnakes($v_save, $n, $m), $b);
    }

    /**
     * Apply a compact edit script to $textFrom and return the resulting text.
     *
     * @param string  $textFrom Source text, split into lines on self::NEWLINE
     * @param array[] $diff     Compact edit script as returned by calculate()
     *
     * @return string The rebuilt text, joined with self::NEWLINE
     *
     * @throws \InvalidArgumentException If the diff contains an unknown operation or
     *                                   keeps more lines than the source text has
     */
    public static function rebuildBFromCompact(string $textFrom, array $diff): string
    {
        $a = self::explode($textFrom);
        $available = count($a);
        $b = [];
        $x = 0;
        foreach ($diff as [$op, $data]) {
            switch ($op) {
                case DiffTypeEnum::KEEP->value:
                    // A diff that does not belong to this text would otherwise read
                    // undefined offsets and silently produce a corrupted result.
                    if ($x + $data > $available) {
                        throw new \InvalidArgumentException('Diff keeps more lines than the source text contains');
                    }
                    for ($i = 0; $i < $data; $i++) {
                        $b[] = $a[$x++];
                    }
                    break;
                case DiffTypeEnum::DELETE->value:
                    $x += $data; // skip deleted
                    break;
                case DiffTypeEnum::INSERT->value:
                    foreach ($data as $v) {
                        $b[] = $v;
                    }
                    break;
                default:
                    throw new \InvalidArgumentException('Invalid diff operation');
            }
        }

        return self::implode($b);
    }

    /**
     * Diff two texts line by line and expand the result into one row per line.
     *
     * @param string $textFrom Source text
     * @param string $textTo   Target text
     *
     * @return array[] List of rows with the keys 'type' ('keep', 'delete' or 'insert'),
     *                 'from' (source line, '' for insertions) and 'to' (target line, '' for deletions)
     *
     * @throws \InvalidArgumentException If the computed diff contains an unknown operation
     */
    public static function buildDiffLines(string $textFrom, string $textTo): array
    {
        $a = self::explode($textFrom);
        $b = self::explode($textTo);
        $diff = self::calculate($a, $b);
        $lines = [];
        $x = 0;
        foreach ($diff as [$op, $data]) {
            switch ($op) {
                case DiffTypeEnum::KEEP->value:
                    for ($i = 0; $i < $data; $i++) {
                        $lines[] = [
                            'type' => 'keep',
                            'from' => $a[$x],
                            'to' => $a[$x],
                        ];
                        $x++;
                    }
                    break;
                case DiffTypeEnum::DELETE->value:
                    for ($i = 0; $i < $data; $i++) {
                        $lines[] = [
                            'type' => 'delete',
                            'from' => $a[$x],
                            'to' => '',
                        ];
                        $x++;
                    }
                    break;
                case DiffTypeEnum::INSERT->value:
                    foreach ($data as $v) {
                        $lines[] = [
                            'type' => 'insert',
                            'from' => '',
                            'to' => $v,
                        ];
                    }
                    break;
                default:
                    throw new \InvalidArgumentException('Invalid diff operation');
            }
        }

        return $lines;
    }

    /**
     * Split text into line tokens on self::NEWLINE.
     *
     * @return string[]
     */
    private static function explode(string $text): array
    {
        return explode(self::NEWLINE, $text);
    }

    /**
     * Join line tokens back into text using self::NEWLINE.
     *
     * @param string[] $text
     */
    private static function implode(array $text): string
    {
        return implode(self::NEWLINE, $text);
    }
}