233 lines
7.1 KiB
PHP
233 lines
7.1 KiB
PHP
<?php
|
|
|
|
namespace App\Service\SnipContent;
|
|
|
|
/**
 * Diff of two token sequences using Myers' greedy shortest-edit-script search.
 *
 * Diffs are produced and consumed in a "compact" format: a list of
 * [operation, payload] pairs, where operation is a DiffTypeEnum value and
 * payload is
 *  - for KEEP and DELETE: the number of consecutive source tokens affected,
 *  - for INSERT: the list of tokens to insert.
 * Adjacent operations of the same type are merged into a single entry.
 */
class MyersDiff
{
    /** Separator used to split a text into lines and to join lines back into a text. */
    private const string NEWLINE = "\r\n";

    /**
     * Backtrack through the intermediate results to extract the "snakes" that
     * are visited on the chosen "D-path".
     *
     * @param int[][] $v_save Frontier snapshots, one per edit distance d (diagonal k => furthest x)
     * @param int     $x      End position in the source sequence
     * @param int     $y      End position in the target sequence
     *
     * @return int[][] Snake end points as [x, y] pairs, ordered from the start of the path to its end
     */
    private static function extractSnakes(array $v_save, int $x, int $y): array
    {
        $snakes = [];

        // Walk from the last snapshot back to d = 0. The loop stops once the
        // predecessor computed for d = 0 falls off the grid (y becomes -1).
        for ($d = count($v_save) - 1; $x >= 0 && $y >= 0; $d--) {
            // Collected end-to-start and reversed once at the end; avoids the
            // quadratic cost of array_unshift() on every step.
            $snakes[] = [$x, $y];

            $v = $v_save[$d];
            $k = $x - $y;

            // Same predecessor choice as the forward search in calculate().
            if ($k === -$d || ($k !== $d && $v[$k - 1] < $v[$k + 1])) {
                $k_prev = $k + 1;
            } else {
                $k_prev = $k - 1;
            }

            $x = $v[$k_prev];
            $y = $x - $k_prev;
        }

        return array_reverse($snakes);
    }

    /**
     * Convert a list of snake end points into the compact edit script.
     *
     * @param int[][] $snakes Snake end points as returned by extractSnakes()
     * @param array   $b      Target tokens (0-based list); supplies the inserted values
     *
     * @return array[] Compact edit script (see class description)
     */
    private static function formatCompact(array $snakes, array $b): array
    {
        $solution = [];
        $x = 0;
        $y = 0;

        foreach ($snakes as [$snakeX, $snakeY]) {
            $diagonal = $snakeX - $snakeY;

            // Deletions: move right until we are on the snake's diagonal.
            $deleted = 0;
            while ($diagonal > $x - $y) {
                $x++;
                $deleted++;
            }
            if ($deleted > 0) {
                // Merge with a directly preceding DELETE so that runs of
                // deletions are as compact as runs of insertions.
                $last = count($solution) - 1;
                if ($last >= 0 && DiffTypeEnum::DELETE->is($solution[$last][0])) {
                    $solution[$last][1] += $deleted;
                } else {
                    $solution[] = [DiffTypeEnum::DELETE->value, $deleted];
                }
            }

            // Insertions: move down until we are on the snake's diagonal.
            $inserted = [];
            while ($diagonal < $x - $y) {
                $inserted[] = $b[$y];
                $y++;
            }
            if ($inserted !== []) {
                $last = count($solution) - 1;
                if ($last >= 0 && DiffTypeEnum::INSERT->is($solution[$last][0])) {
                    $solution[$last][1] = array_merge($solution[$last][1], $inserted);
                } else {
                    $solution[] = [DiffTypeEnum::INSERT->value, $inserted];
                }
            }

            // Keeps (snake diagonals): advance both positions to the snake's end.
            $kept = $snakeX - $x;
            if ($kept > 0) {
                $x += $kept;
                $y += $kept;
                $solution[] = [DiffTypeEnum::KEEP->value, $kept];
            }
        }

        return $solution;
    }

    /**
     * Calculate the shortest edit sequence to convert $textFrom into $textTo.
     *
     * A string argument is split into lines on self::NEWLINE; an array argument
     * is used as the token list as-is (its keys are ignored).
     *
     * @param string|array $textFrom - source text or tokens (characters, words or lines)
     * @param string|array $textTo   - target text or tokens (characters, words or lines)
     * @param ?callable    $compare  - comparison function for tokens. Signature is compare($x, $y):bool. If null, === is used.
     *
     * @return array[] - compact edit script: [op, count] for keep/delete, [op, tokens] for insert
     */
    public static function calculate(string|array $textFrom, string|array $textTo, ?callable $compare = null): array
    {
        $a = array_values(is_string($textFrom) ? self::explode($textFrom) : $textFrom);
        $b = array_values(is_string($textTo) ? self::explode($textTo) : $textTo);
        $compare ??= static fn ($x, $y): bool => $x === $y;

        $n = count($a);
        $m = count($b);
        $max = $m + $n;

        $v_save = [];

        // $v maps diagonal k (= x - y) to the furthest x reached on it so far.
        $v = [1 => 0];
        for ($d = 0; $d <= $max; $d++) {
            $reachedEnd = false;

            for ($k = -$d; $k <= $d; $k += 2) {
                if ($k === -$d || ($k !== $d && $v[$k - 1] < $v[$k + 1])) {
                    // Step down from diagonal k + 1 (insertion).
                    $x = $v[$k + 1];
                } else {
                    // Step right from diagonal k - 1 (deletion).
                    $x = $v[$k - 1] + 1;
                }
                $y = $x - $k;

                // Follow the snake: consume matching tokens along the diagonal.
                while ($x < $n && $y < $m && $compare($a[$x], $b[$y])) {
                    $x++;
                    $y++;
                }
                $v[$k] = $x;

                if ($x === $n && $y === $m) {
                    $reachedEnd = true;
                    break;
                }
            }

            // Snapshot once per d. Doing this inside the $k loop would make
            // every following write to $v duplicate the whole frontier array.
            $v_save[$d] = $v;

            if ($reachedEnd) {
                break;
            }
        }

        return self::formatCompact(self::extractSnakes($v_save, $n, $m), $b);
    }

    /**
     * Apply a compact edit script to $textFrom and return the resulting text.
     *
     * @param string  $textFrom Source text; split into lines on self::NEWLINE
     * @param array[] $diff     Compact edit script as returned by calculate()
     *
     * @return string Resulting lines joined with self::NEWLINE
     *
     * @throws \InvalidArgumentException When an entry has an unknown operation
     */
    public static function rebuildBFromCompact(string $textFrom, array $diff): string
    {
        $a = self::explode($textFrom);
        $b = [];
        $x = 0;

        foreach ($diff as [$op, $data]) {
            switch ($op) {
                case DiffTypeEnum::KEEP->value:
                    for ($i = 0; $i < $data; $i++) {
                        $b[] = $a[$x++];
                    }
                    break;
                case DiffTypeEnum::DELETE->value:
                    $x += $data; // skip deleted
                    break;
                case DiffTypeEnum::INSERT->value:
                    foreach ($data as $v) {
                        $b[] = $v;
                    }
                    break;
                default:
                    throw new \InvalidArgumentException('Invalid diff operation');
            }
        }

        return self::implode($b);
    }

    /**
     * Diff two texts line by line and expand the result into one row per line.
     *
     * @param string $textFrom Source text
     * @param string $textTo   Target text
     *
     * @return array[] Rows with keys 'type' ('keep', 'delete' or 'insert'), 'from' and 'to';
     *                 the side a row does not apply to is an empty string
     *
     * @throws \InvalidArgumentException When the computed diff has an unknown operation
     */
    public static function buildDiffLines(string $textFrom, string $textTo): array
    {
        $a = self::explode($textFrom);
        $b = self::explode($textTo);
        $diff = self::calculate($a, $b);

        $lines = [];
        $x = 0;
        foreach ($diff as [$op, $data]) {
            switch ($op) {
                case DiffTypeEnum::KEEP->value:
                    for ($i = 0; $i < $data; $i++) {
                        $lines[] = [
                            'type' => 'keep',
                            'from' => $a[$x],
                            'to' => $a[$x],
                        ];
                        $x++;
                    }
                    break;
                case DiffTypeEnum::DELETE->value:
                    for ($i = 0; $i < $data; $i++) {
                        $lines[] = [
                            'type' => 'delete',
                            'from' => $a[$x],
                            'to' => '',
                        ];
                        $x++;
                    }
                    break;
                case DiffTypeEnum::INSERT->value:
                    foreach ($data as $v) {
                        $lines[] = [
                            'type' => 'insert',
                            'from' => '',
                            'to' => $v,
                        ];
                    }
                    break;
                default:
                    throw new \InvalidArgumentException('Invalid diff operation');
            }
        }

        return $lines;
    }

    /**
     * Split a text into lines on self::NEWLINE.
     *
     * @return string[]
     */
    private static function explode(string $text): array
    {
        return explode(self::NEWLINE, $text);
    }

    /**
     * Join lines into a text using self::NEWLINE.
     *
     * @param string[] $text
     */
    private static function implode(array $text): string
    {
        return implode(self::NEWLINE, $text);
    }
}