Implement Lexer class for tokenization and replace all line endings with PHP_EOL

This commit is contained in:
Tim 2025-05-10 14:35:11 +02:00
parent 771f354346
commit cda03f7b67
3 changed files with 60 additions and 21 deletions

View File

@ -0,0 +1,45 @@
<?php
namespace App\Command;
use App\Entity\SnipContent;
use App\Repository\SnipContentRepository;
use App\Service\SnipContent\Lexer;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console command that rewrites the text of every stored SnipContent so that
 * its line endings are normalised by a Lexer tokenize/reconstruct round trip.
 */
#[AsCommand(
    name: 'app:snip:update-content',
    description: 'Update Snip content line endings',
)]
class SnipUpdateContentCommand extends Command
{
    public function __construct(
        private readonly SnipContentRepository $snipContentRepository,
    ) {
        parent::__construct();
    }

    /**
     * Load every SnipContent with a non-null text, normalise its line endings
     * and hand it back to the repository, then report how many records were
     * processed.
     *
     * @return int always Command::SUCCESS
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);

        $snipContents = $this->snipContentRepository
            ->createQueryBuilder('s')
            ->where('s.text IS NOT NULL')
            ->getQuery()
            ->getResult();

        $processed = 0;

        /** @var SnipContent $snipContent */
        foreach ($snipContents as $snipContent) {
            // Splitting into lines and joining them again is what replaces the
            // stored line endings with the ones Lexer::reconstruct() emits.
            $normalized = Lexer::reconstruct(Lexer::tokenize($snipContent->getText()));
            $snipContent->setText($normalized);

            // NOTE(review): save() is the repository's own method; whether it
            // flushes per call is not visible from this file.
            $this->snipContentRepository->save($snipContent);
            ++$processed;
        }

        $io->success(sprintf('Normalized line endings of %d snip content record(s).', $processed));

        return Command::SUCCESS;
    }
}

View File

@ -4,5 +4,13 @@ namespace App\Service\SnipContent;
class Lexer
{
    /**
     * Split text into line tokens regardless of the line-ending style it uses.
     *
     * Every carriage return is removed first, so "\r\n" collapses to "\n", and
     * the result is split on "\n". Splitting on PHP_EOL would fail wherever
     * PHP_EOL is "\r\n": once the "\r" characters are gone that sequence can no
     * longer occur and the whole text would be returned as a single token.
     *
     * @return list<string> the lines of $text without their terminators; an
     *                      empty string yields a list with one empty string
     */
    public static function tokenize(string $text): array
    {
        $text = str_replace("\r", '', $text);

        return explode("\n", $text);
    }

    /**
     * Join line tokens into one string, separated by PHP_EOL.
     *
     * @param list<string> $tokens lines, e.g. as returned by tokenize()
     */
    public static function reconstruct(array $tokens): string
    {
        return implode(PHP_EOL, $tokens);
    }
}

View File

@ -4,8 +4,6 @@ namespace App\Service\SnipContent;
class MyersDiff class MyersDiff
{ {
private const string NEWLINE = "\r\n";
/** /**
* Backtrack through the intermediate results to extract the "snakes" that * Backtrack through the intermediate results to extract the "snakes" that
* are visited on the chosen "D-path". * are visited on the chosen "D-path".
@ -97,20 +95,18 @@ class MyersDiff
public static function calculate(string|array $textFrom, string|array $textTo, ?callable $compare = null): array public static function calculate(string|array $textFrom, string|array $textTo, ?callable $compare = null): array
{ {
if (is_string($textFrom)) { if (is_string($textFrom)) {
$a = self::explode($textFrom); $a = Lexer::tokenize($textFrom);
} else { } else {
$a = $textFrom; $a = $textFrom;
} }
if (is_string($textTo)) { if (is_string($textTo)) {
$b = self::explode($textTo); $b = Lexer::tokenize($textTo);
} else { } else {
$b = $textTo; $b = $textTo;
} }
if ($compare === null) { if ($compare === null) {
$compare = function ($x, $y) { $compare = fn($x, $y) => $x === $y;
return $x === $y;
};
} }
$n = count($a); $n = count($a);
@ -147,7 +143,7 @@ class MyersDiff
public static function rebuildBFromCompact(string $textFrom, array $diff): string public static function rebuildBFromCompact(string $textFrom, array $diff): string
{ {
$a = self::explode($textFrom); $a = Lexer::tokenize($textFrom);
$b = []; $b = [];
$x = 0; $x = 0;
@ -171,13 +167,13 @@ class MyersDiff
} }
} }
return self::implode($b); return Lexer::reconstruct($b);
} }
public static function buildDiffLines(string $textFrom, string $textTo): array public static function buildDiffLines(string $textFrom, string $textTo): array
{ {
$a = self::explode($textFrom); $a = Lexer::tokenize($textFrom);
$b = self::explode($textTo); $b = Lexer::tokenize($textTo);
$diff = MyersDiff::calculate($a, $b); $diff = MyersDiff::calculate($a, $b);
$lines = []; $lines = [];
@ -223,14 +219,4 @@ class MyersDiff
return $lines; return $lines;
} }
/**
 * Split $text into pieces on the class's NEWLINE constant (declared as "\r\n"
 * in this class).
 *
 * @return array the substrings of $text that were separated by NEWLINE
 */
private static function explode(string $text): array
{
return explode(self::NEWLINE, $text);
}
/**
 * Join the elements of $text into one string, inserting the class's NEWLINE
 * constant (declared as "\r\n" in this class) between them.
 *
 * @param array $text the pieces to join
 */
private static function implode(array $text): string
{
return implode(self::NEWLINE, $text);
}
} }