991 lines
28 KiB
PHP
991 lines
28 KiB
PHP
<?php
|
|
|
|
/*
|
|
* This file is part of the Behat Gherkin Parser.
|
|
* (c) Konstantin Kudryashov <ever.zet@gmail.com>
|
|
*
|
|
* For the full copyright and license information, please view the LICENSE
|
|
* file that was distributed with this source code.
|
|
*/
|
|
|
|
namespace Behat\Gherkin;
|
|
|
|
use Behat\Gherkin\Dialect\DialectProviderInterface;
|
|
use Behat\Gherkin\Dialect\GherkinDialect;
|
|
use Behat\Gherkin\Dialect\KeywordsDialectProvider;
|
|
use Behat\Gherkin\Exception\LexerException;
|
|
use Behat\Gherkin\Exception\NoSuchLanguageException;
|
|
use Behat\Gherkin\Keywords\KeywordsInterface;
|
|
use LogicException;
|
|
|
|
use function assert;
|
|
|
|
/**
|
|
* Gherkin lexer.
|
|
*
|
|
* @author Konstantin Kudryashov <ever.zet@gmail.com>
|
|
*
|
|
* @final since 4.15.0
|
|
*
|
|
* @phpstan-type TStepKeyword 'Given'|'When'|'Then'|'And'|'But'
|
|
* @phpstan-type TTitleKeyword 'Feature'|'Background'|'Scenario'|'Outline'|'Examples'
|
|
* @phpstan-type TTokenType 'Text'|'Comment'|'EOS'|'Newline'|'PyStringOp'|'TableRow'|'Tag'|'Language'|'Step'|TTitleKeyword
|
|
* @phpstan-type TToken TStringValueToken|TNullValueToken|TTitleToken|TStepToken|TTagToken|TTableRowToken
|
|
* @phpstan-type TStringValueToken array{type: TTokenType, value: string, line: int, deferred: bool}
|
|
* @phpstan-type TNullValueToken array{type: TTokenType, value: null, line: int, deferred: bool}
|
|
* @phpstan-type TTitleToken array{type: TTitleKeyword, value: null|non-empty-string, line: int, deferred: bool, keyword: string, indent: int}
|
|
* @phpstan-type TStepToken array{type: 'Step', value: string, line: int, deferred: bool, keyword_type: string, text: string}
|
|
* @phpstan-type TTagToken array{type: 'Tag', value: null, line: int, deferred: bool, tags: list<string>}
|
|
* @phpstan-type TTableRowToken array{type: 'TableRow', value: null, line: int, deferred: bool, columns: list<string>}
|
|
* @phpstan-type TDocStringSeparator '"""'|'```'
|
|
*/
|
|
class Lexer
|
|
{
|
|
/**
|
|
* Splits a string around | char, only if it's not preceded by an odd number of \.
|
|
*
|
|
* @see https://github.com/cucumber/gherkin/blob/679a87e21263699c15ea635159c6cda60f64af3b/php/src/StringGherkinLine.php#L14
|
|
*/
|
|
private const CELL_PATTERN = '/(?<!\\\\)(?:\\\\{2})*\K\\|/u';
|
|
|
|
private readonly DialectProviderInterface $dialectProvider;
|
|
private GherkinDialect $currentDialect;
|
|
private GherkinCompatibilityMode $compatibilityMode = GherkinCompatibilityMode::LEGACY;
|
|
/**
|
|
* @var list<string>
|
|
*/
|
|
private array $lines;
|
|
private int $linesCount;
|
|
private string $line;
|
|
private ?string $trimmedLine = null;
|
|
private int $lineNumber;
|
|
private bool $eos;
|
|
/**
|
|
* A cache of keyword types associated with each keyword.
|
|
*
|
|
* @phpstan-var array<string, non-empty-list<TStepKeyword>>|null
|
|
*/
|
|
private ?array $stepKeywordTypesCache = null;
|
|
/**
|
|
* @phpstan-var list<TToken>
|
|
*/
|
|
private array $deferredObjects = [];
|
|
private int $deferredObjectsCount = 0;
|
|
/**
|
|
* @phpstan-var TToken|null
|
|
*/
|
|
private ?array $stashedToken = null;
|
|
private bool $inPyString = false;
|
|
private int $pyStringSwallow = 0;
|
|
private bool $allowLanguageTag = true;
|
|
private bool $allowFeature = true;
|
|
private bool $allowMultilineArguments = false;
|
|
private bool $allowExamples = false;
|
|
private bool $allowSteps = false;
|
|
/**
|
|
* @phpstan-var TDocStringSeparator|null
|
|
*/
|
|
private ?string $pyStringDelimiter = null;
|
|
|
|
public function __construct(
|
|
DialectProviderInterface|KeywordsInterface $dialectProvider,
|
|
) {
|
|
if ($dialectProvider instanceof KeywordsInterface) {
|
|
// TODO trigger deprecation
|
|
$dialectProvider = new KeywordsDialectProvider($dialectProvider);
|
|
}
|
|
|
|
$this->dialectProvider = $dialectProvider;
|
|
}
|
|
|
|
/**
|
|
* @internal
|
|
*/
|
|
public function setCompatibilityMode(GherkinCompatibilityMode $compatibilityMode): void
|
|
{
|
|
$this->compatibilityMode = $compatibilityMode;
|
|
}
|
|
|
|
/**
|
|
* Sets lexer input.
|
|
*
|
|
* @param string $input Input string
|
|
* @param string $language Language name
|
|
*
|
|
* @return void
|
|
*
|
|
* @throws LexerException
|
|
*/
|
|
public function analyse(string $input, string $language = 'en')
|
|
{
|
|
// try to detect unsupported encoding
|
|
if (mb_detect_encoding($input, 'UTF-8', true) !== 'UTF-8') {
|
|
throw new LexerException('Feature file is not in UTF8 encoding');
|
|
}
|
|
|
|
$input = strtr($input, ["\r\n" => "\n", "\r" => "\n"]);
|
|
|
|
$this->lines = explode("\n", $input);
|
|
$this->linesCount = count($this->lines);
|
|
$this->line = $this->lines[0];
|
|
$this->lineNumber = 1;
|
|
$this->trimmedLine = null;
|
|
$this->eos = false;
|
|
|
|
$this->deferredObjects = [];
|
|
$this->deferredObjectsCount = 0;
|
|
$this->stashedToken = null;
|
|
$this->inPyString = false;
|
|
$this->pyStringSwallow = 0;
|
|
|
|
$this->allowLanguageTag = true;
|
|
$this->allowFeature = true;
|
|
$this->allowMultilineArguments = false;
|
|
$this->allowSteps = false;
|
|
$this->allowExamples = false;
|
|
|
|
if (\func_num_args() > 1) {
|
|
// @codeCoverageIgnoreStart
|
|
\assert($language !== '');
|
|
// TODO trigger deprecation (the Parser does not use this code path)
|
|
$this->setLanguage($language);
|
|
// @codeCoverageIgnoreEnd
|
|
} else {
|
|
$this->currentDialect = $this->dialectProvider->getDefaultDialect();
|
|
$this->stepKeywordTypesCache = null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @param non-empty-string $language
|
|
*/
|
|
private function setLanguage(string $language): void
|
|
{
|
|
if (($this->stashedToken !== null) || ($this->deferredObjects !== [])) {
|
|
// @codeCoverageIgnoreStart
|
|
// It is not possible to trigger this condition using the public interface of this class.
|
|
// It may be possible if the end-user has extended the Lexer with custom functionality.
|
|
throw new LogicException(
|
|
<<<'STRING'
|
|
Cannot set gherkin language due to unexpected Lexer state.
|
|
|
|
Please open an issue at https://github.com/Behat/Gherkin with a copy of the current
|
|
feature file. If you are using a Lexer or Parser class that extends the ones provided
|
|
in behat/gherkin, please also provide details of these.
|
|
STRING,
|
|
);
|
|
// @codeCoverageIgnoreEnd
|
|
}
|
|
|
|
try {
|
|
$this->currentDialect = $this->dialectProvider->getDialect($language);
|
|
} catch (NoSuchLanguageException $e) {
|
|
if (!$this->compatibilityMode->shouldIgnoreInvalidLanguage()) {
|
|
throw $e;
|
|
}
|
|
}
|
|
$this->stepKeywordTypesCache = null;
|
|
}
|
|
|
|
/**
|
|
* Returns current lexer language.
|
|
*
|
|
* @return string
|
|
*/
|
|
public function getLanguage()
|
|
{
|
|
return $this->currentDialect->getLanguage();
|
|
}
|
|
|
|
/**
|
|
* Returns next token or previously stashed one.
|
|
*
|
|
* @return array
|
|
*
|
|
* @phpstan-return TToken
|
|
*/
|
|
public function getAdvancedToken()
|
|
{
|
|
return $this->getStashedToken() ?? $this->getNextToken();
|
|
}
|
|
|
|
/**
|
|
* Defers token.
|
|
*
|
|
* @phpstan-param TToken $token Token to defer
|
|
*
|
|
* @return void
|
|
*/
|
|
public function deferToken(array $token)
|
|
{
|
|
$token['deferred'] = true;
|
|
$this->deferredObjects[] = $token;
|
|
++$this->deferredObjectsCount;
|
|
}
|
|
|
|
/**
|
|
* Predicts the upcoming token without passing over it.
|
|
*
|
|
* @return array
|
|
*
|
|
* @phpstan-return TToken
|
|
*/
|
|
public function predictToken()
|
|
{
|
|
return $this->stashedToken ??= $this->getNextToken();
|
|
}
|
|
|
|
/**
|
|
* Skips over the currently-predicted token, if any.
|
|
*
|
|
* @return void
|
|
*/
|
|
public function skipPredictedToken()
|
|
{
|
|
$this->stashedToken = null;
|
|
}
|
|
|
|
/**
|
|
* Constructs a token with specified parameters.
|
|
*
|
|
* @template T of TTokenType
|
|
*
|
|
* @param string|null $value Token value
|
|
*
|
|
* @phpstan-param T $type Token type
|
|
*
|
|
* @return array
|
|
*
|
|
* @phpstan-return ($value is non-empty-string ? array{type: T, value: non-empty-string, line: int, deferred: bool} : array{type: T, value: null, line: int, deferred: bool})
|
|
*/
|
|
public function takeToken(string $type, ?string $value = null)
|
|
{
|
|
return [
|
|
'type' => $type,
|
|
'line' => $this->lineNumber,
|
|
'value' => $value ?: null,
|
|
'deferred' => false,
|
|
];
|
|
}
|
|
|
|
/**
|
|
* Consumes line from input & increments line counter.
|
|
*
|
|
* @return void
|
|
*/
|
|
protected function consumeLine()
|
|
{
|
|
++$this->lineNumber;
|
|
|
|
if (($this->lineNumber - 1) === $this->linesCount) {
|
|
$this->eos = true;
|
|
|
|
return;
|
|
}
|
|
|
|
$this->line = $this->lines[$this->lineNumber - 1];
|
|
$this->trimmedLine = null;
|
|
}
|
|
|
|
/**
|
|
* Consumes first part of line from input without incrementing the line number.
|
|
*
|
|
* @return void
|
|
*/
|
|
protected function consumeLineUntil(int $trimmedOffset)
|
|
{
|
|
$this->line = mb_substr(ltrim($this->line), $trimmedOffset, null, 'utf-8');
|
|
$this->trimmedLine = null;
|
|
}
|
|
|
|
/**
|
|
* Returns trimmed version of line.
|
|
*
|
|
* @return string
|
|
*/
|
|
protected function getTrimmedLine()
|
|
{
|
|
return $this->trimmedLine ??= trim($this->line);
|
|
}
|
|
|
|
/**
|
|
* Returns stashed token or null if there isn't one.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TToken|null
|
|
*/
|
|
protected function getStashedToken()
|
|
{
|
|
$stashedToken = $this->stashedToken;
|
|
$this->stashedToken = null;
|
|
|
|
return $stashedToken;
|
|
}
|
|
|
|
/**
|
|
* Returns deferred token or null if there isn't one.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TToken|null
|
|
*/
|
|
protected function getDeferredToken()
|
|
{
|
|
if (!$this->deferredObjectsCount) {
|
|
return null;
|
|
}
|
|
|
|
--$this->deferredObjectsCount;
|
|
|
|
return array_shift($this->deferredObjects);
|
|
}
|
|
|
|
/**
|
|
* Returns next token from input.
|
|
*
|
|
* @return array
|
|
*
|
|
* @phpstan-return TToken
|
|
*/
|
|
protected function getNextToken()
|
|
{
|
|
return $this->getDeferredToken()
|
|
?? $this->scanEOS()
|
|
?? $this->scanLanguage()
|
|
?? $this->scanComment()
|
|
?? $this->scanPyStringOp()
|
|
?? $this->scanPyStringContent()
|
|
?? $this->scanStep()
|
|
?? $this->scanScenario()
|
|
?? $this->scanBackground()
|
|
?? $this->scanOutline()
|
|
?? $this->scanExamples()
|
|
?? $this->scanFeature()
|
|
?? $this->scanTags()
|
|
?? $this->scanTableRow()
|
|
?? $this->scanNewline()
|
|
?? $this->scanText();
|
|
}
|
|
|
|
/**
|
|
* Scans for token with specified regex.
|
|
*
|
|
* @param string $regex Regular expression
|
|
*
|
|
* @phpstan-param TTokenType $type Expected token type
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TStringValueToken|null
|
|
*/
|
|
protected function scanInput(string $regex, string $type)
|
|
{
|
|
if (!preg_match($regex, $this->line, $matches)) {
|
|
return null;
|
|
}
|
|
|
|
assert($matches[1] !== '');
|
|
|
|
$token = $this->takeToken($type, $matches[1]);
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans for token with specified keywords.
|
|
*
|
|
* @param string $keywords Keywords (separated by "|")
|
|
*
|
|
* @phpstan-param TTitleKeyword $type Expected token type
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*
|
|
* @deprecated
|
|
*/
|
|
protected function scanInputForKeywords(string $keywords, string $type)
|
|
{
|
|
// @codeCoverageIgnoreStart
|
|
if (!preg_match('/^(\s*)(' . $keywords . '):\s*(.*)/u', $this->line, $matches)) {
|
|
return null;
|
|
}
|
|
|
|
$token = $this->takeToken($type, $matches[3]);
|
|
$token['keyword'] = $matches[2];
|
|
$token['indent'] = mb_strlen($matches[1], 'utf8');
|
|
|
|
$this->consumeLine();
|
|
|
|
// turn off language searching and feature detection
|
|
if ($type === 'Feature') {
|
|
$this->allowFeature = false;
|
|
$this->allowLanguageTag = false;
|
|
}
|
|
|
|
// turn off PyString and Table searching
|
|
if ($type === 'Feature' || $type === 'Scenario' || $type === 'Outline') {
|
|
$this->allowMultilineArguments = false;
|
|
} elseif ($type === 'Examples') {
|
|
$this->allowMultilineArguments = true;
|
|
}
|
|
|
|
// turn on steps searching
|
|
if ($type === 'Scenario' || $type === 'Background' || $type === 'Outline') {
|
|
$this->allowSteps = true;
|
|
}
|
|
|
|
return $token;
|
|
// @codeCoverageIgnoreEnd
|
|
}
|
|
|
|
/**
|
|
* @param list<string> $keywords
|
|
*
|
|
* @phpstan-param TTitleKeyword $type
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*/
|
|
private function scanTitleLine(array $keywords, string $type): ?array
|
|
{
|
|
$trimmedLine = $this->getTrimmedLine();
|
|
|
|
foreach ($keywords as $keyword) {
|
|
if (str_starts_with($trimmedLine, $keyword . ':')) {
|
|
$title = trim(mb_substr($trimmedLine, mb_strlen($keyword) + 1));
|
|
|
|
$token = $this->takeToken($type, $title);
|
|
$token['keyword'] = $keyword;
|
|
$token['indent'] = mb_strlen($this->line, 'utf8') - mb_strlen(ltrim($this->line), 'utf8');
|
|
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Scans EOS from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TNullValueToken|null
|
|
*/
|
|
protected function scanEOS()
|
|
{
|
|
if (!$this->eos) {
|
|
return null;
|
|
}
|
|
|
|
return $this->takeToken('EOS');
|
|
}
|
|
|
|
/**
|
|
* Returns a regex matching the keywords for the provided type.
|
|
*
|
|
* @phpstan-param 'Step'|TTitleKeyword|TStepKeyword $type Keyword type
|
|
*
|
|
* @return string
|
|
*
|
|
* @deprecated
|
|
*/
|
|
protected function getKeywords(string $type)
|
|
{
|
|
// @codeCoverageIgnoreStart
|
|
$keywords = match ($type) {
|
|
'Feature' => $this->currentDialect->getFeatureKeywords(),
|
|
'Background' => $this->currentDialect->getBackgroundKeywords(),
|
|
'Scenario' => $this->currentDialect->getScenarioKeywords(),
|
|
'Outline' => $this->currentDialect->getScenarioOutlineKeywords(),
|
|
'Examples' => $this->currentDialect->getExamplesKeywords(),
|
|
'Step' => $this->currentDialect->getStepKeywords(),
|
|
'Given' => $this->currentDialect->getGivenKeywords(),
|
|
'When' => $this->currentDialect->getWhenKeywords(),
|
|
'Then' => $this->currentDialect->getThenKeywords(),
|
|
'And' => $this->currentDialect->getAndKeywords(),
|
|
'But' => $this->currentDialect->getButKeywords(),
|
|
default => throw new \InvalidArgumentException(sprintf('Unknown keyword type "%s"', $type)),
|
|
};
|
|
|
|
$keywordsRegex = implode('|', array_map(fn ($keyword) => preg_quote($keyword, '/'), $keywords));
|
|
|
|
if ($type === 'Step') {
|
|
$keywordsRegex = '(?:' . $keywordsRegex . ')\s*';
|
|
}
|
|
|
|
return $keywordsRegex;
|
|
// @codeCoverageIgnoreEnd
|
|
}
|
|
|
|
/**
|
|
* Scans Feature from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*/
|
|
protected function scanFeature()
|
|
{
|
|
if (!$this->allowFeature) {
|
|
// The Feature: tag is only allowed once in a file, later in the file it may be part of a description node
|
|
return null;
|
|
}
|
|
|
|
$token = $this->scanTitleLine($this->currentDialect->getFeatureKeywords(), 'Feature');
|
|
|
|
if ($token === null) {
|
|
return null;
|
|
}
|
|
|
|
$this->allowFeature = false;
|
|
$this->allowLanguageTag = false;
|
|
$this->allowMultilineArguments = false;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Background from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*/
|
|
protected function scanBackground()
|
|
{
|
|
$token = $this->scanTitleLine($this->currentDialect->getBackgroundKeywords(), 'Background');
|
|
|
|
if ($token === null) {
|
|
return null;
|
|
}
|
|
|
|
$this->allowSteps = true;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Scenario from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*/
|
|
protected function scanScenario()
|
|
{
|
|
$token = $this->scanTitleLine($this->currentDialect->getScenarioKeywords(), 'Scenario');
|
|
|
|
if ($token === null) {
|
|
return null;
|
|
}
|
|
|
|
$this->allowMultilineArguments = false;
|
|
$this->allowSteps = true;
|
|
$this->allowExamples = true;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Scenario Outline from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*/
|
|
protected function scanOutline()
|
|
{
|
|
$token = $this->scanTitleLine($this->currentDialect->getScenarioOutlineKeywords(), 'Outline');
|
|
|
|
if ($token === null) {
|
|
return null;
|
|
}
|
|
|
|
$this->allowMultilineArguments = false;
|
|
$this->allowSteps = true;
|
|
$this->allowExamples = true;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Scenario Outline Examples from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTitleToken|null
|
|
*/
|
|
protected function scanExamples()
|
|
{
|
|
if (!$this->allowExamples) {
|
|
return null;
|
|
}
|
|
$token = $this->scanTitleLine($this->currentDialect->getExamplesKeywords(), 'Examples');
|
|
|
|
if ($token === null) {
|
|
return null;
|
|
}
|
|
|
|
$this->allowMultilineArguments = true;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Step from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TStepToken|null
|
|
*/
|
|
protected function scanStep()
|
|
{
|
|
if (!$this->allowSteps) {
|
|
return null;
|
|
}
|
|
|
|
$trimmedLine = $this->getTrimmedLine();
|
|
$matchedKeyword = null;
|
|
|
|
foreach ($this->currentDialect->getStepKeywords() as $keyword) {
|
|
if (str_starts_with($trimmedLine, $keyword)) {
|
|
$matchedKeyword = $keyword;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ($matchedKeyword === null) {
|
|
return null;
|
|
}
|
|
|
|
$text = ltrim(mb_substr($trimmedLine, mb_strlen($matchedKeyword)));
|
|
|
|
$nodeKeyword = $this->compatibilityMode->shouldRemoveStepKeywordSpace() ? trim($matchedKeyword) : $matchedKeyword;
|
|
assert($nodeKeyword !== '');
|
|
|
|
$token = $this->takeToken('Step', $nodeKeyword);
|
|
$token['keyword_type'] = $this->getStepKeywordType($matchedKeyword);
|
|
$token['text'] = $text;
|
|
|
|
$this->consumeLine();
|
|
$this->allowMultilineArguments = true;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans PyString from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TNullValueToken|null
|
|
*/
|
|
protected function scanPyStringOp()
|
|
{
|
|
if (!$this->allowMultilineArguments) {
|
|
return null;
|
|
}
|
|
|
|
if (!preg_match('/^\s*(?<delimiter>"""|```)/u', $this->line, $matches, PREG_OFFSET_CAPTURE)) {
|
|
return null;
|
|
}
|
|
|
|
['delimiter' => [0 => $delimiter, 1 => $indent]] = $matches;
|
|
|
|
if ($this->inPyString) {
|
|
if ($this->pyStringDelimiter !== $delimiter) {
|
|
return null;
|
|
}
|
|
$this->pyStringDelimiter = null;
|
|
} else {
|
|
$this->pyStringDelimiter = $delimiter;
|
|
}
|
|
|
|
$this->inPyString = !$this->inPyString;
|
|
$token = $this->takeToken('PyStringOp');
|
|
$this->pyStringSwallow = $indent;
|
|
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans PyString content.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TStringValueToken|null
|
|
*/
|
|
protected function scanPyStringContent()
|
|
{
|
|
if (!$this->inPyString) {
|
|
return null;
|
|
}
|
|
|
|
$token = $this->scanText();
|
|
// swallow trailing spaces
|
|
$value = (string) preg_replace('/^\s{0,' . $this->pyStringSwallow . '}/u', '', $token['value'] ?? '');
|
|
|
|
if ($this->compatibilityMode->shouldUnespaceDocStringDelimiters()) {
|
|
\assert($this->pyStringDelimiter !== null);
|
|
$escapedDelimiter = match ($this->pyStringDelimiter) {
|
|
'"""' => '\\"\\"\\"',
|
|
'```' => '\\`\\`\\`',
|
|
};
|
|
$value = str_replace($escapedDelimiter, $this->pyStringDelimiter, $value);
|
|
}
|
|
|
|
$token['value'] = $value;
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Table Row from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTableRowToken|null
|
|
*/
|
|
protected function scanTableRow()
|
|
{
|
|
if (!$this->allowMultilineArguments) {
|
|
return null;
|
|
}
|
|
|
|
$line = $this->getTrimmedLine();
|
|
if (!str_starts_with($line, '|')) {
|
|
// Strictly speaking, a table row only has to begin with a pipe - content to the right
|
|
// of the final pipe will be ignored after we split the cells.
|
|
return null;
|
|
}
|
|
|
|
$rawColumns = preg_split(self::CELL_PATTERN, $line);
|
|
assert($rawColumns !== false);
|
|
|
|
// Safely remove elements before the first and last separators
|
|
array_shift($rawColumns);
|
|
array_pop($rawColumns);
|
|
|
|
$token = $this->takeToken('TableRow');
|
|
if ($this->compatibilityMode->shouldUseNewTableCellParsing()) {
|
|
$columns = array_map($this->parseTableCell(...), $rawColumns);
|
|
} else {
|
|
$columns = array_map(static fn ($column) => trim(str_replace(['\\|', '\\\\'], ['|', '\\'], $column)), $rawColumns);
|
|
}
|
|
$token['columns'] = $columns;
|
|
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
|
|
private function parseTableCell(string $cell): string
|
|
{
|
|
$trimmedCell = preg_replace('/^[ \\t\\n\\x0B\\f\\r\\x85\\xA0]++|[ \\t\\n\\x0B\\f\\r\\x85\\xA0]++$/u', '', $cell);
|
|
\assert($trimmedCell !== null);
|
|
|
|
$value = preg_replace_callback('/\\\\./', function (array $matches) {
|
|
return match ($matches[0]) {
|
|
'\\n' => "\n",
|
|
'\\\\' => '\\',
|
|
'\\|' => '|',
|
|
default => $matches[0],
|
|
};
|
|
}, $trimmedCell);
|
|
|
|
assert($value !== null);
|
|
|
|
return $value;
|
|
}
|
|
|
|
/**
|
|
* Scans Tags from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TTagToken|null
|
|
*/
|
|
protected function scanTags()
|
|
{
|
|
$line = $this->getTrimmedLine();
|
|
|
|
if ($line === '' || !str_starts_with($line, '@')) {
|
|
return null;
|
|
}
|
|
|
|
if (preg_match('/^(?<line>.*)\s+#.*$/', $line, $matches)) {
|
|
['line' => $line] = $matches;
|
|
$this->consumeLineUntil(mb_strlen($line, 'utf-8'));
|
|
} else {
|
|
$this->consumeLine();
|
|
}
|
|
|
|
$token = $this->takeToken('Tag');
|
|
|
|
if ($this->compatibilityMode->shouldRemoveTagPrefixChar()) {
|
|
// Legacy behaviour
|
|
$tags = explode('@', mb_substr($line, 1, mb_strlen($line, 'utf8') - 1, 'utf8'));
|
|
$tags = array_map(trim(...), $tags);
|
|
$token['tags'] = $tags;
|
|
|
|
return $token;
|
|
}
|
|
|
|
$tags = preg_split('/(?=@)/u', $line);
|
|
assert($tags !== false);
|
|
// Remove the empty content before the first tag prefix
|
|
array_shift($tags);
|
|
|
|
// Note: checking for whitespace in tags is done in the Parser to fit with existing logic
|
|
$token['tags'] = array_map(trim(...), $tags);
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Language specifier from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TStringValueToken|null
|
|
*/
|
|
protected function scanLanguage()
|
|
{
|
|
if (!$this->allowLanguageTag) {
|
|
return null;
|
|
}
|
|
|
|
if ($this->inPyString) {
|
|
return null;
|
|
}
|
|
|
|
if (!str_starts_with(ltrim($this->line), '#')) {
|
|
return null;
|
|
}
|
|
|
|
$pattern = $this->compatibilityMode->allowWhitespaceInLanguageTag()
|
|
? '/^\s*#\s*language\s*:\s*([\w_\-]+)\s*$/u'
|
|
: '/^\s*#\s*language:\s*([\w_\-]+)\s*$/';
|
|
|
|
$token = $this->scanInput($pattern, 'Language');
|
|
|
|
if ($token) {
|
|
\assert(\is_string($token['value']));
|
|
\assert($token['value'] !== ''); // the regex can only match a non-empty value.
|
|
$this->allowLanguageTag = false;
|
|
$this->setLanguage($token['value']);
|
|
}
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Comment from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TStringValueToken|null
|
|
*/
|
|
protected function scanComment()
|
|
{
|
|
if ($this->inPyString) {
|
|
return null;
|
|
}
|
|
|
|
$line = $this->getTrimmedLine();
|
|
if (!str_starts_with($line, '#')) {
|
|
return null;
|
|
}
|
|
|
|
$token = $this->takeToken('Comment', $line);
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans Newline from input & returns it if found.
|
|
*
|
|
* @return array|null
|
|
*
|
|
* @phpstan-return TNullValueToken|null
|
|
*/
|
|
protected function scanNewline()
|
|
{
|
|
if ($this->getTrimmedLine() !== '') {
|
|
return null;
|
|
}
|
|
|
|
$token = $this->takeToken('Newline');
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Scans text from input & returns it if found.
|
|
*
|
|
* @return array
|
|
*
|
|
* @phpstan-return TStringValueToken|TNullValueToken
|
|
*/
|
|
protected function scanText()
|
|
{
|
|
$token = $this->takeToken('Text', $this->line);
|
|
$this->consumeLine();
|
|
|
|
return $token;
|
|
}
|
|
|
|
/**
|
|
* Returns step type keyword (Given, When, Then, etc.).
|
|
*
|
|
* @param string $native Step keyword in provided language
|
|
*
|
|
* @phpstan-return TStepKeyword
|
|
*/
|
|
private function getStepKeywordType(string $native): string
|
|
{
|
|
if ($this->stepKeywordTypesCache === null) {
|
|
$this->stepKeywordTypesCache = [];
|
|
$this->addStepKeywordTypes($this->currentDialect->getGivenKeywords(), 'Given');
|
|
$this->addStepKeywordTypes($this->currentDialect->getWhenKeywords(), 'When');
|
|
$this->addStepKeywordTypes($this->currentDialect->getThenKeywords(), 'Then');
|
|
$this->addStepKeywordTypes($this->currentDialect->getAndKeywords(), 'And');
|
|
$this->addStepKeywordTypes($this->currentDialect->getButKeywords(), 'But');
|
|
}
|
|
|
|
if (!isset($this->stepKeywordTypesCache[$native])) { // should not happen when the native keyword belongs to the dialect
|
|
return 'Given'; // cucumber/gherkin has an UNKNOWN type, but we don't have it.
|
|
}
|
|
|
|
if (\count($this->stepKeywordTypesCache[$native]) === 1) {
|
|
return $this->stepKeywordTypesCache[$native][0];
|
|
}
|
|
|
|
// Consider ambiguous keywords as AND keywords so that they are normalized to the previous step type.
|
|
// This happens in English for the `* ` keyword for instance.
|
|
// cucumber/gherkin returns that as an UNKNOWN type, but we don't have it.
|
|
return 'And';
|
|
}
|
|
|
|
/**
|
|
* @param list<string> $keywords
|
|
*
|
|
* @phpstan-param TStepKeyword $type
|
|
*/
|
|
private function addStepKeywordTypes(array $keywords, string $type): void
|
|
{
|
|
foreach ($keywords as $keyword) {
|
|
$this->stepKeywordTypesCache[$keyword][] = $type;
|
|
}
|
|
}
|
|
}
|