aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/bakame/http-structured-fields/src/Parser.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bakame/http-structured-fields/src/Parser.php')
-rw-r--r--vendor/bakame/http-structured-fields/src/Parser.php523
1 files changed, 523 insertions, 0 deletions
diff --git a/vendor/bakame/http-structured-fields/src/Parser.php b/vendor/bakame/http-structured-fields/src/Parser.php
new file mode 100644
index 000000000..4c7448c15
--- /dev/null
+++ b/vendor/bakame/http-structured-fields/src/Parser.php
@@ -0,0 +1,523 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Bakame\Http\StructuredFields;
+
+use DateTimeImmutable;
+use Exception;
+use Stringable;
+
+use function in_array;
+use function ltrim;
+use function preg_match;
+use function str_contains;
+use function strlen;
+use function substr;
+use function trim;
+
+/**
+ * A class to parse HTTP Structured Fields from their HTTP textual representation according to RFC8941.
+ *
+ * Based on gapple\StructuredFields\Parser class in Structured Field Values for PHP v1.0.0.
+ *
+ * @link https://github.com/gapple/structured-fields/blob/v1.0.0/src/Parser.php
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2
+ *
+ * @see Dictionary::fromHttpValue()
+ * @see Parameters::fromHttpValue()
+ * @see OuterList::fromHttpValue()
+ * @see InnerList::fromHttpValue()
+ * @see Item::fromHttpValue()
+ *
+ * @internal Do not use directly this class as it's behaviour and return type
+ * MAY change significantly even during a major release cycle.
+ *
+ * @phpstan-type SfValue Bytes|Token|DisplayString|DateTimeImmutable|string|int|float|bool
+ * @phpstan-type SfParameter array<array{0:string, 1:SfValue}>
+ * @phpstan-type SfItem array{0:SfValue, 1: SfParameter}
+ * @phpstan-type SfInnerList array{0:array<SfItem>, 1: SfParameter}
+ */
+final class Parser
+{
+ private const REGEXP_BYTES = '/^(?<sequence>:(?<byte>[a-z\d+\/=]*):)/i';
+ private const REGEXP_BOOLEAN = '/^\?[01]/';
+ private const REGEXP_DATE = '/^@(?<date>-?\d{1,15})(?:[^\d.]|$)/';
+ private const REGEXP_DECIMAL = '/^-?\d{1,12}\.\d{1,3}$/';
+ private const REGEXP_INTEGER = '/^-?\d{1,15}$/';
+ private const REGEXP_TOKEN = "/^(?<token>[a-z*][a-z\d:\/!#\$%&'*+\-.^_`|~]*)/i";
+ private const REGEXP_INVALID_CHARACTERS = "/[\r\t\n]|[^\x20-\x7E]/";
+ private const REGEXP_VALID_NUMBER = '/^(?<number>-?\d+(?:\.\d+)?)(?:[^\d.]|$)/';
+ private const REGEXP_VALID_SPACE = '/^(?<space>,[ \t]*)/';
+ private const FIRST_CHARACTER_RANGE_NUMBER = '-1234567890';
+ private const FIRST_CHARACTER_RANGE_TOKEN = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*';
+
+ public function __construct(private readonly Ietf $rfc)
+ {
+ }
+
+ /**
+ * Returns an Item as a PHP list array from an HTTP textual representation.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-item
+ *
+ *
+ * @throws Exception|SyntaxError
+ *
+ * @return SfItem
+ */
+ public function parseItem(Stringable|string $httpValue): array
+ {
+ $remainder = trim((string) $httpValue, ' ');
+ if ('' === $remainder || 1 === preg_match(self::REGEXP_INVALID_CHARACTERS, $remainder)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for an item contains invalid characters.");
+ }
+
+ [$value, $offset] = $this->extractValue($remainder);
+ $remainder = substr($remainder, $offset);
+ if ('' !== $remainder && !str_contains($remainder, ';')) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for an item contains invalid characters.");
+ }
+
+ return [$value, $this->parseParameters($remainder)]; /* @phpstan-ignore-line */
+ }
+
+ /**
+ * Returns a Parameters ordered map container as a PHP list array from an HTTP textual representation.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-3.1.2
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array<SfParameter>
+ */
+ public function parseParameters(Stringable|string $httpValue): array
+ {
+ $remainder = trim((string) $httpValue);
+ [$parameters, $offset] = $this->extractParametersValues($remainder);
+ if (strlen($remainder) !== $offset) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for Parameters contains invalid characters.");
+ }
+
+ return $parameters; /* @phpstan-ignore-line */
+ }
+
+ /**
+ * Returns an ordered list represented as a PHP list array from an HTTP textual representation.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.1
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array<SfInnerList|SfItem>
+ */
+ public function parseList(Stringable|string $httpValue): array
+ {
+ $list = [];
+ $remainder = ltrim((string) $httpValue, ' ');
+ while ('' !== $remainder) {
+ [$list[], $offset] = $this->extractItemOrInnerList($remainder);
+ $remainder = self::removeCommaSeparatedWhiteSpaces($remainder, $offset);
+ }
+
+ return $list;
+ }
+
+ /**
+ * Returns a Dictionary represented as a PHP list array from an HTTP textual representation.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.2
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array<array{0:string, 1:SfInnerList|SfItem}>
+ */
+ public function parseDictionary(Stringable|string $httpValue): array
+ {
+ $map = [];
+ $remainder = ltrim((string) $httpValue, ' ');
+ while ('' !== $remainder) {
+ $key = Key::fromStringBeginning($remainder)->value;
+ $remainder = substr($remainder, strlen($key));
+ if ('' === $remainder || '=' !== $remainder[0]) {
+ $remainder = '=?1'.$remainder;
+ }
+ $member = [$key];
+
+ [$member[1], $offset] = $this->extractItemOrInnerList(substr($remainder, 1));
+ $remainder = self::removeCommaSeparatedWhiteSpaces($remainder, ++$offset);
+ $map[] = $member;
+ }
+
+ return $map;
+ }
+
+ /**
+ * Returns an inner list represented as a PHP list array from an HTTP textual representation.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.1.2
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return SfInnerList
+ */
+ public function parseInnerList(Stringable|string $httpValue): array
+ {
+ $remainder = ltrim((string) $httpValue, ' ');
+ if ('(' !== $remainder[0]) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a inner list is missing a parenthesis.");
+ }
+
+ [$list, $offset] = $this->extractInnerList($remainder);
+ $remainder = self::removeOptionalWhiteSpaces(substr($remainder, $offset));
+ if ('' !== $remainder) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a inner list contains invalid data.");
+ }
+
+ return $list;
+ }
+
+ /**
+ * Filter optional white spaces before and after comma.
+ *
+ * @see https://tools.ietf.org/html/rfc7230#section-3.2.3
+ */
+ private static function removeCommaSeparatedWhiteSpaces(string $remainder, int $offset): string
+ {
+ $remainder = self::removeOptionalWhiteSpaces(substr($remainder, $offset));
+ if ('' === $remainder) {
+ return '';
+ }
+
+ if (1 !== preg_match(self::REGEXP_VALID_SPACE, $remainder, $found)) {
+ throw new SyntaxError('The HTTP textual representation is missing an excepted comma.');
+ }
+
+ $remainder = substr($remainder, strlen($found['space']));
+
+ if ('' === $remainder) {
+ throw new SyntaxError('The HTTP textual representation has an unexpected end of line.');
+ }
+
+ return $remainder;
+ }
+
+ /**
+ * Remove optional white spaces before field value.
+ *
+ * @see https://tools.ietf.org/html/rfc7230#section-3.2.3
+ */
+ private static function removeOptionalWhiteSpaces(string $httpValue): string
+ {
+ return ltrim($httpValue, " \t");
+ }
+
+ /**
+ * Returns an item or an inner list as a PHP list array from an HTTP textual representation.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.1.1
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array{0: SfInnerList|SfItem, 1:int}
+ */
+ private function extractItemOrInnerList(string $httpValue): array
+ {
+ if ('(' === $httpValue[0]) {
+ return $this->extractInnerList($httpValue);
+ }
+
+ [$item, $remainder] = $this->extractItem($httpValue);
+
+ return [$item, strlen($httpValue) - strlen($remainder)];
+ }
+
+ /**
+ * Returns an inner list represented as a PHP list array from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.1.2
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array{0: SfInnerList, 1 :int}
+ */
+ private function extractInnerList(string $httpValue): array
+ {
+ $list = [];
+ $remainder = substr($httpValue, 1);
+ while ('' !== $remainder) {
+ $remainder = ltrim($remainder, ' ');
+
+ if (')' === $remainder[0]) {
+ $remainder = substr($remainder, 1);
+ [$parameters, $offset] = $this->extractParametersValues($remainder);
+ $remainder = substr($remainder, $offset);
+
+ return [[$list, $parameters], strlen($httpValue) - strlen($remainder)];
+ }
+
+ [$list[], $remainder] = $this->extractItem($remainder);
+
+ if ('' !== $remainder && !in_array($remainder[0], [' ', ')'], true)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a inner list is using invalid characters.");
+ }
+ }
+
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a inner list has an unexpected end of line.");
+ }
+
+ /**
+ * Returns an item represented as a PHP array from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array{0:SfItem, 1:string}
+ */
+ private function extractItem(string $remainder): array
+ {
+ [$value, $offset] = $this->extractValue($remainder);
+ $remainder = substr($remainder, $offset);
+ [$parameters, $offset] = $this->extractParametersValues($remainder);
+
+ return [[$value, $parameters], substr($remainder, $offset)];
+ }
+
+ /**
+ * Returns an item value from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.3.1
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array{0:SfValue, 1:int}
+ */
+ private function extractValue(string $httpValue): array
+ {
+ return match (true) {
+ '"' === $httpValue[0] => self::extractString($httpValue),
+ ':' === $httpValue[0] => self::extractBytes($httpValue),
+ '?' === $httpValue[0] => self::extractBoolean($httpValue),
+ '@' === $httpValue[0] => self::extractDate($httpValue, $this->rfc),
+ str_starts_with($httpValue, '%"') => self::extractDisplayString($httpValue, $this->rfc),
+ str_contains(self::FIRST_CHARACTER_RANGE_NUMBER, $httpValue[0]) => self::extractNumber($httpValue),
+ str_contains(self::FIRST_CHARACTER_RANGE_TOKEN, $httpValue[0]) => self::extractToken($httpValue),
+ default => throw new SyntaxError("The HTTP textual representation \"$httpValue\" for an item value is unknown or unsupported."),
+ };
+ }
+
+ /**
+ * Returns a parameters container represented as a PHP associative array from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.3.2
+ *
+ * @throws SyntaxError|Exception
+ *
+ * @return array{0:SfParameter, 1:int}
+ */
+ private function extractParametersValues(Stringable|string $httpValue): array
+ {
+ $map = [];
+ $httpValue = (string) $httpValue;
+ $remainder = $httpValue;
+ while ('' !== $remainder && ';' === $remainder[0]) {
+ $remainder = ltrim(substr($remainder, 1), ' ');
+ $key = Key::fromStringBeginning($remainder)->value;
+ $member = [$key, true];
+ $remainder = substr($remainder, strlen($key));
+ if ('' !== $remainder && '=' === $remainder[0]) {
+ $remainder = substr($remainder, 1);
+ [$member[1], $offset] = $this->extractValue($remainder);
+ $remainder = substr($remainder, $offset);
+ }
+
+ $map[] = $member;
+ }
+
+ return [$map, strlen($httpValue) - strlen($remainder)];
+ }
+
+ /**
+ * Returns a boolean from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.8
+ *
+ * @return array{0:bool, 1:int}
+ */
+ private static function extractBoolean(string $httpValue): array
+ {
+ return match (1) {
+ preg_match(self::REGEXP_BOOLEAN, $httpValue) => ['1' === $httpValue[1], 2],
+ default => throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Boolean contains invalid characters."),
+ };
+ }
+
+ /**
+ * Returns an int or a float from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.4
+ *
+ * @return array{0:int|float, 1:int}
+ */
+ private static function extractNumber(string $httpValue): array
+ {
+ if (1 !== preg_match(self::REGEXP_VALID_NUMBER, $httpValue, $found)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Number contains invalid characters.");
+ }
+
+ return match (1) {
+ preg_match(self::REGEXP_DECIMAL, $found['number']) => [(float) $found['number'], strlen($found['number'])],
+ preg_match(self::REGEXP_INTEGER, $found['number']) => [(int) $found['number'], strlen($found['number'])],
+ default => throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Number contains too much digit."),
+ };
+ }
+
+ /**
+ * Returns DateTimeImmutable instance from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://httpwg.org/http-extensions/draft-ietf-httpbis-sfbis.html#name-dates
+ *
+ * @throws SyntaxError
+ * @throws Exception
+ *
+ * @return array{0:DateTimeImmutable, 1:int}
+ */
+ private static function extractDate(string $httpValue, Ietf $rfc): array
+ {
+ if (!$rfc->supports(Type::Date)) {
+ throw MissingFeature::dueToLackOfSupport(Type::Date, $rfc);
+ }
+
+ if (1 !== preg_match(self::REGEXP_DATE, $httpValue, $found)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Date contains invalid characters.");
+ }
+
+ return [new DateTimeImmutable('@'.$found['date']), strlen($found['date']) + 1];
+ }
+
+ /**
+ * Returns a string from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.5
+ *
+ * @return array{0:string, 1:int}
+ */
+ private static function extractString(string $httpValue): array
+ {
+ $offset = 1;
+ $remainder = substr($httpValue, $offset);
+ $output = '';
+
+ if (1 === preg_match(self::REGEXP_INVALID_CHARACTERS, $remainder)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a String contains an invalid end string.");
+ }
+
+ while ('' !== $remainder) {
+ $char = $remainder[0];
+ $offset += 1;
+
+ if ('"' === $char) {
+ return [$output, $offset];
+ }
+
+ $remainder = substr($remainder, 1);
+
+ if ('\\' !== $char) {
+ $output .= $char;
+ continue;
+ }
+
+ $char = $remainder[0] ?? '';
+ $offset += 1;
+ $remainder = substr($remainder, 1);
+
+ if (!in_array($char, ['"', '\\'], true)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a String contains an invalid end string.");
+ }
+
+ $output .= $char;
+ }
+
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a String contains an invalid end string.");
+ }
+
+ /**
+ * Returns a string from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-sfbis#section-4.2.10
+ *
+ * @return array{0:DisplayString, 1:int}
+ */
+ private static function extractDisplayString(string $httpValue, Ietf $rfc): array
+ {
+ if (!$rfc->supports(Type::DisplayString)) {
+ throw MissingFeature::dueToLackOfSupport(Type::DisplayString, $rfc);
+ }
+
+ $offset = 2;
+ $remainder = substr($httpValue, $offset);
+ $output = '';
+
+ if (1 === preg_match(self::REGEXP_INVALID_CHARACTERS, $remainder)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a DisplayString contains an invalid character string.");
+ }
+
+ while ('' !== $remainder) {
+ $char = $remainder[0];
+ $offset += 1;
+
+ if ('"' === $char) {
+ return [DisplayString::fromEncoded($output), $offset];
+ }
+
+ $remainder = substr($remainder, 1);
+ if ('%' !== $char) {
+ $output .= $char;
+ continue;
+ }
+
+ $octet = substr($remainder, 0, 2);
+ $offset += 2;
+ if (1 === preg_match('/^[0-9a-f]]{2}$/', $octet)) {
+ throw new SyntaxError("The HTTP textual representation '$httpValue' for a DisplayString contains uppercased percent encoding sequence.");
+ }
+
+ $remainder = substr($remainder, 2);
+ $output .= $char.$octet;
+ }
+
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a DisplayString contains an invalid end string.");
+ }
+
+ /**
+ * Returns a Token from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.6
+ *
+ * @return array{0:Token, 1:int}
+ */
+ private static function extractToken(string $httpValue): array
+ {
+ preg_match(self::REGEXP_TOKEN, $httpValue, $found);
+
+ $token = $found['token'] ?? throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Token contains invalid characters.");
+
+ return [Token::fromString($token), strlen($token)];
+ }
+
+ /**
+ * Returns a Byte Sequence from an HTTP textual representation and the consumed offset in a tuple.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc9651.html#section-4.2.7
+ *
+ * @return array{0:Bytes, 1:int}
+ */
+ private static function extractBytes(string $httpValue): array
+ {
+ if (1 !== preg_match(self::REGEXP_BYTES, $httpValue, $found)) {
+ throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Byte Sequence contains invalid characters.");
+ }
+
+ return [Bytes::fromEncoded($found['byte']), strlen($found['sequence'])];
+ }
+}