Drobne bug-fixes (maile i czytanie pdf dla GPT-api)

This commit is contained in:
Flatlogic Bot 2026-01-09 07:13:59 +00:00
parent 595f1a1c17
commit 2823787d46
175 changed files with 9188 additions and 27 deletions

View File

@ -1,5 +1,9 @@
<?php
require_once __DIR__ . '/../includes/init.php';
require_once __DIR__ . '/../includes/auth.php';
require_once __DIR__ . '/../includes/pdfparser_autoloader.php';
use \PrinsFrank\PdfParser\PdfParser;
require_admin();
@ -17,11 +21,12 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST') {
$title = $_POST['title'];
$content = $_POST['content'];
$tags = $_POST['tags'] ?? null;
$product_id = $_POST['product_id'] ?: null;
$language = $_POST['language'];
$is_active = isset($_POST['is_active']);
$product_id = !empty($_POST['product_id']) ? $_POST['product_id'] : null;
$language = $_POST['language'] ?? 'en';
$is_active = isset($_POST['is_active']) ? 1 : 0;
$file_path = $document['file_path'] ?? null;
$file_content = $document['file_content'] ?? null;
// Handle file removal
if ($id && isset($_POST['remove_file']) && $_POST['remove_file'] == '1') {
@ -29,6 +34,7 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST') {
unlink(__DIR__ . '/../uploads/kb_documents/' . $file_path);
}
$file_path = null;
$file_content = null;
}
// Handle new file upload
@ -53,16 +59,20 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST') {
if (move_uploaded_file($_FILES['pdf_file']['tmp_name'], $target_path)) {
    $file_path = $new_file_name;
    // Text extraction is best-effort: the upload itself has already succeeded,
    // so a PDF the parser cannot handle must not abort the request and leave
    // an orphaned file on disk without a database row.
    try {
        $parser = new PdfParser();
        $pdf = $parser->parseFile($target_path);
        $file_content = $pdf->getText();
    } catch (\PrinsFrank\PdfParser\Exception\PdfParserException $e) {
        error_log(sprintf('PDF text extraction failed for "%s": %s', $new_file_name, $e->getMessage()));
        // Never keep text that belonged to a previously attached file.
        $file_content = null;
    }
}
}
}
if ($id) {
$stmt = db()->prepare("UPDATE kb_documents SET title = ?, content = ?, tags = ?, product_id = ?, language = ?, is_active = ?, file_path = ? WHERE id = ?");
$stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path, $id]);
$stmt = db()->prepare("UPDATE kb_documents SET title = ?, content = ?, tags = ?, product_id = ?, language = ?, is_active = ?, file_path = ?, file_content = ? WHERE id = ?");
$stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path, $file_content, $id]);
} else {
$stmt = db()->prepare("INSERT INTO kb_documents (title, content, tags, product_id, language, is_active, file_path) VALUES (?, ?, ?, ?, ?, ?, ?)");
$stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path]);
$stmt = db()->prepare("INSERT INTO kb_documents (title, content, tags, product_id, language, is_active, file_path, file_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
$stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path, $file_content]);
}
header('Location: kb_documents.php');
@ -160,4 +170,4 @@ require_once __DIR__ . '/../includes/html_head.php';
</div>
<?php require_once __DIR__ . '/../includes/footer.php'; ?>
<?php require_once __DIR__ . '/../includes/footer.php'; ?>

View File

@ -0,0 +1 @@
[09-Jan-2026 07:03:16 UTC] PHP Warning: Undefined variable $pageTitle in /home/ubuntu/executor/workspace/admin/order_details.php on line 133

View File

@ -26,7 +26,7 @@ function search_kb($message) {
$sql = "SELECT * FROM kb_documents WHERE is_active = 1 AND (";
$conditions = [];
foreach ($terms as $term) {
$conditions[] = "title LIKE ? OR content LIKE ?";
$conditions[] = "title LIKE ? OR content LIKE ? OR file_content LIKE ?";
}
$sql .= implode(' OR ', $conditions) . ") LIMIT 3";
@ -35,6 +35,7 @@ function search_kb($message) {
foreach ($terms as $term) {
$params[] = '%' . $term . '%';
$params[] = '%' . $term . '%';
$params[] = '%' . $term . '%';
}
$stmt->execute($params);
return $stmt->fetchAll();
@ -62,6 +63,9 @@ if (!empty($kb_documents)) {
// Append every knowledge-base document in $kb_documents to the system prompt
// as a bulleted entry with its title and content.
foreach ($kb_documents as $doc) {
$system_prompt .= "- Title: " . $doc['title'] . "\n";
$system_prompt .= " Content: " . $doc['content'] . "\n";
// Only documents with a non-empty file_content value add a "File Content" line.
// NOTE(review): the value is appended untruncated — confirm that large
// file_content values cannot push the prompt past the model's input limit.
if (!empty($doc['file_content'])) {
$system_prompt .= " File Content: " . $doc['file_content'] . "\n";
}
}
}

View File

@ -0,0 +1 @@
ALTER TABLE `kb_documents` ADD `file_content` TEXT NULL;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\Registry\Adobe;
use Override;
use PrinsFrank\PdfParser\Document\CMap\Registry\CMapResource;
use PrinsFrank\PdfParser\Document\CMap\ToUnicode\BFRange;
use PrinsFrank\PdfParser\Document\CMap\ToUnicode\CodeSpaceRange;
use PrinsFrank\PdfParser\Document\CMap\ToUnicode\ToUnicodeCMap;
class Identity0 implements CMapResource {
    /**
     * Builds the ToUnicode CMap for this resource: a single codespace range
     * 0x0000-0xFFFF with a byte size of 2, mapped through one BFRange that
     * covers the same range and starts at destination '0000'.
     */
    #[Override]
    public function getToUnicodeCMap(): ToUnicodeCMap {
        $fullCodeSpace = new CodeSpaceRange(0x0000, 0xFFFF);
        $identityRange = new BFRange(0x0000, 0xFFFF, ['0000']);

        return new ToUnicodeCMap([$fullCodeSpace], 2, $identityRange);
    }
}

View File

@ -0,0 +1,10 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\Registry;
use PrinsFrank\PdfParser\Document\CMap\ToUnicode\ToUnicodeCMap;
/** Contract for CMap registry entries that can supply a ToUnicodeCMap. */
interface CMapResource {
/**
* Returns the ToUnicodeCMap represented by this resource.
*
* @internal
*/
public function getToUnicodeCMap(): ToUnicodeCMap;
}

View File

@ -0,0 +1,17 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\Registry;
use PrinsFrank\PdfParser\Document\CMap\Registry\Adobe\Identity0;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Integer\IntegerValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\TextString\TextStringValue;
/**
 * Resolves a (registry, ordering, supplement) triple to a known CMap resource.
 *
 * @internal
 */
class RegistryOrchestrator {
    /**
     * Returns the resource registered for the given triple, or null when the
     * combination is not known. Only Adobe / Identity / 0 is recognised.
     */
    public static function getForRegistryOrderingSupplement(TextStringValue $registry, TextStringValue $ordering, IntegerValue $supplement): ?CMapResource {
        // Read all three values up front so each accessor is invoked exactly once.
        $registryName = $registry->getText();
        $orderingName = $ordering->getText();
        $supplementNumber = $supplement->value;

        if ($registryName === 'Adobe' && $orderingName === 'Identity' && $supplementNumber === 0) {
            return new Identity0();
        }

        return null;
    }
}

View File

@ -0,0 +1,27 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * A single bfchar entry: one source character code mapped to one destination
 * hex string.
 *
 * @internal
 */
class BFChar {
    public function __construct(
        public readonly int $sourceCode,
        public readonly string $destinationString,
    ) {
    }

    /** Tells whether this entry maps exactly the given character code. */
    public function containsCharacterCode(int $characterCode): bool {
        return $this->sourceCode === $characterCode;
    }

    /**
     * Decodes the destination string for the given character code.
     *
     * @throws ParseFailureException when the code is not the one this entry maps
     */
    public function toUnicode(int $characterCode): ?string {
        if ($this->sourceCode === $characterCode) {
            return CodePoint::toString($this->destinationString);
        }

        throw new ParseFailureException(sprintf('This BFChar does not contain character code %d', $characterCode));
    }
}

View File

@ -0,0 +1,35 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * A bfrange entry: a contiguous range of source character codes mapped either
 * to one starting destination (incremented per code) or to an explicit list of
 * destinations, one per code.
 *
 * @internal
 */
class BFRange {
    /** @param list<string> $destinationCodePoints hex strings, either one start value or one entry per source code */
    public function __construct(
        public readonly int $sourceCodeStart,
        public readonly int $sourceCodeEnd,
        public readonly array $destinationCodePoints,
    ) {
    }

    /** Tells whether the character code lies between sourceCodeStart and sourceCodeEnd (both included). */
    public function containsCharacterCode(int $characterCode): bool {
        return $characterCode >= $this->sourceCodeStart
            && $characterCode <= $this->sourceCodeEnd;
    }

    /**
     * Decodes the destination for the given character code.
     *
     * @throws ParseFailureException when an explicit destination list has no entry for the code
     */
    public function toUnicode(int $characterCode): ?string {
        $offset = $characterCode - $this->sourceCodeStart;

        if (count($this->destinationCodePoints) === 1) {
            $start = $this->destinationCodePoints[0];
            $incremented = dechex(((int) hexdec($start)) + $offset);

            // dechex() drops leading zeros. Restore the original width so that
            // multi-unit destinations such as '00410042' are still split on the
            // correct 4-digit boundaries when decoded.
            return CodePoint::toString(str_pad($incremented, strlen($start), '0', STR_PAD_LEFT));
        }

        return CodePoint::toString(
            $this->destinationCodePoints[$offset]
                ?? throw new ParseFailureException(),
        );
    }
}

View File

@ -0,0 +1,37 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
class CodePoint {
    /**
     * Decodes a hex string made of 4-digit code units into a UTF-8 string.
     *
     * Two consecutive units that form a high/low surrogate pair are combined
     * into one supplementary code point; every other unit is decoded on its own.
     * The input stays a string because several concatenated units can exceed
     * PHP_INT_MAX when read as one integer.
     *
     * @throws InvalidArgumentException when the input is not a hex string
     * @throws ParseFailureException when a unit is not a valid code point
     */
    public static function toString(string $hexString): string {
        if (!ctype_xdigit($hexString)) {
            throw new InvalidArgumentException(sprintf('Expected hex string, got "%s"', $hexString));
        }

        $decoded = '';
        $length = strlen($hexString);
        for ($i = 0; $i < $length;) {
            // Look at up to two units at once to detect a surrogate pair.
            $twoUnits = (int) hexdec(substr($hexString, $i, 8));
            $highSurrogate = ($twoUnits >> 16) & 0xFFFF;
            $lowSurrogate = $twoUnits & 0xFFFF;

            if ($highSurrogate >= 0xD800 && $highSurrogate <= 0xDBFF
                && $lowSurrogate >= 0xDC00 && $lowSurrogate <= 0xDFFF) {
                $charCodepoint = (($highSurrogate - 0xD800) << 10) + ($lowSurrogate - 0xDC00) + 0x10000;
                $i += 8; // Surrogate pairs are 4 bytes long
            } else {
                $charCodepoint = (int) hexdec(substr($hexString, $i, 4));
                $i += 4; // Non surrogate pairs are 2 bytes long
            }

            // Pin the encoding so the result does not depend on mb_internal_encoding().
            $char = mb_chr($charCodepoint, 'UTF-8');
            if ($char === false) {
                throw new ParseFailureException();
            }

            $decoded .= $char;
        }

        return $decoded;
    }
}

View File

@ -0,0 +1,11 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
/** Immutable pair of integer bounds describing one codespace range of a CMap. */
class CodeSpaceRange {
/**
* @param int $codeSpaceStart start of the range
* @param int $codeSpaceEnd end of the range
*/
public function __construct(
public readonly int $codeSpaceStart,
public readonly int $codeSpaceEnd,
) {
}
}

View File

@ -0,0 +1,64 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
class ToUnicodeCMap {
    /** @var list<BFRange|BFChar> */
    private readonly array $bfCharRangeInfo;

    /**
     * @no-named-arguments
     *
     * @param list<CodeSpaceRange> $codeSpaceRanges
     * @param int<1, max> $byteSize
     * @throws InvalidArgumentException when $byteSize is smaller than 1
     */
    public function __construct(
        public readonly array $codeSpaceRanges,
        public readonly int $byteSize,
        BFRange|BFChar ...$bfCharRangeInfo,
    ) {
        if ($byteSize < 1) {
            throw new InvalidArgumentException();
        }

        $this->bfCharRangeInfo = $bfCharRangeInfo;
    }

    /**
     * Splits the hex string into character codes of byteSize bytes each and
     * concatenates their Unicode mappings. Codes without a mapping contribute
     * nothing.
     *
     * @throws PdfParserException
     */
    public function textToUnicode(string $characterGroup): string {
        $unicode = '';
        foreach (str_split($characterGroup, $this->byteSize * 2) as $hexCode) {
            $unicode .= $this->charToUnicode((int) hexdec($hexCode)) ?? '';
        }

        return $unicode;
    }

    /**
     * Maps one character code to its Unicode string, or null when unmapped.
     *
     * @throws PdfParserException
     */
    protected function charToUnicode(int $characterCode): ?string {
        $mappedOnlyToNull = false;
        foreach ($this->bfCharRangeInfo as $mapping) {
            if ($mapping->containsCharacterCode($characterCode)) {
                $mapped = $mapping->toUnicode($characterCode);
                // Some characters map to NULL in one BFRange and to an actual character in another
                if ($mapped !== "\0") {
                    return $mapped;
                }

                $mappedOnlyToNull = true;
            }
        }

        if ($mappedOnlyToNull) {
            return "\0"; // Only return NULL when it is the only character this is mapped to
        }

        return $characterCode === 0 ? '' : null;
    }
}

View File

@ -0,0 +1,12 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
/**
* Keywords that delimit the codespacerange, bfchar and bfrange sections of a
* ToUnicode CMap. Each Begin* case has a matching End* case.
*/
enum ToUnicodeCMapOperator: string {
// Delimiters of the codespace range section.
case BeginCodeSpaceRange = 'begincodespacerange';
case EndCodeSpaceRange = 'endcodespacerange';
// Delimiters of a bfchar section.
case BeginBFChar = 'beginbfchar';
case EndBFChar = 'endbfchar';
// Delimiters of a bfrange section.
case BeginBFRange = 'beginbfrange';
case EndBFRange = 'endbfrange';
}

View File

@ -0,0 +1,91 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CMap\ToUnicode;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Stream\Stream;
/**
 * Parses the textual body of a ToUnicode CMap into a ToUnicodeCMap instance.
 *
 * @internal
 */
class ToUnicodeCMapParser {
    /**
     * Reads the codespacerange section and every bfchar / bfrange section found
     * between $startOffset and $startOffset + $nrOfBytes of the stream.
     *
     * The byte size is derived from the hex width of the codespace ranges and
     * falls back to 2 when no range line is present. Entries are handed to the
     * ToUnicodeCMap in the order their sections occur in the stream.
     *
     * @throws PdfParserException
     */
    public static function parse(Stream $stream, int $startOffset, int $nrOfBytes): ToUnicodeCMap {
        $endOffset = $startOffset + $nrOfBytes;

        $beginCodeSpaceRangePos = $stream->firstPos(ToUnicodeCMapOperator::BeginCodeSpaceRange, $startOffset, $endOffset)
            ?? throw new ParseFailureException(sprintf('Missing %s', ToUnicodeCMapOperator::BeginCodeSpaceRange->value));
        $beginCodeSpaceRangePos += strlen(ToUnicodeCMapOperator::BeginCodeSpaceRange->value);
        $endCodeSpaceRangePos = $stream->firstPos(ToUnicodeCMapOperator::EndCodeSpaceRange, $beginCodeSpaceRangePos, $endOffset)
            ?? throw new ParseFailureException();
        $codeSpaceRangeSectionString = $stream->read($beginCodeSpaceRangePos, $endCodeSpaceRangePos - $beginCodeSpaceRangePos);

        $codeSpaceRanges = [];
        $byteSize = null;
        foreach (explode("\n", $codeSpaceRangeSectionString) as $codeSpaceRangeSectionStringLine) {
            if (trim($codeSpaceRangeSectionStringLine) === '') {
                continue;
            }

            if (preg_match('/^\s*<\s*(?P<start>[0-9a-fA-F]+)\s*>\s*<\s*(?P<end>[0-9a-fA-F]+)\s*>\s*$/', $codeSpaceRangeSectionStringLine, $matchesSpaceRange) !== 1) {
                throw new ParseFailureException('Unrecognized codespacerange format');
            }

            if (strlen($matchesSpaceRange['start']) !== strlen($matchesSpaceRange['end'])) {
                throw new ParseFailureException(sprintf('Start(%s) and end(%s) of codespacerange don\'t have the same number of bytes', $matchesSpaceRange['start'], $matchesSpaceRange['end']));
            }

            if (($strlen = strlen($matchesSpaceRange['start'])) % 2 !== 0 || !is_int($byteSizeRange = $strlen / 2)) {
                throw new ParseFailureException(sprintf('Codespaceranges must be an even number of hex digits, got %d', $strlen));
            }

            if ($byteSize !== null && $byteSizeRange !== $byteSize) {
                throw new ParseFailureException(sprintf('Byte size of codespaceranges is inconsistent, expected %d, got %d', $byteSize, $byteSizeRange));
            }

            $byteSize = $byteSizeRange;
            $codeSpaceRanges[] = new CodeSpaceRange((int) hexdec($matchesSpaceRange['start']), (int) hexdec($matchesSpaceRange['end']));
        }

        /** @var array<int, list<BFRange|BFChar>> $bfCharRangeInfo where the first index is used to track the position of the element in the CMap */
        $bfCharRangeInfo = [];

        $lastPos = $startOffset;
        while (($beginBFCharPos = $stream->firstPos(ToUnicodeCMapOperator::BeginBFChar, $lastPos, $endOffset)) !== null) {
            $beginBFCharPos += strlen(ToUnicodeCMapOperator::BeginBFChar->value);
            $endBFCharPos = $stream->firstPos(ToUnicodeCMapOperator::EndBFChar, $beginBFCharPos, $endOffset)
                ?? throw new ParseFailureException();
            if (preg_match_all('/\s*<(?P<source>[^>]+)>\s*<(?P<destination>[^>]+)>\s*/', $stream->read($beginBFCharPos, $endBFCharPos - $beginBFCharPos), $matchesBFChar, PREG_SET_ORDER) === 0) {
                throw new ParseFailureException('Unrecognized bfchar format');
            }

            foreach ($matchesBFChar as $matchBFChar) {
                $bfCharRangeInfo[$beginBFCharPos][] = new BFChar((int) hexdec(trim($matchBFChar['source'])), trim($matchBFChar['destination']));
            }

            $lastPos = $endBFCharPos;
        }

        $lastPos = $startOffset;
        while (($beginBFRangePos = $stream->firstPos(ToUnicodeCMapOperator::BeginBFRange, $lastPos, $endOffset)) !== null) {
            $endBFRangePos = $stream->firstPos(ToUnicodeCMapOperator::EndBFRange, $beginBFRangePos, $endOffset)
                ?? throw new ParseFailureException();
            if (preg_match_all('/\s*<(?P<start>[^>]+)>\s*<(?P<end>[^>]+)>\s*(?P<targetString>(<[^>]+>)|(\[\s*(<[^>]+>\s*)+\]))/', $stream->read($beginBFRangePos, $endBFRangePos - $beginBFRangePos), $matchesBFRange, PREG_SET_ORDER) === 0) {
                throw new ParseFailureException('Unrecognized bfrange format');
            }

            foreach ($matchesBFRange as $matchBFRange) {
                // The pattern above accepts any whitespace between the entries of a
                // destination array, so strip all of it (not only spaces) before
                // splitting on '><'. Otherwise newline- or tab-separated arrays
                // yield destinations that are not valid hex strings.
                $targetString = preg_replace('/\s+/', '', $matchBFRange['targetString'])
                    ?? throw new ParseFailureException('Unable to normalize bfrange destination');

                $bfCharRangeInfo[$beginBFRangePos][] = new BFRange(
                    (int) hexdec(trim($matchBFRange['start'])),
                    (int) hexdec(trim($matchBFRange['end'])),
                    array_map(
                        fn (string $value) => trim($value),
                        explode('><', rtrim(ltrim($targetString, '[<'), '>]'))
                    )
                );
            }

            $lastPos = $endBFRangePos;
        }

        ksort($bfCharRangeInfo); // Make sure that Char and Range are in order they occur in the CMap

        return new ToUnicodeCMap(
            $codeSpaceRanges,
            $byteSize !== null ? $byteSize : 2,
            ...array_merge(...$bfCharRangeInfo)
        );
    }
}

View File

@ -0,0 +1,28 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\CompatibilityOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\InlineImageOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\MarkedContentOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\TextObjectOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\ClippingPathOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\ColorOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\GraphicsStateOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\PathConstructionOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\PathPaintingOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\TextPositioningOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\TextShowingOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\TextStateOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Type3FontOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\XObjectOperator;
/**
* One command of a content stream: an operator enum case together with the
* operands that belong to it, kept as a single string.
*
* @internal
*/
class ContentStreamCommand {
/**
* @param string $operands operands of the command; the operator enums in this package parse them on demand
*/
public function __construct(
public readonly CompatibilityOperator|InlineImageOperator|MarkedContentOperator|TextObjectOperator|ClippingPathOperator|ColorOperator|GraphicsStateOperator|PathConstructionOperator|PathPaintingOperator|TextPositioningOperator|TextShowingOperator|TextStateOperator|Type3FontOperator|XObjectOperator $operator,
public readonly string $operands
) {
}
}

View File

@ -0,0 +1,13 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object;
/**
* Operators that open and close a compatibility section, backed by their
* content stream keyword.
*
* @internal
*
* @specification Table 33 - Compatibility operators
*/
enum CompatibilityOperator: string {
// Opens a compatibility section.
case BeginCompatibilitySection = 'BX';
// Closes a compatibility section.
case EndCompatibilitySection = 'EX';
}

View File

@ -0,0 +1,14 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object;
/**
* Operators that delimit an inline image, backed by their content stream
* keyword.
*
* @internal
*
* @specification table 90 - Inline image operators
*/
enum InlineImageOperator: string {
// Starts an inline image.
case Begin = 'BI';
// Starts the image data of an inline image.
case BeginImageData = 'ID';
// Ends an inline image.
case End = 'EI';
}

View File

@ -0,0 +1,16 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object;
/**
 * Marked-content operators, backed by their content stream keyword.
 *
 * @internal
 *
 * @specification Table 352 - Marked-content operators
 */
enum MarkedContentOperator: string {
    /** Marked-content point. The keyword defined by the specification is MP (there is no MD operator). */
    case Tag = 'MP';
    /** Marked-content point with an associated property list. */
    case TagProperties = 'DP';
    /** Begins a marked-content sequence. */
    case BeginMarkedContent = 'BMC';
    /** Begins a marked-content sequence with an associated property list. */
    case BeginMarkedContentWithProperties = 'BDC';
    /** Ends a marked-content sequence. */
    case EndMarkedContent = 'EMC';
}

View File

@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object;
/**
* Operators that delimit a text object, backed by their content stream keyword.
*
* @internal
*
* @specification Table 105 - Text object operators
*/
enum TextObjectOperator: string {
// Begins a text object.
case BEGIN = 'BT';
// Ends a text object.
case END = 'ET';
}

View File

@ -0,0 +1,13 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
/**
* Clipping path operators, backed by their content stream keyword.
*
* @internal
*
* @specification Table 60 - Clipping path operators
*/
enum ClippingPathOperator: string {
// Intersects the clipping path with the current path.
case INTERSECT = 'W';
// Same as INTERSECT, using the even-odd variant.
case INTERSECT_EVEN_ODD = 'W*';
}

View File

@ -0,0 +1,23 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
/**
* Colour operators, backed by their content stream keyword.
*
* Keywords come in pairs that differ only in letter case; per the case names,
* the upper-case keyword is the stroking variant (e.g. 'SC' vs 'sc', 'RG' vs 'rg').
* NOTE(review): case names such as SetStrokingColorSpace ('G') and
* SetColorSpace ('g') do not say which colour space is meant — check them
* against the referenced specification table before relying on the names.
*
* @internal
*
* @specification table 73 - Colour operators
*/
enum ColorOperator: string {
case SetName = 'CS';
case SetNameNonStroking = 'cs';
case SetStrokingColor = 'SC';
case SetStrokingParams = 'SCN';
case SetColor = 'sc';
case SetColorParams = 'scn';
case SetStrokingColorSpace = 'G';
case SetColorSpace = 'g';
case SetStrokingColorDeviceRGB = 'RG';
case SetColorDeviceRGB = 'rg';
case SetStrokingColorDeviceCMYK = 'K';
case SetColorDeviceCMYK = 'k';
}

View File

@ -0,0 +1,55 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
use Override;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTransformationMatrix;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TransformationMatrix;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * Graphics state operators, backed by their content stream keyword.
 *
 * @internal
 *
 * @specification table 56 - Graphics state operators
 */
enum GraphicsStateOperator: string implements InteractsWithTransformationMatrix {
    case SaveCurrentStateToStack = 'q';
    case RestoreMostRecentStateFromStack = 'Q';
    case ModifyCurrentTransformationMatrix = 'cm';
    case SetLineWidth = 'w';
    case SetLineCap = 'J';
    case SetLineJoin = 'j';
    case SetMiterJoin = 'M';
    case SetLineDash = 'd';
    case SetIntent = 'ri';
    case SetFlatness = 'i';
    case SetDictName = 'gs';

    /**
     * Applies this operator to the given matrix. Only the 'cm' operator changes
     * it: its six numeric operands form a matrix that the current matrix is
     * multiplied with. Every other operator returns the matrix unchanged.
     *
     * @throws ParseFailureException when 'cm' does not receive exactly six operands
     */
    #[Override]
    public function applyToTransformationMatrix(string $operands, TransformationMatrix $transformationMatrix): TransformationMatrix {
        if ($this !== self::ModifyCurrentTransformationMatrix) {
            return $transformationMatrix;
        }

        // Collapse every whitespace run so the operands can be split on single spaces.
        $normalized = preg_replace('/\s+/', ' ', $operands);
        if ($normalized === null) {
            throw new ParseFailureException('An error occurred while trying to remove duplicate spaces from the operands');
        }

        $components = explode(' ', trim($normalized));
        $componentCount = count($components);
        if ($componentCount !== 6) {
            throw new ParseFailureException(sprintf('Expected 6 values for matrix transformation, got %d: "%s"', $componentCount, $normalized));
        }

        $values = array_map(static fn (string $component): float => (float) $component, $components);

        return $transformationMatrix->multiplyWith(new TransformationMatrix(...$values));
    }
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TextState;
/** Implemented by operators that can derive a text state from their operands. */
interface InteractsWithTextState {
/**
* Returns the text state that results from applying the operator with the
* given operands to $textState, which may be null when no state exists yet.
*/
public function applyToTextState(string $operands, ?TextState $textState): ?TextState;
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TransformationMatrix;
/** Implemented by operators that can derive a transformation matrix from their operands. */
interface InteractsWithTransformationMatrix {
/**
* Returns the matrix that results from applying the operator with the given
* operands to $transformationMatrix.
*/
public function applyToTransformationMatrix(string $operands, TransformationMatrix $transformationMatrix): TransformationMatrix;
}

View File

@ -0,0 +1,11 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\PositionedTextElement;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TransformationMatrix;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TextState;
/** Implemented by operators that turn their operands into a positioned text element. */
interface ProducesPositionedTextElements {
/**
* Builds the positioned text element for the given operands from the text
* matrix, the global transformation matrix and the current text state.
*/
public function getPositionedTextElement(string $operands, TransformationMatrix $textMatrix, TransformationMatrix $globalTransformationMatrix, TextState $textState): PositionedTextElement;
}

View File

@ -0,0 +1,18 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
/**
* Path construction operators, backed by their content stream keyword.
*
* @internal
*
* @specification Table 58 - Path construction operators
*/
enum PathConstructionOperator: string {
case MOVE = 'm';
case LINE = 'l';
// Three Bezier curve variants; the numeric suffix follows the case names as declared.
case CURVE_BEZIER_123 = 'c';
case CURVE_BEZIER_23 = 'v';
case CURVE_BEZIER_13 = 'y';
case CLOSE = 'h';
case RECTANGLE = 're';
}

View File

@ -0,0 +1,22 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
/**
* Path-painting operators, backed by their content stream keyword.
*
* NOTE(review): there is no case for the plain 'b' keyword, and the case
* backed by 'b*' is named CLOSE_FILL_STROKE without an EVEN_ODD suffix, unlike
* the other starred keywords here ('f*', 'B*'). Verify both points against the
* referenced specification table.
*
* @internal
*
* @specification Table 59 - Path-painting operators
*/
enum PathPaintingOperator: string {
case STROKE = 'S';
case CLOSE_STROKE = 's';
case FILL = 'f';
/** Identical to FILL */
case FILL_DEPRECATED = 'F';
case FILL_EVEN_ODD = 'f*';
case FILL_STROKE = 'B';
case FILL_STROKE_EVEN_ODD = 'B*';
case CLOSE_FILL_STROKE = 'b*';
case END = 'n';
}

View File

@ -0,0 +1,76 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
use Override;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTransformationMatrix;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTextState;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TransformationMatrix;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TextState;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
/**
 * Text positioning operators, backed by their content stream keyword.
 *
 * @internal
 */
enum TextPositioningOperator: string implements InteractsWithTransformationMatrix, InteractsWithTextState {
    case MOVE_OFFSET = 'Td';
    case MOVE_OFFSET_LEADING = 'TD';
    case SET_MATRIX = 'Tm';
    case NEXT_LINE = 'T*';

    /**
     * Applies the operator to the given matrix.
     *
     * 'Td' and 'TD' add their two operands to the matrix offsets, 'Tm' replaces
     * the matrix with its six operands, and 'T*' leaves the matrix unchanged.
     * NOTE(review): the offsets are added without applying the current scale or
     * shear — confirm this simplification is intended.
     *
     * @throws ParseFailureException when the number of operands does not fit the operator
     */
    #[Override]
    public function applyToTransformationMatrix(string $operands, TransformationMatrix $transformationMatrix): TransformationMatrix {
        $operands = preg_replace('/\s+/', ' ', $operands) ?? throw new RuntimeException();
        if ($this === self::MOVE_OFFSET || $this === self::MOVE_OFFSET_LEADING) {
            $offsets = explode(' ', trim($operands));
            if (count($offsets) !== 2) {
                throw new ParseFailureException();
            }

            return new TransformationMatrix(
                $transformationMatrix->scaleX,
                $transformationMatrix->shearX,
                $transformationMatrix->shearY,
                $transformationMatrix->scaleY,
                $transformationMatrix->offsetX + (float) $offsets[0],
                $transformationMatrix->offsetY + (float) $offsets[1]
            );
        }

        if ($this === self::SET_MATRIX) {
            $matrix = explode(' ', trim($operands));
            if (count($matrix) !== 6) {
                throw new ParseFailureException();
            }

            return new TransformationMatrix((float) $matrix[0], (float) $matrix[1], (float) $matrix[2], (float) $matrix[3], (float) $matrix[4], (float) $matrix[5]);
        }

        return $transformationMatrix;
    }

    /**
     * Applies the operator to the text state. Only 'TD' changes it: the leading
     * becomes the negated second operand, and all other values are carried over
     * (or defaulted when no state exists yet).
     *
     * @throws ParseFailureException when 'TD' does not receive exactly two operands
     */
    #[Override]
    public function applyToTextState(string $operands, ?TextState $textState): ?TextState {
        if ($this !== self::MOVE_OFFSET_LEADING) {
            return $textState;
        }

        // Split on any whitespace run, matching the normalisation done in
        // applyToTransformationMatrix, so operands separated by several spaces
        // or a line break are accepted here as well.
        $offsets = preg_split('/\s+/', trim($operands)) ?: [];
        if (count($offsets) !== 2) {
            throw new ParseFailureException();
        }

        return new TextState(
            $textState->fontName ?? null,
            $textState->fontSize ?? null,
            $textState->charSpace ?? 0,
            $textState->wordSpace ?? 0,
            $textState->scale ?? 100,
            -1 * (float) $offsets[1],
            $textState->render ?? 0,
            $textState->rise ?? 0,
        );
    }
}

View File

@ -0,0 +1,53 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
use Override;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTextState;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\ProducesPositionedTextElements;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\PositionedTextElement;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TransformationMatrix;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TextState;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * Text showing operators, backed by their content stream keyword.
 *
 * @internal
 */
enum TextShowingOperator: string implements InteractsWithTextState, ProducesPositionedTextElements {
    case SHOW = 'Tj';
    case MOVE_SHOW = '\'';
    case MOVE_SHOW_SPACING = '"';
    case SHOW_ARRAY = 'TJ';

    /**
     * Applies the operator to the text state. Only the '"' operator changes it:
     * the first operand becomes the word spacing and the second the character
     * spacing. All other values are carried over (or defaulted when no state
     * exists yet).
     *
     * @throws ParseFailureException when '"' does not receive exactly two operands
     */
    #[Override]
    public function applyToTextState(string $operands, ?TextState $textState): ?TextState {
        if ($this !== self::MOVE_SHOW_SPACING) {
            return $textState;
        }

        // Split on any whitespace run so operands separated by several spaces
        // or a line break are not rejected.
        $spacing = preg_split('/\s+/', trim($operands)) ?: [];
        if (count($spacing) !== 2) {
            throw new ParseFailureException();
        }

        return new TextState(
            $textState->fontName ?? null,
            $textState->fontSize ?? null,
            (float) $spacing[1],
            (float) $spacing[0],
            $textState->scale ?? 100,
            $textState->leading ?? 0,
            $textState->render ?? 0,
            $textState->rise ?? 0,
        );
    }

    /**
     * Wraps the operands in a positioned text element whose matrix is the
     * global transformation matrix multiplied with the text matrix.
     */
    #[Override]
    public function getPositionedTextElement(string $operands, TransformationMatrix $textMatrix, TransformationMatrix $globalTransformationMatrix, TextState $textState): PositionedTextElement {
        return new PositionedTextElement(
            $operands,
            $globalTransformationMatrix->multiplyWith($textMatrix),
            $textState
        );
    }
}

View File

@ -0,0 +1,124 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
use Override;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTextState;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TextState;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\ExtendedDictionaryKey;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * Text state operators, backed by their content stream keyword. Each operator
 * replaces exactly one aspect of the text state ('Tf' replaces font name and
 * font size together) and carries every other value over.
 *
 * @internal
 */
enum TextStateOperator: string implements InteractsWithTextState {
    case CHAR_SPACE = 'Tc';
    case WORD_SPACE = 'Tw';
    case SCALE = 'Tz';
    case LEADING = 'TL';
    case FONT_SIZE = 'Tf';
    case RENDER = 'Tr';
    case RISE = 'Ts';

    /**
     * Returns a new text state with the value(s) owned by this operator taken
     * from $operands. Values missing because $textState is null default to:
     * no font, char/word spacing 0, scale 100, leading 0, render 0, rise 0.
     *
     * @throws ParseFailureException|InvalidArgumentException
     */
    #[Override]
    public function applyToTextState(string $operands, ?TextState $textState): TextState {
        $overrides = match ($this) {
            self::CHAR_SPACE => ['charSpace' => (float) $operands],
            self::WORD_SPACE => ['wordSpace' => (float) $operands],
            self::SCALE => ['scale' => self::parseScale($operands)],
            self::LEADING => ['leading' => (float) $operands],
            self::FONT_SIZE => self::parseFontSelection($operands),
            self::RENDER => ['render' => (int) $operands],
            self::RISE => ['rise' => (float) $operands],
        };

        // Key order mirrors the positional order of the TextState constructor
        // arguments. array_replace() keeps that order while swapping in the
        // overridden values.
        $values = array_replace(
            [
                'fontName' => $textState->fontName ?? null,
                'fontSize' => $textState->fontSize ?? null,
                'charSpace' => $textState->charSpace ?? 0,
                'wordSpace' => $textState->wordSpace ?? 0,
                'scale' => $textState->scale ?? 100,
                'leading' => $textState->leading ?? 0,
                'render' => $textState->render ?? 0,
                'rise' => $textState->rise ?? 0,
            ],
            $overrides,
        );

        return new TextState(...array_values($values));
    }

    /**
     * Parses the operand of the scale operator. Integer-looking operands stay
     * integers; every other numeric spelling (including e.g. "100.0", which a
     * string round-trip comparison would reject) becomes a float.
     *
     * @throws ParseFailureException when the operand is not numeric
     */
    private static function parseScale(string $operands): int|float {
        $value = trim($operands);
        if (!is_numeric($value)) {
            throw new ParseFailureException(sprintf('Invalid scale operand "%s" for scale operator', $operands));
        }

        return (string) (int) $value === $value ? (int) $value : (float) $value;
    }

    /**
     * Parses the "/FontReference size" operands of the Tf operator.
     *
     * @return array{fontName: DictionaryKey|ExtendedDictionaryKey, fontSize: float}
     * @throws InvalidArgumentException when the operands do not have that shape
     */
    private static function parseFontSelection(string $operands): array {
        if (preg_match('/^\/(?<fontReference>[A-Za-z_0-9\.\-\+]+)\s+(?<FontSize>-?[0-9]+(\.[0-9]+)?)$/', $operands, $matches) !== 1) {
            throw new InvalidArgumentException(sprintf('Invalid font operand "%s" for Tf operator', substr($operands, 0, 200)));
        }

        return [
            'fontName' => DictionaryKey::tryFrom($matches['fontReference']) ?? new ExtendedDictionaryKey($matches['fontReference']),
            'fontSize' => (float) $matches['FontSize'],
        ];
    }
}

View File

@ -0,0 +1,13 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
/**
* Type 3 font operators, backed by their content stream keyword.
*
* @internal
*
* @specification table 111 - Type 3 font operators
*/
enum Type3FontOperator: string {
// Sets the glyph width.
case SetWidth = 'd0';
// Sets the glyph width together with its bounding box.
case SetWidthAndBoundingBox = 'd1';
}

View File

@ -0,0 +1,12 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State;
/**
* The XObject operator, backed by its content stream keyword.
*
* @internal
*
* @specification table 86 - XObject operator
*/
enum XObjectOperator: string {
// Paints the referenced XObject.
case Paint = 'Do';
}

View File

@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream;
use PrinsFrank\PdfParser\Document\ContentStream\Command\ContentStreamCommand;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\GraphicsStateOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTransformationMatrix;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\InteractsWithTextState;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Interaction\ProducesPositionedTextElements;
use PrinsFrank\PdfParser\Document\ContentStream\Object\TextObject;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\PositionedTextElement;
use PrinsFrank\PdfParser\Document\ContentStream\PositionedText\TransformationMatrix;
use PrinsFrank\PdfParser\Document\Document;
use PrinsFrank\PdfParser\Document\Object\Decorator\Page;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
/**
 * Parsed content stream: an ordered list of top-level commands and text objects.
 *
 * @api
 */
class ContentStream {
    /** @var list<TextObject|ContentStreamCommand> */
    public readonly array $content;

    /** @no-named-arguments */
    public function __construct(
        TextObject|ContentStreamCommand... $content
    ) {
        $this->content = $content;
    }

    /**
     * Walks the content in order while tracking the text state and the current transformation
     * matrix, and collects every positioned text element produced inside the text objects.
     *
     * The result is sorted by descending Y offset first and ascending X offset second.
     *
     * @throws ParseFailureException when a restore-state operator is met while no matrix was saved
     *
     * @return list<PositionedTextElement>
     */
    public function getPositionedTextElements(): array {
        $positionedTextElements = $transformationStateStack = [];
        $textState = null; // See table 103, Tf operator for initial value
        $transformationMatrix = new TransformationMatrix(1, 0, 0, 1, 0, 0); // Identity matrix
        foreach ($this->content as $content) {
            if ($content instanceof ContentStreamCommand) {
                // Top-level command: it can only alter state, it never produces text itself.
                if ($content->operator instanceof InteractsWithTextState) {
                    $textState = $content->operator->applyToTextState($content->operands, $textState);
                } elseif ($content->operator === GraphicsStateOperator::SaveCurrentStateToStack) {
                    $transformationStateStack[] = clone $transformationMatrix;
                } elseif ($content->operator === GraphicsStateOperator::RestoreMostRecentStateFromStack) {
                    $transformationMatrix = array_pop($transformationStateStack)
                        ?? throw new ParseFailureException('Encountered a restore graphics state operator without a preceding save graphics state operator');
                } elseif ($content->operator instanceof InteractsWithTransformationMatrix) {
                    $transformationMatrix = $content->operator->applyToTransformationMatrix($content->operands, $transformationMatrix);
                }

                continue;
            }

            // Text object: the text matrix starts over for every object, the text state carries on.
            $textMatrix = new TransformationMatrix(1, 0, 0, 1, 0, 0); // Identity matrix, See Table 106, Tm operator for initial value in text object
            foreach ($content->contentStreamCommands as $contentStreamCommand) {
                // These are deliberately independent checks: one operator may implement several interfaces.
                if ($contentStreamCommand->operator instanceof InteractsWithTextState) {
                    $textState = $contentStreamCommand->operator->applyToTextState($contentStreamCommand->operands, $textState);
                }

                if ($contentStreamCommand->operator instanceof InteractsWithTransformationMatrix) {
                    $textMatrix = $contentStreamCommand->operator->applyToTransformationMatrix($contentStreamCommand->operands, $textMatrix);
                }

                // Text shown before any text state exists is skipped.
                if ($contentStreamCommand->operator instanceof ProducesPositionedTextElements && $textState !== null) {
                    $positionedTextElements[] = $contentStreamCommand->operator->getPositionedTextElement($contentStreamCommand->operands, $textMatrix, $transformationMatrix, $textState);
                }
            }
        }

        usort(
            $positionedTextElements,
            static function (PositionedTextElement $a, PositionedTextElement $b): int {
                if (($differenceY = $b->absoluteMatrix->offsetY <=> $a->absoluteMatrix->offsetY) !== 0) {
                    return $differenceY;
                }

                return $a->absoluteMatrix->offsetX <=> $b->absoluteMatrix->offsetX;
            }
        );

        return $positionedTextElements;
    }

    /**
     * Concatenates the text of all positioned text elements in sorted order.
     *
     * A newline is inserted whenever the Y offset differs from the previous element. On the same
     * Y offset a single space is inserted when the horizontal gap between the end of the previous
     * element and the start of the current one is at least 40% of the previous element's scaled
     * font size (a font size of 10 is assumed when none is set).
     *
     * @throws PdfParserException
     */
    public function getText(Document $document, Page $page): string {
        $text = '';
        $previousPositionedTextElement = null;
        foreach ($this->getPositionedTextElements() as $positionedTextElement) {
            if ($previousPositionedTextElement !== null) {
                if ($previousPositionedTextElement->absoluteMatrix->offsetY !== $positionedTextElement->absoluteMatrix->offsetY) {
                    $text .= "\n";
                } else {
                    // The width of the previous element has to be measured with the font it was
                    // shown in: its code points, text state and matrix all belong to that font.
                    $widthPreviousElement = $previousPositionedTextElement
                        ->getFont($document, $page)
                        ->getWidthForChars(
                            $previousPositionedTextElement->getCodePoints(),
                            $previousPositionedTextElement->textState,
                            $previousPositionedTextElement->absoluteMatrix,
                        );
                    $gap = $positionedTextElement->absoluteMatrix->offsetX
                        - $previousPositionedTextElement->absoluteMatrix->offsetX
                        - $widthPreviousElement;
                    $minimumGapForSpace = ($previousPositionedTextElement->textState->fontSize ?? 10) * $previousPositionedTextElement->absoluteMatrix->scaleX * 0.40;
                    if ($gap >= $minimumGapForSpace) {
                        $text .= ' ';
                    }
                }
            }

            $text .= $positionedTextElement->getText($document, $page);
            $previousPositionedTextElement = $positionedTextElement;
        }

        return $text;
    }
}

View File

@ -0,0 +1,217 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream;
use PrinsFrank\PdfParser\Document\ContentStream\Command\ContentStreamCommand;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\CompatibilityOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\InlineImageOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\MarkedContentOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\Object\TextObjectOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\ClippingPathOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\ColorOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\GraphicsStateOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\PathConstructionOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\PathPaintingOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\TextPositioningOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\TextShowingOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\TextStateOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\Type3FontOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Command\Operator\State\XObjectOperator;
use PrinsFrank\PdfParser\Document\ContentStream\Object\TextObject;
use PrinsFrank\PdfParser\Document\Object\Decorator\DecoratedObject;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * Splits raw content stream bytes into commands (operator + operands) and text objects.
 *
 * @internal
 */
class ContentStreamParser {
    /**
     * Scans every contents object byte by byte and builds a ContentStream from the operators found.
     *
     * Scanner state (open string/name/dictionary/array, the last three characters and the
     * currently open text object) is kept across contents objects, so a construct may start in
     * one stream and end in the next. Operand bytes left over at the end of the previous stream
     * are prepended to the operands of the first command found in the next stream.
     *
     * NOTE(review): inside a literal string only the single preceding character is inspected, so
     * an escaped backslash directly before the closing parenthesis and unescaped nested
     * parentheses are not recognised - confirm whether such input has to be supported.
     *
     * @param list<DecoratedObject> $contentsObjects
     * @throws ParseFailureException
     */
    public static function parse(array $contentsObjects): ContentStream {
        $content = [];
        $inStringLiteral = $inResourceName = $inDictionary = false;
        $inArrayLevel = $inStringLevel = 0;
        $textObject = $previousChar = $secondToLastChar = $thirdToLastChar = $previousContentStream = $startPreviousOperandIndex = null;
        foreach ($contentsObjects as $contentsObject) {
            $startCurrentOperandIndex = 0;
            $contentStream = $contentsObject->getStream();
            $contentStreamSize = $contentStream->getSizeInBytes();
            for ($index = 0; $index < $contentStreamSize; $index++) {
                $char = $contentStream->read($index, 1);
                if ($inStringLiteral === true) {
                    if ($char === ')' && $previousChar !== '\\') {
                        $inStringLiteral = false;
                    }
                } elseif ($inResourceName === true) {
                    if (in_array($char, [' ', '<', '(', '/', "\r", "\n"], true) && $previousChar !== '\\') {
                        $inResourceName = false;
                    }
                } elseif ($inDictionary === true) {
                    if ($char === '>' && $previousChar === '>' && $secondToLastChar !== '\\') {
                        $inDictionary = false;
                    }
                } elseif ($char === '[' && $previousChar !== '\\') {
                    $inArrayLevel++;
                } elseif ($char === '<' && $previousChar === '<' && $secondToLastChar !== '\\') {
                    $inDictionary = true;
                } elseif ($char === '<' && $previousChar !== '\\' && $contentStream->read($index + 1, 1) !== '<') {
                    $inStringLevel++;
                } elseif ($char === '(' && $previousChar !== '\\') {
                    $inStringLiteral = true;
                } elseif ($char === '/' && $previousChar !== '\\') {
                    $inResourceName = true;
                } elseif ($inStringLevel > 0 || $inArrayLevel > 0) {
                    if ($inStringLevel > 0 && $char === '>' && $previousChar !== '\\') {
                        $inStringLevel--;
                    } elseif ($inArrayLevel > 0 && $char === ']' && $previousChar !== '\\') {
                        $inArrayLevel--;
                    }
                } elseif ($char === 'T' && $previousChar === 'B') { // TextObjectOperator::BEGIN
                    $startCurrentOperandIndex = $index + 1;
                    $textObject = new TextObject();
                } elseif ($char === 'T' && $previousChar === 'E') { // TextObjectOperator::END
                    $startCurrentOperandIndex = $index + 1;
                    if ($textObject === null) {
                        throw new ParseFailureException('Encountered TextObjectOperator::END without preceding TextObjectOperator::BEGIN');
                    }

                    $content[] = $textObject;
                    $textObject = null;
                } elseif ($char === 'C'
                    && (($secondToLastChar === 'B' && ($previousChar === 'M' || $previousChar === 'D')) || ($secondToLastChar === 'E' && $previousChar === 'M'))) { // MarkedContentOperator::BeginMarkedContent, MarkedContentOperator::EndMarkedContent, MarkedContentOperator::BeginMarkedContentWithProperties
                    // Marked content boundaries are dropped: only the operand start is moved forward.
                    $startCurrentOperandIndex = $index + 1;
                } elseif (($operator = self::getOperator($char, $previousChar, $secondToLastChar, $thirdToLastChar)) !== null
                    && (($nextChar = $contentStream->read($index + 1, 1)) === '' || self::getOperator($nextChar, $char, $previousChar, $secondToLastChar) === null)) { // Skip the current hit if the next iteration is also a valid operator
                    $operands = '';
                    if ($previousContentStream !== null && $startPreviousOperandIndex !== null && $startPreviousOperandIndex < $previousContentStream->getSizeInBytes()) {
                        // Operands started in the previous stream: prepend its unconsumed tail once.
                        $operands .= $previousContentStream->read($startPreviousOperandIndex, $previousContentStream->getSizeInBytes() - $startPreviousOperandIndex);
                        $startPreviousOperandIndex = null;
                    }

                    if (($operandLength = $index + 1 - $startCurrentOperandIndex - strlen($operator->value)) > 0) {
                        $operands .= $contentStream->read($startCurrentOperandIndex, $operandLength);
                    }

                    $command = new ContentStreamCommand($operator, trim($operands));
                    if ($textObject !== null) {
                        $textObject->addContentStreamCommand($command);
                    } else {
                        $content[] = $command;
                    }

                    $startCurrentOperandIndex = $index + 1;
                }

                $thirdToLastChar = $secondToLastChar;
                $secondToLastChar = $previousChar;
                $previousChar = $char;
            }

            $previousContentStream = $contentStream;
            $startPreviousOperandIndex = $startCurrentOperandIndex;
        }

        return new ContentStream(...$content);
    }

    /**
     * Resolves the operator that ends at the current character, preferring the longest token.
     *
     * Three-letter tokens are tried first, then two-letter tokens, then one-letter tokens. As soon
     * as a token matches, the character directly in front of it decides the outcome: when that
     * character is a backslash or a slash null is returned (shorter tokens are not tried),
     * otherwise the matched operator is returned.
     *
     * This method uses three maps instead of calling $enum::tryFrom for all possible enums
     * as operator retrieval happens possibly millions of times in a single file
     */
    public static function getOperator(string $currentChar, ?string $previousChar, ?string $secondToLastChar, ?string $thirdToLastChar): CompatibilityOperator|InlineImageOperator|MarkedContentOperator|TextObjectOperator|ClippingPathOperator|ColorOperator|GraphicsStateOperator|PathConstructionOperator|PathPaintingOperator|TextPositioningOperator|TextShowingOperator|TextStateOperator|Type3FontOperator|XObjectOperator|null {
        $threeLetterMatch = match ($secondToLastChar . $previousChar . $currentChar) {
            'BMC' => MarkedContentOperator::BeginMarkedContent,
            'BDC' => MarkedContentOperator::BeginMarkedContentWithProperties,
            'EMC' => MarkedContentOperator::EndMarkedContent,
            'SCN' => ColorOperator::SetStrokingParams,
            'scn' => ColorOperator::SetColorParams,
            default => null,
        };
        if ($threeLetterMatch !== null) {
            return in_array($thirdToLastChar, ['\\', '/'], true) ? null : $threeLetterMatch;
        }

        $twoLetterMatch = match ($previousChar . $currentChar) {
            'BX' => CompatibilityOperator::BeginCompatibilitySection,
            'EX' => CompatibilityOperator::EndCompatibilitySection,
            'BI' => InlineImageOperator::Begin,
            'ID' => InlineImageOperator::BeginImageData,
            'EI' => InlineImageOperator::End,
            'MP' => MarkedContentOperator::Tag, // The marked-content point operator token is "MP"
            'DP' => MarkedContentOperator::TagProperties,
            'BT' => TextObjectOperator::BEGIN,
            'ET' => TextObjectOperator::END,
            'W*' => ClippingPathOperator::INTERSECT_EVEN_ODD,
            'CS' => ColorOperator::SetName,
            'cs' => ColorOperator::SetNameNonStroking,
            'SC' => ColorOperator::SetStrokingColor,
            'sc' => ColorOperator::SetColor,
            'RG' => ColorOperator::SetStrokingColorDeviceRGB,
            'rg' => ColorOperator::SetColorDeviceRGB,
            'cm' => GraphicsStateOperator::ModifyCurrentTransformationMatrix,
            'ri' => GraphicsStateOperator::SetIntent,
            'gs' => GraphicsStateOperator::SetDictName,
            're' => PathConstructionOperator::RECTANGLE,
            'f*' => PathPaintingOperator::FILL_EVEN_ODD,
            'B*' => PathPaintingOperator::FILL_STROKE_EVEN_ODD,
            'b*' => PathPaintingOperator::CLOSE_FILL_STROKE,
            'Td' => TextPositioningOperator::MOVE_OFFSET,
            'TD' => TextPositioningOperator::MOVE_OFFSET_LEADING,
            'Tm' => TextPositioningOperator::SET_MATRIX,
            'T*' => TextPositioningOperator::NEXT_LINE,
            'Tj' => TextShowingOperator::SHOW,
            'TJ' => TextShowingOperator::SHOW_ARRAY,
            'Tc' => TextStateOperator::CHAR_SPACE,
            'Tw' => TextStateOperator::WORD_SPACE,
            'Tz' => TextStateOperator::SCALE,
            'TL' => TextStateOperator::LEADING,
            'Tf' => TextStateOperator::FONT_SIZE,
            'Tr' => TextStateOperator::RENDER,
            'Ts' => TextStateOperator::RISE,
            'd0' => Type3FontOperator::SetWidth,
            'd1' => Type3FontOperator::SetWidthAndBoundingBox,
            'Do' => XObjectOperator::Paint,
            default => null,
        };
        if ($twoLetterMatch !== null) {
            return in_array($secondToLastChar, ['\\', '/'], true) ? null : $twoLetterMatch;
        }

        $oneLetterMatch = match ($currentChar) {
            'W' => ClippingPathOperator::INTERSECT,
            'G' => ColorOperator::SetStrokingColorSpace,
            'g' => ColorOperator::SetColorSpace,
            'K' => ColorOperator::SetStrokingColorDeviceCMYK,
            'k' => ColorOperator::SetColorDeviceCMYK,
            'q' => GraphicsStateOperator::SaveCurrentStateToStack,
            'Q' => GraphicsStateOperator::RestoreMostRecentStateFromStack,
            'w' => GraphicsStateOperator::SetLineWidth,
            'J' => GraphicsStateOperator::SetLineCap,
            'j' => GraphicsStateOperator::SetLineJoin,
            'M' => GraphicsStateOperator::SetMiterJoin,
            'd' => GraphicsStateOperator::SetLineDash,
            'i' => GraphicsStateOperator::SetFlatness,
            'm' => PathConstructionOperator::MOVE,
            'l' => PathConstructionOperator::LINE,
            'c' => PathConstructionOperator::CURVE_BEZIER_123,
            'v' => PathConstructionOperator::CURVE_BEZIER_23,
            'y' => PathConstructionOperator::CURVE_BEZIER_13,
            'h' => PathConstructionOperator::CLOSE,
            'S' => PathPaintingOperator::STROKE,
            's' => PathPaintingOperator::CLOSE_STROKE,
            'f' => PathPaintingOperator::FILL,
            'F' => PathPaintingOperator::FILL_DEPRECATED,
            'B' => PathPaintingOperator::FILL_STROKE,
            'n' => PathPaintingOperator::END,
            '\'' => TextShowingOperator::MOVE_SHOW,
            '"' => TextShowingOperator::MOVE_SHOW_SPACING,
            default => null,
        };
        if ($oneLetterMatch !== null) {
            return in_array($previousChar, ['\\', '/'], true) ? null : $oneLetterMatch;
        }

        return null;
    }
}

View File

@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\Object;
use PrinsFrank\PdfParser\Document\ContentStream\Command\ContentStreamCommand;
/**
 * Mutable collector for the content stream commands that belong to one text object.
 *
 * @internal
 */
class TextObject {
    /** @var list<ContentStreamCommand> commands in the order in which they were added */
    public array $contentStreamCommands = [];

    /** Appends a command and returns this instance so calls can be chained. */
    public function addContentStreamCommand(ContentStreamCommand $textOperator): self {
        array_push($this->contentStreamCommands, $textOperator);

        return $this;
    }

    /** Tells whether no command has been added yet. */
    public function isEmpty(): bool {
        return count($this->contentStreamCommands) === 0;
    }
}

View File

@ -0,0 +1,106 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\PositionedText;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\EncodingNameValue;
use PrinsFrank\PdfParser\Document\Document;
use PrinsFrank\PdfParser\Document\Generic\Character\LiteralStringEscapeCharacter;
use PrinsFrank\PdfParser\Document\Object\Decorator\Font;
use PrinsFrank\PdfParser\Document\Object\Decorator\Page;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * Raw operand text of a text-showing command together with the matrix and text state
 * that were active when it was shown.
 */
class PositionedTextElement {
    public function __construct(
        public readonly string $rawTextContent,
        public readonly TransformationMatrix $absoluteMatrix,
        public readonly TextState $textState,
    ) {
    }

    /**
     * Looks up the font named by the text state in the font dictionary of the page.
     *
     * @throws ParseFailureException when the text state has no font name or the font cannot be found
     */
    public function getFont(Document $document, Page $page): Font {
        if ($this->textState->fontName === null) {
            throw new ParseFailureException('Unable to locate font for text element');
        }

        return $page->getFontDictionary()?->getObjectForReference($document, $this->textState->fontName, Font::class)
            ?? throw new ParseFailureException(sprintf('Unable to locate font with reference "/%s"', $this->textState->fontName->value));
    }

    /**
     * Decodes every `(literal)` and `<hex>` group of the raw operands into text using the font.
     *
     * A group that is followed by a numeric offset below -100 gets a trailing space.
     *
     * @throws ParseFailureException
     */
    public function getText(Document $document, Page $page): string {
        if (($result = preg_match_all('/(?<chars>(<(\\\\>|[^>])*>)|(\((\\\\\)|[^)])*\)))(?<offset>-?[0-9]+(\.[0-9]+)?)?/', $this->rawTextContent, $matches, PREG_SET_ORDER)) === false) {
            throw new ParseFailureException('Error with regex');
        } elseif ($result === 0) {
            throw new ParseFailureException(sprintf('Operands "%s" is not in a recognized format', $this->rawTextContent));
        }

        $string = '';
        $font = $this->getFont($document, $page);
        foreach ($matches as $match) {
            if (str_starts_with($match['chars'], '(') && str_ends_with($match['chars'], ')')) {
                $unescapedChars = LiteralStringEscapeCharacter::unescapeCharacters(substr($match['chars'], 1, -1));
                // Decoding sources in order of preference: differences array (single octal escape
                // or single character), Mac expert / WinAnsi encoding, ToUnicode CMap, any other
                // encoding, and finally the unescaped bytes as they are.
                if (preg_match('/^\\\\\d{3}$/', substr($match['chars'], 1, -1)) === 1 && ($glyph = $font->getDifferences()?->getGlyph((int) octdec(substr($match['chars'], 2, -1)))) !== null) {
                    $chars = $glyph->getChar();
                } elseif (strlen($unescapedChars) === 1 && ($glyph = $font->getDifferences()?->getGlyph(ord($unescapedChars))) !== null) {
                    $chars = $glyph->getChar();
                } elseif (in_array($encoding = $font->getEncoding(), [EncodingNameValue::MacExpertEncoding, EncodingNameValue::WinAnsiEncoding], true)) {
                    $chars = $encoding->decodeString($unescapedChars);
                } elseif (($toUnicodeCMap = $font->getToUnicodeCMap() ?? $font->getToUnicodeCMapDescendantFont()) !== null) {
                    $chars = $toUnicodeCMap->textToUnicode(bin2hex($unescapedChars));
                } elseif ($encoding !== null) {
                    $chars = $encoding->decodeString($unescapedChars);
                } else {
                    $chars = $unescapedChars;
                }

                $string .= $chars;
            } elseif (str_starts_with($match['chars'], '<') && str_ends_with($match['chars'], '>')) {
                $chars = substr($match['chars'], 1, -1);
                if (($toUnicodeCMap = $font->getToUnicodeCMap() ?? $font->getToUnicodeCMapDescendantFont()) !== null) {
                    $string .= $toUnicodeCMap->textToUnicode($chars);
                } elseif (($encoding = $font->getEncoding()) !== null) {
                    $string .= $encoding->decodeString(implode('', array_map(fn (string $character) => mb_chr((int) hexdec($character)), str_split($chars, 2))));
                } else {
                    throw new ParseFailureException('Unable to use CMap or decode string to retrieve characters for text object');
                }
            } else {
                throw new ParseFailureException(sprintf('Unrecognized character group format "%s"', $match['chars']));
            }

            // The offset group is absent from the match when no number follows the characters.
            if (isset($match['offset']) && (float) $match['offset'] < -100) {
                $string .= ' ';
            }
        }

        return $string;
    }

    /**
     * Returns the numeric character codes contained in the raw operands.
     *
     * Literal strings yield one code per byte after the `\(`, `\)`, `\n`, `\r` and three digit
     * octal escapes have been expanded. Hex strings are read in groups of four hex digits.
     *
     * @throws ParseFailureException
     *
     * @return list<int>
     */
    public function getCodePoints(): array {
        $codePoints = [];
        if (($result = preg_match_all('/(?<chars>(<(\\\\>|[^>])*>)|(\((\\\\\)|[^)])*\)))(?<offset>-?[0-9]+(\.[0-9]+)?)?/', $this->rawTextContent, $matches, PREG_SET_ORDER)) === false) {
            throw new ParseFailureException('Error with regex');
        } elseif ($result === 0) {
            throw new ParseFailureException(sprintf('Operands "%s" is not in a recognized format', $this->rawTextContent));
        }

        foreach ($matches as $match) {
            if (str_starts_with($match['chars'], '(') && str_ends_with($match['chars'], ')')) {
                $chars = str_replace(['\(', '\)', '\n', '\r'], ['(', ')', "\n", "\r"], substr($match['chars'], 1, -1));
                // An octal escape stands for exactly one byte. The string is split byte-wise
                // below, so a multi-byte UTF-8 character here would produce two wrong codes for
                // every escape >= \200. Overflow above 8 bits is masked off.
                $chars = preg_replace_callback('/\\\\([0-7]{3})/', static fn (array $matches): string => chr(((int) octdec($matches[1])) & 0xFF), $chars)
                    ?? throw new ParseFailureException();
                foreach (str_split($chars) as $char) {
                    $codePoints[] = ord($char);
                }
            } elseif (str_starts_with($match['chars'], '<') && str_ends_with($match['chars'], '>')) {
                foreach (str_split(substr($match['chars'], 1, -1), 4) as $char) {
                    $codePoints[] = is_int($codePoint = hexdec($char)) ? $codePoint : throw new ParseFailureException();
                }
            } else {
                throw new ParseFailureException(sprintf('Unrecognized character group format "%s"', $match['chars']));
            }
        }

        return $codePoints;
    }
}

View File

@ -0,0 +1,20 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\PositionedText;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\ExtendedDictionaryKey;
/**
 * Text state parameters that are in effect while text is shown.
 *
 * The font name and font size are fixed after construction, all other parameters are writable.
 */
class TextState {
    /**
     * @param DictionaryKey|ExtendedDictionaryKey|null $fontName  font resource name (Tf)
     * @param float|null                               $fontSize  font size (Tfs)
     * @param float                                    $charSpace character spacing (Tc)
     * @param float                                    $wordSpace word spacing (Tw)
     * @param float                                    $scale     horizontal scaling (Th)
     * @param float                                    $leading   leading (Tl)
     * @param int                                      $render    rendering mode (Tmode)
     * @param float                                    $rise      text rise (Trise)
     */
    public function __construct(
        public readonly DictionaryKey|ExtendedDictionaryKey|null $fontName,
        public readonly ?float $fontSize,
        public float $charSpace = 0,
        public float $wordSpace = 0,
        public float $scale = 100,
        public float $leading = 0,
        public int $render = 0,
        public float $rise = 0,
    ) {
    }
}

View File

@ -0,0 +1,27 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\ContentStream\PositionedText;
/**
 * Immutable six component matrix [a b c d e f] used for text and graphics transformations.
 */
class TransformationMatrix {
    /**
     * @param float $scaleX  component a
     * @param float $shearX  component b
     * @param float $shearY  component c
     * @param float $scaleY  component d
     * @param float $offsetX component e
     * @param float $offsetY component f
     */
    public function __construct(
        public readonly float $scaleX,
        public readonly float $shearX,
        public readonly float $shearY,
        public readonly float $scaleY,
        public readonly float $offsetX,
        public readonly float $offsetY,
    ) {
    }

    /**
     * Returns the product "this x other" as a new matrix; neither operand is modified.
     *
     * Please note that a concatenated transformation matrix of A B !== B A
     */
    public function multiplyWith(self $other): self {
        [$a, $b, $c, $d, $e, $f] = [$this->scaleX, $this->shearX, $this->shearY, $this->scaleY, $this->offsetX, $this->offsetY];

        $scaleX = $a * $other->scaleX + $b * $other->shearY;
        $shearX = $a * $other->shearX + $b * $other->scaleY;
        $shearY = $c * $other->scaleX + $d * $other->shearY;
        $scaleY = $c * $other->shearX + $d * $other->scaleY;
        $offsetX = $e * $other->scaleX + $f * $other->shearY + $other->offsetX;
        $offsetY = $e * $other->shearX + $f * $other->scaleY + $other->offsetY;

        return new self($scaleX, $shearX, $shearY, $scaleY, $offsetX, $offsetY);
    }
}

View File

@ -0,0 +1,92 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference;
use PrinsFrank\PdfParser\Document\CrossReference\Source\CrossReferenceSource;
use PrinsFrank\PdfParser\Document\CrossReference\Stream\CrossReferenceStreamParser;
use PrinsFrank\PdfParser\Document\CrossReference\Table\CrossReferenceTableParser;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Integer\IntegerValue;
use PrinsFrank\PdfParser\Document\Generic\Marker;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Stream\Stream;
/**
* Locates the newest cross reference section of a PDF stream, parses it and then follows
* the PREV entries of the section dictionaries to collect the older sections as well.
*
* @internal
*/
class CrossReferenceSourceParser {
/**
* Builds the CrossReferenceSource for the given stream.
*
* The byte offset written on the line after the last START_XREF marker (searched before the
* last EOF marker) is used as the start of the newest section. When the line at that offset
* is neither a table nor a stream header, the last XREF / OBJ marker before the START_XREF
* marker is inspected instead to determine the section type.
*
* @throws PdfParserException
*/
public static function parse(Stream $stream): CrossReferenceSource {
$eofMarkerPos = $stream->lastPos(Marker::EOF, 0)
?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::EOF->value));
$startXrefMarkerPos = $stream->lastPos(Marker::START_XREF, $stream->getSizeInBytes() - $eofMarkerPos)
?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::START_XREF->value));
// The line following the marker holds the byte offset of the newest cross reference section.
$startByteOffset = $stream->getStartOfNextLine($startXrefMarkerPos, $stream->getSizeInBytes())
?? throw new ParseFailureException('Expected a carriage return or line feed after startxref marker, none found');
$endByteOffset = $stream->getEndOfCurrentLine($startByteOffset, $stream->getSizeInBytes())
?? throw new ParseFailureException('Expected a carriage return or line feed after the byte offset, none found');
$byteOffsetLastCrossReferenceSection = trim($stream->read($startByteOffset, $endByteOffset - $startByteOffset));
// Round trip through int: rejects anything that is not a canonical decimal integer.
if ($byteOffsetLastCrossReferenceSection !== (string)(int) $byteOffsetLastCrossReferenceSection) {
throw new ParseFailureException(sprintf('Invalid byte offset last crossReference section "%s", "%s"', $byteOffsetLastCrossReferenceSection, $stream->read($startXrefMarkerPos, $stream->getSizeInBytes() - $startXrefMarkerPos)));
}
$byteOffsetLastCrossReferenceSection = (int) $byteOffsetLastCrossReferenceSection;
if ($byteOffsetLastCrossReferenceSection > $stream->getSizeInBytes()) {
throw new ParseFailureException(sprintf('Invalid byte offset: position of last crossReference section %d is greater than total size of stream %d. Should this be %d?', $byteOffsetLastCrossReferenceSection, $stream->getSizeInBytes(), $stream->lastPos(Marker::XREF, $stream->getSizeInBytes() - $startXrefMarkerPos) ?? $stream->lastPos(Marker::OBJ, $stream->getSizeInBytes() - $startXrefMarkerPos) ?? 0));
}
$eolPosByteOffset = $stream->getEndOfCurrentLine($byteOffsetLastCrossReferenceSection, $stream->getSizeInBytes())
?? throw new ParseFailureException('Expected a newline after byte offset for last cross reference stream');
$crossReferenceType = self::getCrossReferenceType($stream, $byteOffsetLastCrossReferenceSection, $eolPosByteOffset);
if ($crossReferenceType === null) { // Try to recover from an invalid byte offset crossReference section
$lastPosXrefSection = $stream->lastPos(Marker::XREF, $stream->getSizeInBytes() - $startXrefMarkerPos);
$lastPosObject = $stream->lastPos(Marker::OBJ, $stream->getSizeInBytes() - $startXrefMarkerPos);
if ($lastPosXrefSection === null && $lastPosObject === null) {
throw new ParseFailureException(sprintf('Unable to determine cross reference type for start line "%s" of crossReference source, and no other crossReference table or stream was found.', $stream->read($byteOffsetLastCrossReferenceSection, $eolPosByteOffset - $byteOffsetLastCrossReferenceSection)));
}
// Whichever of the two markers occurs last is taken as the candidate section start.
$lastPossibleXrefSectionPos = $lastPosObject === null ? $lastPosXrefSection : ($lastPosXrefSection === null ? $lastPosObject : max($lastPosXrefSection, $lastPosObject));
$eolStartXrefSectionPos = $stream->getEndOfCurrentLine($lastPossibleXrefSectionPos, $stream->getSizeInBytes())
?? throw new ParseFailureException(sprintf('Unable to determine cross reference type for start line "%s" of crossReference source, and no other crossReference table or stream was found.', $stream->read($startByteOffset, $endByteOffset - $startByteOffset)));
$crossReferenceType = self::getCrossReferenceType($stream, $lastPossibleXrefSectionPos, $eolStartXrefSectionPos)
?? throw new ParseFailureException(sprintf('Unable to determine cross reference type for start line "%s" of crossReference source, and no other crossReference table or stream was found.', $stream->read($startByteOffset, $endByteOffset - $startByteOffset)));
// NOTE(review): the recovered position only determines the type; the section below is still
// parsed starting at $eolPosByteOffset (derived from the original offset) - confirm this is intended.
}
// A table section ends at the next START_XREF marker, a stream section at the next END_OBJ marker.
$endCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table
? ($stream->firstPos(Marker::START_XREF, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::START_XREF->value)))
: ($stream->firstPos(Marker::END_OBJ, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::END_OBJ->value)));
$currentCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table
? CrossReferenceTableParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset)
: CrossReferenceStreamParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset);
$crossReferenceSections = [$currentCrossReferenceSection];
// Follow the PREV chain; every older section is parsed with the type determined for the newest one.
while (($previous = $currentCrossReferenceSection->dictionary->getValueForKey(DictionaryKey::PREV, IntegerValue::class)) !== null && $previous->value !== 0) {
$eolPosByteOffset = $stream->getEndOfCurrentLine($previous->value + 1, $stream->getSizeInBytes())
?? throw new ParseFailureException('Expected a newline after byte offset for cross reference stream');
$endCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table
? $stream->firstPos(Marker::START_XREF, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate startxref')
: $stream->firstPos(Marker::END_OBJ, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate endobj');
$currentCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table
? CrossReferenceTableParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset)
: CrossReferenceStreamParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset);
$crossReferenceSections[] = $currentCrossReferenceSection;
}
return new CrossReferenceSource(... $crossReferenceSections);
}
/**
* Classifies the trimmed text between the two offsets: Table when it equals the XREF marker,
* Stream when it looks like an object header (optional digits, optional digits, "obj"),
* null otherwise.
*/
private static function getCrossReferenceType(Stream $stream, int $byteOffsetLastCrossReferenceSection, int $byteOffsetEndOfCurrentLine): ?CrossReferenceType {
$startCrossReferenceContent = trim($stream->read($byteOffsetLastCrossReferenceSection, $byteOffsetEndOfCurrentLine - $byteOffsetLastCrossReferenceSection));
if ($startCrossReferenceContent === Marker::XREF->value) {
return CrossReferenceType::Table;
}
if (preg_match('/^[0-9]*\s*[0-9]*\s*obj$/', $startCrossReferenceContent) === 1) {
return CrossReferenceType::Stream;
}
return null;
}
}

View File

@ -0,0 +1,8 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference;
/** The two storage forms a cross reference section can have. */
enum CrossReferenceType {
    /** Section stored as a cross reference table. */
    case Table;

    /** Section stored as a cross reference stream object. */
    case Stream;
}

View File

@ -0,0 +1,78 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Source;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\CrossReferenceSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryCompressed;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\ArrayValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\NameValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValue;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/**
 * All cross reference sections of a document, newest first.
 *
 * Can be both from a crossReferenceTable or a crossReferenceStream
 */
class CrossReferenceSource {
    /** @var list<CrossReferenceSection> Where the first is the newest incremental update and the last one is the oldest */
    private readonly array $crossReferenceSections;

    /** @no-named-arguments */
    public function __construct(
        CrossReferenceSection... $crossReferenceSections,
    ) {
        $this->crossReferenceSections = $crossReferenceSections;
    }

    /** Returns the entry of the first (newest) section that has one for the object number, or null. */
    public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null {
        foreach ($this->crossReferenceSections as $section) {
            if (($entry = $section->getCrossReferenceEntry($objNumber)) !== null) {
                return $entry;
            }
        }

        return null;
    }

    /** Shorthand for getValueForKey() with ReferenceValue as the requested type. */
    public function getReferenceForKey(DictionaryKey $dictionaryKey): ?ReferenceValue {
        return $this->getValueForKey($dictionaryKey, ReferenceValue::class);
    }

    /**
     * Returns the value of the first (newest) section dictionary that has one for the key, or null.
     *
     * @template T of DictionaryValue|NameValue|Dictionary
     * @param class-string<T> $valueType
     * @return T|null
     */
    public function getValueForKey(DictionaryKey $dictionaryKey, string $valueType): DictionaryValue|Dictionary|NameValue|null {
        foreach ($this->crossReferenceSections as $section) {
            if (($found = $section->dictionary->getValueForKey($dictionaryKey, $valueType)) !== null) {
                return $found;
            }
        }

        return null;
    }

    /**
     * Returns the first element of the ID array as binary string.
     *
     * @throws ParseFailureException when the element is missing, not a string, not wrapped in "<" and ">" or not valid hex
     */
    public function getFirstId(): string {
        $idArray = $this->getValueForKey(DictionaryKey::ID, ArrayValue::class);
        $hexId = $idArray->value[0] ?? null;
        if ($hexId === null) {
            throw new ParseFailureException('Unable to retrieve first id from cross reference source');
        }

        if (is_string($hexId) === false) {
            throw new ParseFailureException('First id is not a string');
        }

        if (!(str_starts_with($hexId, '<') && str_ends_with($hexId, '>'))) {
            throw new ParseFailureException('Unsupported first id format, expected "<hex>"');
        }

        $binaryId = hex2bin(substr($hexId, 1, -1));

        return $binaryId !== false
            ? $binaryId
            : throw new ParseFailureException('Unable to retrieve binary value from first id');
    }
}

View File

@ -0,0 +1,32 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Source\Section;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\CrossReferenceSubSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryCompressed;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
/**
 * One cross reference section: its dictionary plus the subsections it consists of.
 *
 * There are multiple crossReference sections if there are incremental updates. See 7.5.6
 */
class CrossReferenceSection {
    /** @var list<CrossReferenceSubSection> */
    public readonly array $crossReferenceSubSections;

    /** @no-named-arguments */
    public function __construct(
        public readonly Dictionary $dictionary,
        CrossReferenceSubSection... $crossReferenceSubSections,
    ) {
        $this->crossReferenceSubSections = $crossReferenceSubSections;
    }

    /**
     * Delegates to the first subsection that contains the object number.
     *
     * Returns null when no subsection contains it; otherwise whatever that subsection returns.
     */
    public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null {
        foreach ($this->crossReferenceSubSections as $subSection) {
            if (!$subSection->containsObject($objNumber)) {
                continue;
            }

            return $subSection->getCrossReferenceEntry($objNumber);
        }

        return null;
    }
}

View File

@ -0,0 +1,54 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryCompressed;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryFreeObject;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
/**
 * A run of cross reference entries for consecutive object numbers, starting at $firstObjectNumber.
 * The entry for object number N is looked up at index N - $firstObjectNumber.
 */
class CrossReferenceSubSection {
    /** @var array<CrossReferenceEntryInUseObject|CrossReferenceEntryFreeObject|CrossReferenceEntryCompressed> */
    public array $crossReferenceEntries = [];

    /**
     * @phpstan-assert int<0, max> $nrOfEntries
     *
     * @throws InvalidArgumentException when $nrOfEntries is negative
     *
     * @no-named-arguments
     */
    public function __construct(
        public readonly int $firstObjectNumber,
        public readonly int $nrOfEntries,
        CrossReferenceEntryInUseObject|CrossReferenceEntryFreeObject|CrossReferenceEntryCompressed... $crossReferenceEntries
    ) {
        if ($this->nrOfEntries < 0) {
            throw new InvalidArgumentException('$nrOfEntries should be a positive number');
        }

        $this->crossReferenceEntries = $crossReferenceEntries;
    }

    /** Whether $objNumber lies in [$firstObjectNumber, $firstObjectNumber + $nrOfEntries). */
    public function containsObject(int $objNumber): bool {
        return $objNumber >= $this->firstObjectNumber
            && $objNumber < $this->firstObjectNumber + $this->nrOfEntries;
    }

    /**
     * Return the entry for $objNumber, or null when the object number is outside this subsection.
     *
     * @throws RuntimeException when the object number is in range but no entry is stored for it,
     *                          or when the stored entry is a free object entry
     */
    public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null {
        // containsObject() is an instance method, so call it on $this instead of through self::
        if ($this->containsObject($objNumber) === false) {
            return null;
        }

        $indexInSubSection = $objNumber - $this->firstObjectNumber;
        $object = $this->crossReferenceEntries[$indexInSubSection]
            ?? throw new RuntimeException(sprintf('Object with key %d not found', $indexInSubSection));
        if ($object instanceof CrossReferenceEntryFreeObject) {
            throw new RuntimeException('Cross reference entry for object should point to either a compressed or uncompressed entry, not a free object nr');
        }

        return $object;
    }
}

View File

@ -0,0 +1,21 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry;
/**
 * Cross reference entry for an object that is stored inside an object stream.
 * 7.5.8, Table 18, only present in crossReferenceStreams
 */
class CrossReferenceEntryCompressed {
/**
 * @see Table 18
 *
 * Table 18 describes the second field of a compressed entry as: "The object number of the
 * object stream in which this object is stored. (The generation number of the object stream
 * shall be implicitly 0.)" This constant holds that implicit generation number.
 */
final public const GENERATION_NUMBER = 0;
/**
 * @param int $storedInStreamWithObjectNumber object number of the object stream that contains this object
 * @param int $indexOfThisObjectWithinObjectStream index of this object within that object stream
 */
public function __construct(
public readonly int $storedInStreamWithObjectNumber,
public readonly int $indexOfThisObjectWithinObjectStream,
) {
}
}

View File

@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry;
/**
 * Cross reference entry describing a free object.
 * Present in both crossReferenceTable and crossReferenceStream
 */
class CrossReferenceEntryFreeObject {
/**
 * @param int $objectNumberNextFreeObject object number of the next free object
 * @param int $generationNumber generation number stored with this entry
 */
public function __construct(
public readonly int $objectNumberNextFreeObject,
public readonly int $generationNumber,
) {
}
}

View File

@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry;
/**
 * Cross reference entry describing an object that is in use and not stored in an object stream.
 * Present in both crossReferenceTable and crossReferenceStream
 */
class CrossReferenceEntryInUseObject {
/**
 * @param int $byteOffsetInDecodedStream byte offset at which the object can be found
 * @param int $generationNumber generation number stored with this entry
 */
public function __construct(
public readonly int $byteOffsetInDecodedStream,
public readonly int $generationNumber,
) {
}
}

View File

@ -0,0 +1,83 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Stream;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\CrossReferenceSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\CrossReferenceSubSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryCompressed;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryFreeObject;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryParser;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\ArrayValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\CrossReferenceStreamByteSizes;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Integer\IntegerValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\TypeNameValue;
use PrinsFrank\PdfParser\Document\Generic\Marker;
use PrinsFrank\PdfParser\Document\Object\Item\CompressedObject\CompressedObjectContent\CompressedObjectContentParser;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Stream\Stream;
/**
 * Parses a cross reference stream object into a CrossReferenceSection.
 *
 * @internal
 */
class CrossReferenceStreamParser {
    private const HEX_CHARS_IN_BYTE = 2;

    /**
     * Parse the dictionary found in the given byte range, decode the stream content that follows it
     * and split the decoded rows into cross reference entries, grouped into subsections according
     * to the Index entry (or [0, Size] when there is no Index).
     *
     * @phpstan-assert int<0, max> $startPos
     * @phpstan-assert int<1, max> $nrOfBytes
     *
     * @throws PdfParserException
     */
    public static function parse(Stream $stream, int $startPos, int $nrOfBytes): CrossReferenceSection {
        $dictionary = DictionaryParser::parse($stream, $startPos, $nrOfBytes);
        if ($dictionary->getType() !== TypeNameValue::X_REF) {
            throw new ParseFailureException('Expected stream of type xref');
        }

        $wValue = $dictionary->getValueForKey(DictionaryKey::W, CrossReferenceStreamByteSizes::class)
            ?? throw new ParseFailureException('Cross reference streams should have a dictionary entry for "W"');

        // A row of zero bytes can never be split into entries; str_split() would throw a ValueError
        $rowLengthInHexChars = $wValue->getTotalLengthInBytes() * self::HEX_CHARS_IN_BYTE;
        if ($rowLengthInHexChars < 1) {
            throw new ParseFailureException('Cross reference stream entries should be at least one byte long');
        }

        $startStream = $stream->getStartNextLineAfter(Marker::STREAM, $startPos, $startPos + $nrOfBytes)
            ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::STREAM->value));
        if (($length = $dictionary->getValueForKey(DictionaryKey::LENGTH, IntegerValue::class)?->value) === null) {
            // No Length entry: derive the length from the position of the end-of-stream marker.
            // NOTE(review): "$a - $b + $c" evaluates as "($a - $b) + $c"; confirm against Stream::lastPos()
            // whether this offset was meant to be "$a - ($b + $c)".
            $endStream = $stream->lastPos(Marker::END_STREAM, $stream->getSizeInBytes() - $startPos + $nrOfBytes);
            if ($endStream === null || $endStream > ($startPos + $nrOfBytes)) {
                throw new ParseFailureException(sprintf('Expected end of stream content marked by %s, none found', Marker::END_STREAM->value));
            }

            $length = $endStream - $startStream - 1;
        }

        $entries = [];
        $hexContent = bin2hex(CompressedObjectContentParser::parseBinary($stream, $startStream, $length, $dictionary)->toString());
        foreach (str_split($hexContent, $rowLengthInHexChars) as $referenceRow) {
            // A zero-width type field is absent from the row and defaults to type 1 (7.5.8.2, Table 17)
            $field1 = $wValue->lengthRecord1InBytes === 0
                ? CrossReferenceStreamType::UNCOMPRESSED_OBJECT->value
                : hexdec(substr($referenceRow, 0, $wValue->lengthRecord1InBytes * self::HEX_CHARS_IN_BYTE));
            $field2 = hexdec(substr($referenceRow, $wValue->lengthRecord1InBytes * self::HEX_CHARS_IN_BYTE, $wValue->lengthRecord2InBytes * self::HEX_CHARS_IN_BYTE));
            $field3 = hexdec(substr($referenceRow, ($wValue->lengthRecord1InBytes + $wValue->lengthRecord2InBytes) * self::HEX_CHARS_IN_BYTE, $wValue->lengthRecord3InBytes * self::HEX_CHARS_IN_BYTE));
            // hexdec() returns a float once the value no longer fits in an int
            if (!is_int($field1) || !is_int($field2) || !is_int($field3)) {
                throw new ParseFailureException(sprintf('Field 1, 2 and 3 in cross reference entries should be int, got %s, %s and %s', gettype($field1), gettype($field2), gettype($field3)));
            }

            $entries[] = match (CrossReferenceStreamType::tryFrom($field1)) {
                CrossReferenceStreamType::LINKED_LIST_FREE_OBJECT => new CrossReferenceEntryFreeObject($field2, $field3),
                CrossReferenceStreamType::UNCOMPRESSED_OBJECT => new CrossReferenceEntryInUseObject($field2, $field3),
                CrossReferenceStreamType::COMPRESSED_OBJECT => new CrossReferenceEntryCompressed($field2, $field3),
                null => throw new ParseFailureException(sprintf('Unrecognized CrossReferenceStream type "%s"', $field1)),
            };
        }

        /** @var list<int> $startObjNrOfItemsArray where all even items are the start object number and all odd items are the number of objects */
        $startObjNrOfItemsArray = $dictionary->getValueForKey(DictionaryKey::INDEX, ArrayValue::class)->value
            ?? [0, $dictionary->getValueForKey(DictionaryKey::SIZE, IntegerValue::class)->value ?? throw new ParseFailureException('Cross reference streams should have either an index or a size, neither was found')];
        if (count($startObjNrOfItemsArray) % 2 !== 0) {
            throw new ParseFailureException('Index of a cross reference stream should contain pairs of start object number and number of objects');
        }

        $crossReferenceSubSections = [];
        $entryOffset = 0;
        foreach (array_chunk($startObjNrOfItemsArray, 2) as [$firstObjectNumber, $nrOfObjects]) {
            // Report malformed values as a parse failure instead of a TypeError from the constructor below
            if (!is_int($firstObjectNumber) || !is_int($nrOfObjects)) {
                throw new ParseFailureException('Index of a cross reference stream should only contain integers');
            }

            // Each subsection takes the next $nrOfObjects entries, in the order they appear in the stream
            $crossReferenceSubSections[] = new CrossReferenceSubSection($firstObjectNumber, $nrOfObjects, ... array_slice($entries, $entryOffset, $nrOfObjects));
            $entryOffset += $nrOfObjects;
        }

        return new CrossReferenceSection($dictionary, ... $crossReferenceSubSections);
    }
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Stream;
/**
 * Entry type as stored in the first field of a cross reference stream row;
 * CrossReferenceStreamParser maps each case to the matching entry class.
 *
 * @internal
 */
enum CrossReferenceStreamType: int {
// Parsed into a CrossReferenceEntryFreeObject
case LINKED_LIST_FREE_OBJECT = 0;
// Parsed into a CrossReferenceEntryInUseObject
case UNCOMPRESSED_OBJECT = 1;
// Parsed into a CrossReferenceEntryCompressed
case COMPRESSED_OBJECT = 2;
}

View File

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Table;
/**
 * Keyword in the third column of a cross reference table line;
 * CrossReferenceTableParser maps each case to the matching entry class.
 *
 * @internal
 */
enum CrossReferenceTableInUseOrFree: string {
// Parsed into a CrossReferenceEntryInUseObject
case IN_USE = 'n';
// Parsed into a CrossReferenceEntryFreeObject
case FREE = 'f';
}

View File

@ -0,0 +1,57 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\CrossReference\Table;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\CrossReferenceSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\CrossReferenceSubSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryFreeObject;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryParser;
use PrinsFrank\PdfParser\Document\Generic\Character\WhitespaceCharacter;
use PrinsFrank\PdfParser\Document\Generic\Marker;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Stream\Stream;
/**
 * Parses a classic cross reference table, including its trailer dictionary, into a CrossReferenceSection.
 *
 * @internal
 */
class CrossReferenceTableParser {
    /**
     * Everything between $startPos and the trailer marker is read line by line: a line with two
     * space separated columns starts a new subsection (first object number, number of entries),
     * a line with three columns is an entry of the current subsection.
     *
     * @throws PdfParserException
     */
    public static function parse(Stream $stream, int $startPos, int $nrOfBytes): CrossReferenceSection {
        $startTrailerPos = $stream->firstPos(Marker::TRAILER, $startPos, $startPos + $nrOfBytes)
            ?? throw new ParseFailureException('Unable to locate trailer for crossReferenceTable');
        $dictionaryStartPos = $startTrailerPos + Marker::TRAILER->length();
        $dictionary = DictionaryParser::parse($stream, $dictionaryStartPos, $nrOfBytes - ($dictionaryStartPos - $startPos));

        // Normalize line endings: carriage returns become line feeds, then doubled line feeds are collapsed
        $lineFeed = WhitespaceCharacter::LINE_FEED->value;
        $tableContent = str_replace(
            [WhitespaceCharacter::CARRIAGE_RETURN->value, $lineFeed . $lineFeed],
            $lineFeed,
            trim($stream->read($startPos, $startTrailerPos - $startPos)),
        );

        $subSections = [];
        $pendingEntries = [];
        $firstObjectNumber = null;
        $nrOfEntries = null;
        foreach (explode($lineFeed, $tableContent) as $line) {
            $trimmedLine = trim($line);
            $columns = explode(WhitespaceCharacter::SPACE->value, $trimmedLine);
            $nrOfColumns = count($columns);

            if ($nrOfColumns === 2) {
                // A new subsection header closes the subsection that was being collected, if any
                if ($firstObjectNumber !== null && $nrOfEntries !== null) {
                    $subSections[] = new CrossReferenceSubSection($firstObjectNumber, $nrOfEntries, ... $pendingEntries);
                }

                $pendingEntries = [];
                $firstObjectNumber = (int) $columns[0];
                $nrOfEntries = (int) $columns[1];

                continue;
            }

            if ($nrOfColumns !== 3) {
                throw new ParseFailureException(sprintf('Invalid line "%s", 2 or 3 sections expected, %d found', substr($trimmedLine, 0, 30), $nrOfColumns));
            }

            $recordType = trim($columns[2]);
            $pendingEntries[] = match (CrossReferenceTableInUseOrFree::tryFrom($recordType)) {
                CrossReferenceTableInUseOrFree::IN_USE => new CrossReferenceEntryInUseObject((int) $columns[0], (int) $columns[1]),
                CrossReferenceTableInUseOrFree::FREE => new CrossReferenceEntryFreeObject((int) $columns[0], (int) $columns[1]),
                null => throw new ParseFailureException(sprintf('Unrecognized crossReference table record type %s', $recordType))
            };
        }

        // The last subsection has no following header to close it
        if ($firstObjectNumber !== null && $nrOfEntries !== null) {
            $subSections[] = new CrossReferenceSubSection($firstObjectNumber, $nrOfEntries, ... $pendingEntries);
        }

        return new CrossReferenceSection($dictionary, ... $subSections);
    }
}

View File

@ -0,0 +1,143 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryEntry\DictionaryEntry;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\ExtendedDictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\DictionaryArrayValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\NameValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\SubtypeNameValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\TypeNameValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValueArray;
use PrinsFrank\PdfParser\Document\Document;
use PrinsFrank\PdfParser\Document\Object\Decorator\DecoratedObject;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
/** Read-only collection of dictionary entries with typed lookup helpers. */
class Dictionary {
    /** @var array<DictionaryEntry> */
    public readonly array $dictionaryEntries;

    /** @no-named-arguments */
    public function __construct(
        DictionaryEntry... $dictionaryEntries
    ) {
        $this->dictionaryEntries = $dictionaryEntries;
    }

    /**
     * Return the value of the first entry with the given key, verified to be of the expected type.
     *
     * @template T of DictionaryValue|NameValue|Dictionary
     * @param class-string<T> $valueType
     * @return T|null null when no entry has the given key
     * @throws InvalidArgumentException when the value found is not an instance of $valueType
     */
    public function getValueForKey(DictionaryKey|ExtendedDictionaryKey $dictionaryKey, string $valueType): DictionaryValue|Dictionary|NameValue|null {
        $dictionaryEntry = $this->findEntry($dictionaryKey);
        if ($dictionaryEntry === null) {
            return null;
        }

        $value = $dictionaryEntry->value;
        if (is_a($value, $valueType) === false) {
            throw new InvalidArgumentException(sprintf('Expected value with value %s to be of type %s, got %s', $dictionaryKey->value, $valueType, get_class($value)));
        }

        return $value;
    }

    /** @return class-string<DictionaryValue|NameValue|Dictionary>|null class of the value stored for the key, null when the key is absent */
    public function getTypeForKey(DictionaryKey $dictionaryKey): ?string {
        $dictionaryEntry = $this->findEntry($dictionaryKey);

        return $dictionaryEntry === null ? null : $dictionaryEntry->value::class;
    }

    /**
     * Resolve the value for the key to a dictionary, whether it is stored inline, as an array of
     * dictionaries or as a reference to another object.
     *
     * @param Document|null $document only needed when the value is a reference
     * @return Dictionary|null null when the key is absent
     * @throws ParseFailureException when a reference cannot be resolved or the value has an unsupported type
     */
    public function getSubDictionary(?Document $document, DictionaryKey $dictionaryKey): ?Dictionary {
        $subDictionaryType = $this->getTypeForKey($dictionaryKey);
        if ($subDictionaryType === null) {
            return null;
        }

        if ($subDictionaryType === Dictionary::class) {
            return $this->getValueForKey($dictionaryKey, Dictionary::class)
                ?? throw new RuntimeException(sprintf('Dictionary for key %s disappeared between type check and retrieval', $dictionaryKey->name));
        }

        if ($subDictionaryType === DictionaryArrayValue::class) {
            return ($this->getValueForKey($dictionaryKey, DictionaryArrayValue::class)
                ?? throw new RuntimeException(sprintf('Dictionary array for key %s disappeared between type check and retrieval', $dictionaryKey->name)))
                ->toSingleDictionary();
        }

        if ($subDictionaryType === ReferenceValue::class) {
            if ($document === null) {
                throw new ParseFailureException('Document is required to get subDictionary for reference');
            }

            return ($this->getObjectForReference($document, $dictionaryKey)
                ?? throw new ParseFailureException(sprintf('Unable to resolve reference for subDictionary with key %s', $dictionaryKey->name)))
                ->getDictionary();
        }

        throw new ParseFailureException(sprintf('Invalid type "%s" for subDictionary with key %s', $subDictionaryType, $dictionaryKey->name));
    }

    /**
     * Resolve the reference stored for the key to the object it points to.
     *
     * @template T of DecoratedObject
     * @param class-string<T>|null $expectedDecoratorFQN
     * @return ($expectedDecoratorFQN is null ? DecoratedObject|null : T|null) null when the key is absent
     * @throws ParseFailureException when the document does not return an object for the reference
     */
    public function getObjectForReference(Document $document, DictionaryKey|ExtendedDictionaryKey $dictionaryKey, ?string $expectedDecoratorFQN = null): ?DecoratedObject {
        $reference = $this->getValueForKey($dictionaryKey, ReferenceValue::class);
        if ($reference === null) {
            return null;
        }

        return $document->getObject($reference->objectNumber, $expectedDecoratorFQN)
            ?? throw new ParseFailureException(sprintf('Unable to retrieve object %s referenced by key %s', $reference->objectNumber, $dictionaryKey->value));
    }

    /**
     * Resolve every reference in the reference array stored for the key.
     *
     * @template T of DecoratedObject
     * @param class-string<T>|null $expectedDecoratorFQN
     * @return ($expectedDecoratorFQN is null ? list<DecoratedObject> : list<T>) empty when the key is absent
     * @throws ParseFailureException when the document does not return an object for one of the references
     */
    public function getObjectsForReference(Document $document, DictionaryKey|ExtendedDictionaryKey $dictionaryKey, ?string $expectedDecoratorFQN = null): array {
        $references = $this->getValueForKey($dictionaryKey, ReferenceValueArray::class);
        if ($references === null) {
            return [];
        }

        $objects = [];
        foreach ($references->referenceValues as $referenceValue) {
            $objects[] = $document->getObject($referenceValue->objectNumber, $expectedDecoratorFQN)
                ?? throw new ParseFailureException(sprintf('Unable to retrieve object %s referenced by key %s', $referenceValue->objectNumber, $dictionaryKey->value));
        }

        return $objects;
    }

    /** Value of the Type entry; when that entry holds a dictionary, the Type entry of that dictionary is used. */
    public function getType(): ?TypeNameValue {
        if ($this->getTypeForKey(DictionaryKey::TYPE) === Dictionary::class) {
            return $this->getValueForKey(DictionaryKey::TYPE, Dictionary::class)
                ?->getValueForKey(DictionaryKey::TYPE, TypeNameValue::class);
        }

        return $this->getValueForKey(DictionaryKey::TYPE, TypeNameValue::class);
    }

    /** Value of the Subtype entry; when that entry holds a dictionary, the Subtype entry of that dictionary is used. */
    public function getSubType(): ?SubtypeNameValue {
        if ($this->getTypeForKey(DictionaryKey::SUBTYPE) === Dictionary::class) {
            return $this->getValueForKey(DictionaryKey::SUBTYPE, Dictionary::class)
                ?->getValueForKey(DictionaryKey::SUBTYPE, SubtypeNameValue::class);
        }

        return $this->getValueForKey(DictionaryKey::SUBTYPE, SubtypeNameValue::class);
    }

    /** Find the first entry whose key matches: by identity for DictionaryKey, by key text for ExtendedDictionaryKey. */
    private function findEntry(DictionaryKey|ExtendedDictionaryKey $dictionaryKey): ?DictionaryEntry {
        foreach ($this->dictionaryEntries as $dictionaryEntry) {
            if ($dictionaryKey instanceof DictionaryKey) {
                if ($dictionaryEntry->key === $dictionaryKey) {
                    return $dictionaryEntry;
                }

                continue;
            }

            if ($dictionaryEntry->key instanceof ExtendedDictionaryKey && $dictionaryEntry->key->value === $dictionaryKey->value) {
                return $dictionaryEntry;
            }
        }

        return null;
    }
}

View File

@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryEntry;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\ExtendedDictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\NameValue;
/**
 * A single key/value pair of a dictionary.
 *
 * @api
 */
class DictionaryEntry {
/**
 * @param DictionaryKey|ExtendedDictionaryKey $key key of the entry
 * @param DictionaryValue|Dictionary|NameValue $value parsed value stored for the key
 */
public function __construct(
public readonly DictionaryKey|ExtendedDictionaryKey $key,
public readonly DictionaryValue|Dictionary|NameValue $value,
) {
}
}

View File

@ -0,0 +1,78 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryEntry;
use BackedEnum;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryFactory;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\ExtendedDictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\ArrayValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\NameValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\TextString\TextStringValue;
use PrinsFrank\PdfParser\Document\Dictionary\Normalization\NameValueNormalizer;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
/**
 * Turns raw key/value pairs into DictionaryEntry objects with typed values.
 *
 * @internal
 */
class DictionaryEntryFactory {
    /**
     * Resolve the key string to a DictionaryKey, falling back to an ExtendedDictionaryKey,
     * and parse the raw value into one of the value types that key allows.
     *
     * @param string|array<string, mixed> $dictionaryValue
     * @throws PdfParserException
     */
    public static function fromKeyValuePair(string $keyString, string|array $dictionaryValue): ?DictionaryEntry {
        $key = DictionaryKey::tryFromKeyString($keyString);
        if ($key === null) {
            $key = ExtendedDictionaryKey::fromKeyString($keyString);
        }

        $parsedValue = self::getValue($key, $dictionaryValue);

        return new DictionaryEntry($key, $parsedValue);
    }

    /**
     * Try the value types allowed for the key in a fixed order: nested dictionary, reference,
     * backed enum values, other dictionary values and finally a text string.
     *
     * @param string|array<string, mixed> $value
     * @throws PdfParserException
     */
    protected static function getValue(DictionaryKey|ExtendedDictionaryKey $dictionaryKey, string|array $value): Dictionary|DictionaryValue|NameValue {
        $allowedValueTypes = $dictionaryKey->getValueTypes();
        $acceptsDictionaryOrArray = in_array(Dictionary::class, $allowedValueTypes, true)
            || in_array(ArrayValue::class, $allowedValueTypes, true);

        if ($acceptsDictionaryOrArray) {
            if (is_array($value)) {
                return DictionaryFactory::fromArray($value);
            }

            // A value of the form "<number> <number> R" is a reference
            if (preg_match('/^[0-9]+ [0-9]+ R$/', $value) === 1) {
                $referenceValue = ReferenceValue::fromValue($value);
                if ($referenceValue !== null) {
                    return $referenceValue;
                }
            }
        }

        foreach ($allowedValueTypes as $enumCandidate) {
            if (!is_a($enumCandidate, BackedEnum::class, true) || !is_string($value)) {
                continue;
            }

            $enumCase = $enumCandidate::tryFrom(NameValueNormalizer::normalize($value));
            if ($enumCase !== null) {
                return $enumCase;
            }
        }

        foreach ($allowedValueTypes as $valueCandidate) {
            // TextStrings accept everything, so we check that last
            $isCandidate = is_a($valueCandidate, DictionaryValue::class, true)
                && $valueCandidate !== TextStringValue::class;
            if (!$isCandidate || !is_string($value)) {
                continue;
            }

            $valueObject = $valueCandidate::fromValue($value);
            if ($valueObject !== null) {
                return $valueObject;
            }
        }

        if (is_string($value) && in_array(TextStringValue::class, $allowedValueTypes, true)) {
            return TextStringValue::fromValue($value);
        }

        throw new ParseFailureException(sprintf('Value "%s" for dictionary key %s could not be parsed to a valid value type', is_array($value) ? 'array()' : $value, $dictionaryKey->value));
    }
}

View File

@ -0,0 +1,34 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryEntry\DictionaryEntryFactory;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
/**
 * Builds Dictionary objects from nested key => value arrays.
 *
 * @internal
 */
class DictionaryFactory {
    /**
     * Convert every key => value pair into a DictionaryEntry and wrap the entries in a Dictionary.
     * Values must be strings or non-list arrays (nested dictionaries).
     *
     * @param array<string, mixed> $dictionaryArray
     * @throws PdfParserException
     */
    public static function fromArray(array $dictionaryArray): Dictionary {
        $dictionaryEntries = [];
        foreach ($dictionaryArray as $keyString => $value) {
            if (!is_string($value) && (!is_array($value) || array_is_list($value))) {
                throw new InvalidArgumentException(sprintf('values should be either strings or non-list array, %s given', gettype($value)));
            }

            // PHP converts numeric string array keys such as "12" to int; cast back so the
            // strictly typed string parameter below accepts them instead of raising a TypeError
            /** @var non-empty-array<string, mixed>|string $value */
            $dictionaryEntry = DictionaryEntryFactory::fromKeyValuePair((string) $keyString, $value);
            if ($dictionaryEntry === null) {
                continue;
            }

            $dictionaryEntries[] = $dictionaryEntry;
        }

        return new Dictionary(... $dictionaryEntries);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\NameValue;
/** Contract for dictionary keys: a key declares which value classes its value may be parsed into. */
interface DictionaryKeyInterface {
/**
 * Value classes allowed for this key.
 *
 * @return list<class-string<DictionaryValue|Dictionary|NameValue>>
 */
public function getValueTypes(): array;
}

View File

@ -0,0 +1,26 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\TextString\TextStringValue;
/** Dictionary key identified only by its text; it accepts references, text strings and dictionaries as value. */
class ExtendedDictionaryKey implements DictionaryKeyInterface {
    public function __construct(
        public readonly string $value,
    ) {
    }

    /**
     * Build a key from its raw text: leading solidus characters and trailing
     * line feeds, tabs and spaces are stripped.
     *
     * @internal
     */
    public static function fromKeyString(string $keyString): self {
        $withoutLeadingSolidus = ltrim($keyString, '/');
        $normalizedKey = rtrim($withoutLeadingSolidus, "\n\t ");

        return new self($normalizedKey);
    }

    /** @api */
    #[Override]
    public function getValueTypes(): array {
        $acceptedValueTypes = [ReferenceValue::class, TextStringValue::class, Dictionary::class];

        return $acceptedValueTypes;
    }
}

View File

@ -0,0 +1,17 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryParseContext;
/**
 * States of the character-by-character dictionary parser (see DictionaryParser::parse()).
 *
 * @internal
 */
enum DictionaryParseContext {
// Initial context, also reported by NestingContext::getContext() when nothing was set for the current level
case ROOT;
// Inside a dictionary, between entries
case DICTIONARY;
// Reading a key; characters are appended to the key buffer
case KEY;
// White-space seen after a key, before its value starts
case KEY_VALUE_SEPARATOR;
// Reading a value; characters are appended to the value buffer
case VALUE;
// Reading a value between parentheses; characters are appended to the value buffer
case VALUE_IN_PARENTHESES;
// Reading a value between square brackets; characters are appended to the value buffer
case VALUE_IN_SQUARE_BRACKETS;
// NOTE(review): not referenced by DictionaryParser::parse() as far as visible here - confirm whether it is still used
case VALUE_IN_ANGLE_BRACKETS;
// Inside a comment started by a percent sign; left again at the next line feed
case COMMENT;
}

View File

@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryParseContext;
use PrinsFrank\PdfParser\Document\Generic\Parsing\InfiniteBuffer;
/**
 * Keeps, per nesting level, the current parse context and the key and value buffers
 * while a dictionary is being parsed.
 *
 * @internal
 */
class NestingContext {
    private string $currentLevel;

    /** @var array<string, DictionaryParseContext> */
    private array $nestingContext = [];

    /** @var array<string, InfiniteBuffer> */
    private array $keyBuffer = [];

    /** @var array<string, InfiniteBuffer> */
    private array $valueBuffer = [];

    public function __construct() {
        $this->currentLevel = '';
    }

    /**
     * Go one level deeper. The new level is named after the key buffer of the current level when
     * there is one, otherwise after the current level interpreted as an integer, plus one.
     */
    public function incrementNesting(): self {
        $keyBufferAtLevel = $this->keyBuffer[$this->currentLevel] ?? null;
        $this->currentLevel = $keyBufferAtLevel === null
            ? (string) ((int) $this->currentLevel + 1)
            : (string) $keyBufferAtLevel;

        return $this;
    }

    /** Drop the most recently added context and continue at the last level that still has one. */
    public function decrementNesting(): self {
        array_pop($this->nestingContext);
        $remainingLevel = array_key_last($this->nestingContext);
        $this->currentLevel = (string) $remainingLevel;

        return $this;
    }

    public function setContext(DictionaryParseContext $dictionaryParseContext): self {
        $this->nestingContext[$this->currentLevel] = $dictionaryParseContext;

        return $this;
    }

    /** Context of the current level, ROOT when none was set. */
    public function getContext(): DictionaryParseContext {
        if (isset($this->nestingContext[$this->currentLevel])) {
            return $this->nestingContext[$this->currentLevel];
        }

        return DictionaryParseContext::ROOT;
    }

    /** Key buffer of the current level, created on first use. */
    public function getKeyBuffer(): InfiniteBuffer {
        if (!isset($this->keyBuffer[$this->currentLevel])) {
            $this->keyBuffer[$this->currentLevel] = new InfiniteBuffer();
        }

        return $this->keyBuffer[$this->currentLevel];
    }

    public function addToKeyBuffer(string $char): self {
        $buffer = $this->getKeyBuffer();
        $buffer->addChar($char);

        return $this;
    }

    public function removeFromKeyBuffer(int $nChars = 1): self {
        $buffer = $this->getKeyBuffer();
        $buffer->removeChar($nChars);

        return $this;
    }

    /** Value buffer of the current level, created on first use. */
    public function getValueBuffer(): InfiniteBuffer {
        if (!isset($this->valueBuffer[$this->currentLevel])) {
            $this->valueBuffer[$this->currentLevel] = new InfiniteBuffer();
        }

        return $this->valueBuffer[$this->currentLevel];
    }

    public function addToValueBuffer(string $char): self {
        $buffer = $this->getValueBuffer();
        $buffer->addChar($char);

        return $this;
    }

    public function removeFromValueBuffer(int $nChars = 1): self {
        $buffer = $this->getValueBuffer();
        $buffer->removeChar($nChars);

        return $this;
    }

    /**
     * String contents of all key buffers, in the order the buffers were created, skipping empty ones.
     *
     * @return list<string>
     */
    public function getKeysFromRoot(): array {
        $keyStrings = array_map(
            static fn (InfiniteBuffer $buffer): string => (string) $buffer,
            $this->keyBuffer,
        );

        return array_values(array_filter($keyStrings, static fn (string $key): bool => $key !== ''));
    }

    /** Flush the value buffer and then the key buffer of the current level, when they exist. */
    public function flush(): self {
        if (isset($this->valueBuffer[$this->currentLevel])) {
            $this->valueBuffer[$this->currentLevel]->flush();
        }

        if (isset($this->keyBuffer[$this->currentLevel])) {
            $this->keyBuffer[$this->currentLevel]->flush();
        }

        return $this;
    }
}

View File

@ -0,0 +1,112 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryParseContext\DictionaryParseContext;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryParseContext\NestingContext;
use PrinsFrank\PdfParser\Document\Generic\Character\DelimiterCharacter;
use PrinsFrank\PdfParser\Document\Generic\Character\LiteralStringEscapeCharacter;
use PrinsFrank\PdfParser\Document\Generic\Character\WhitespaceCharacter;
use PrinsFrank\PdfParser\Document\Generic\Parsing\RollingCharBuffer;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Stream\Stream;
/**
 * Character-by-character parser that turns the raw bytes of a dictionary into a Dictionary.
 * It keeps a parse context per nesting level and collects keys and values in buffers,
 * which are written to a nested array that is finally handed to DictionaryFactory::fromArray().
 *
 * @internal
 */
class DictionaryParser {
/**
 * Parse the dictionary found in $nrOfBytes bytes of $stream, starting at $startPos.
 *
 * The order of the branches below matters: for every character the first matching branch decides
 * the new context, after which the character is appended to the key or value buffer depending on
 * the context that is then active.
 *
* @phpstan-assert int<0, max> $startPos
* @phpstan-assert int<1, max> $nrOfBytes
*
* @throws PdfParserException
*/
public static function parse(Stream $stream, int $startPos, int $nrOfBytes): Dictionary {
$dictionaryArray = [];
// Gives access to the characters seen just before the current one (used for escape and "<<"/">>" detection)
$rollingCharBuffer = new RollingCharBuffer(6);
$nestingContext = (new NestingContext())->setContext(DictionaryParseContext::ROOT);
// Depth of nested square brackets; the square bracket context is only left when this returns to 0
$arrayNestingLevel = 0;
foreach ($stream->chars($startPos, $nrOfBytes) as $char) {
$rollingCharBuffer->next($char);
if ($char === DelimiterCharacter::LESS_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter() === DelimiterCharacter::LESS_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter(2) !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) {
// Second of two consecutive, unescaped less-than signs outside square brackets: a dictionary starts
if ($nestingContext->getContext() === DictionaryParseContext::KEY) {
// The first less-than sign was appended to the key buffer while in KEY context; take it out again
$nestingContext->removeFromKeyBuffer();
}
$nestingContext->setContext(DictionaryParseContext::DICTIONARY)->incrementNesting()->setContext(DictionaryParseContext::DICTIONARY);
} elseif ($char === DelimiterCharacter::LESS_THAN_SIGN->value && $nestingContext->getContext() === DictionaryParseContext::KEY) {
// A less-than sign directly after a key starts the value
$nestingContext->setContext(DictionaryParseContext::VALUE);
} elseif ($char === DelimiterCharacter::GREATER_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter() === DelimiterCharacter::GREATER_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter(2) !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) {
// Second of two consecutive, unescaped greater-than signs outside square brackets: a dictionary ends.
// One character is removed from the value buffer, the pending pair is stored and the level is left.
$nestingContext->removeFromValueBuffer();
self::flush($dictionaryArray, $nestingContext);
$nestingContext->decrementNesting()->flush();
} elseif ($char === DelimiterCharacter::SOLIDUS->value && $rollingCharBuffer->getPreviousCharacter() !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) {
// Unescaped solidus outside square brackets: starts a key, or a value when a key was just read
if ($nestingContext->getContext() === DictionaryParseContext::DICTIONARY) {
$nestingContext->setContext(DictionaryParseContext::KEY);
} elseif ($nestingContext->getContext() === DictionaryParseContext::VALUE) {
// The previous value is complete; store it before the next key starts
self::flush($dictionaryArray, $nestingContext);
$nestingContext->setContext(DictionaryParseContext::KEY);
} elseif ($nestingContext->getContext() === DictionaryParseContext::KEY || $nestingContext->getContext() === DictionaryParseContext::KEY_VALUE_SEPARATOR) {
$nestingContext->setContext(DictionaryParseContext::VALUE);
}
} elseif ($char === WhitespaceCharacter::LINE_FEED->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) {
// Line feed outside square brackets: ends a key, a value or a comment
if ($nestingContext->getContext() === DictionaryParseContext::KEY) {
$nestingContext->setContext(DictionaryParseContext::KEY_VALUE_SEPARATOR);
} elseif ($nestingContext->getContext() === DictionaryParseContext::VALUE) {
self::flush($dictionaryArray, $nestingContext);
} elseif ($nestingContext->getContext() === DictionaryParseContext::COMMENT) {
$nestingContext->setContext(DictionaryParseContext::DICTIONARY);
}
} elseif (WhitespaceCharacter::tryFrom($char) !== null && $nestingContext->getContext() === DictionaryParseContext::KEY) {
// Any other white-space character ends the key
$nestingContext->setContext(DictionaryParseContext::KEY_VALUE_SEPARATOR);
} elseif ($char === DelimiterCharacter::LEFT_PARENTHESIS->value && (in_array($nestingContext->getContext(), [DictionaryParseContext::KEY, DictionaryParseContext::KEY_VALUE_SEPARATOR, DictionaryParseContext::VALUE], true))) {
$nestingContext->setContext(DictionaryParseContext::VALUE_IN_PARENTHESES);
} elseif ($char === DelimiterCharacter::RIGHT_PARENTHESIS->value && $rollingCharBuffer->getPreviousCharacter() !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() === DictionaryParseContext::VALUE_IN_PARENTHESES) {
// Unescaped right parenthesis closes the value in parentheses
$nestingContext->setContext(DictionaryParseContext::VALUE);
} elseif ($char === DelimiterCharacter::LEFT_SQUARE_BRACKET->value && (in_array($nestingContext->getContext(), [DictionaryParseContext::KEY, DictionaryParseContext::KEY_VALUE_SEPARATOR, DictionaryParseContext::VALUE, DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS], true))) {
// Square brackets may be nested, so count the depth
$nestingContext->setContext(DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS);
$arrayNestingLevel++;
} elseif ($char === DelimiterCharacter::RIGHT_SQUARE_BRACKET->value && $nestingContext->getContext() === DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) {
$arrayNestingLevel--;
if ($arrayNestingLevel === 0) {
$nestingContext->setContext(DictionaryParseContext::VALUE);
}
} elseif (trim($char) !== '' && $nestingContext->getContext() === DictionaryParseContext::KEY_VALUE_SEPARATOR) {
// First character that trim() does not strip after the separator: the value starts
$nestingContext->setContext(DictionaryParseContext::VALUE);
} elseif ($char === DelimiterCharacter::PERCENT_SIGN->value && $rollingCharBuffer->getPreviousCharacter() !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_PARENTHESES) {
// Unescaped percent sign outside parentheses starts a comment
$nestingContext->setContext(DictionaryParseContext::COMMENT);
}
// Append the character to the buffer that belongs to the context that is active now
match ($nestingContext->getContext()) {
DictionaryParseContext::KEY => $nestingContext->addToKeyBuffer($char),
DictionaryParseContext::VALUE_IN_PARENTHESES,
DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS,
DictionaryParseContext::VALUE => $nestingContext->addToValueBuffer($char),
default => null,
};
}
return DictionaryFactory::fromArray($dictionaryArray);
}
/**
 * Store the key/value pair buffered at the current nesting level in $dictionaryArray, nested under
 * the keys of the enclosing levels, and empty the buffers of the current level.
 * Does nothing when the key buffer or the value buffer is empty.
 *
 * @param array<string, mixed> $dictionaryArray
 */
private static function flush(array &$dictionaryArray, NestingContext $nestingContext): void {
if ($nestingContext->getValueBuffer()->isEmpty() || $nestingContext->getKeyBuffer()->isEmpty()) {
return;
}
$dictionaryArrayPointer = &$dictionaryArray;
$keys = $nestingContext->getKeysFromRoot();
// Walk down the nested array along the keys of the enclosing levels, stopping at the current key
foreach ($keys as $index => $key) {
if ($key === (string) $nestingContext->getKeyBuffer() && $index === array_key_last($keys)) {
break;
}
/** @phpstan-ignore offsetAccess.nonOffsetAccessible */
$dictionaryArrayPointer = &$dictionaryArrayPointer[trim($key)];
}
/** @phpstan-ignore offsetAccess.nonOffsetAccessible */
$dictionaryArrayPointer[(string) $nestingContext->getKeyBuffer()] = trim((string) $nestingContext->getValueBuffer());
$nestingContext->flush();
}
}

View File

@ -0,0 +1,69 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValueArray;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
/**
 * Generic array value made of integers, strings and nested arrays.
 *
 * @api
 */
class ArrayValue implements DictionaryValue {
    /** @param list<int|string|ArrayValue|ReferenceValueArray|null> $value */
    public function __construct(
        public readonly array $value
    ) {
    }

    /**
     * Builds an array value from a bracketed string.
     *
     * Returns null when the trimmed input is not wrapped in '[' and ']'. When the
     * space-separated tokens come in multiples of three and the third token is 'R',
     * parsing is delegated to ReferenceValueArray::fromValue().
     *
     * @throws PdfParserException
     */
    #[Override]
    public static function fromValue(string $valueString): null|self|ReferenceValueArray {
        $trimmed = trim($valueString);
        if (!(str_starts_with($trimmed, '[') && str_ends_with($trimmed, ']'))) {
            return null;
        }

        // Put a space between directly adjacent <...> items so they become separate tokens.
        $separated = preg_replace('/(<[^>]*>)(?=<[^>]*>)/', '$1 $2', $trimmed);
        if ($separated === null) {
            throw new RuntimeException('An error occurred while sanitizing array value');
        }

        // Drop the surrounding brackets, start every name on its own token, and fold newlines into spaces.
        $withoutBrackets = rtrim(ltrim($separated, '[ '), ' ]');
        $spaced = str_replace(['/', "\n"], [' /', ' '], $withoutBrackets);
        $normalized = preg_replace('/\s+/', ' ', $spaced);
        if ($normalized === null) {
            throw new RuntimeException('An error occurred while removing duplicate spaces from array value');
        }

        $tokens = explode(' ', $normalized);
        if (($tokens[2] ?? null) === 'R' && count($tokens) % 3 === 0) {
            return ReferenceValueArray::fromValue($normalized);
        }

        $items = [];
        foreach ($tokens as $token) {
            if ($token === '') {
                continue;
            }

            $items[] = match (true) {
                str_starts_with($token, '[') && str_ends_with($token, ']') => self::fromValue($token),
                (string) (int) $token === $token => (int) $token,
                default => $token,
            };
        }

        return new self($items);
    }

    /**
     * Renders the items separated by single spaces and wrapped in square brackets.
     *
     * @throws ParseFailureException when an item has an unsupported type
     */
    public function toString(): string {
        $parts = [];
        foreach ($this->value as $item) {
            if (is_int($item) || is_float($item) || is_string($item)) {
                $parts[] = $item;
            } elseif ($item instanceof ArrayValue) {
                $parts[] = $item->toString();
            } elseif ($item instanceof ReferenceValueArray) {
                $parts[] = implode(' ', array_map(fn (ReferenceValue $referenceValue) => $referenceValue->objectNumber . ' R', $item->referenceValues));
            } else {
                throw new ParseFailureException('Unsupported array value type: ' . gettype($item));
            }
        }

        return '[' . trim(implode(' ', $parts)) . ']';
    }
}

View File

@ -0,0 +1,65 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\Item\ConsecutiveCIDWidth;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\Item\RangeCIDWidth;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
/**
 * Collection of CID width items parsed from a widths array string.
 *
 * @see 9.7.4.3 Glyph metrics in CIDFonts
 */
class CIDFontWidths implements DictionaryValue {
    /** @var list<ConsecutiveCIDWidth|RangeCIDWidth> */
    private readonly array $widths;

    /** @no-named-arguments */
    public function __construct(
        ConsecutiveCIDWidth|RangeCIDWidth ...$widths,
    ) {
        $this->widths = $widths;
    }

    /** Returns the width from the first item that covers the character code, or null when no item does. */
    public function getWidthForCharacter(int $characterCode): ?float {
        foreach ($this->widths as $widthItem) {
            if (($widthForCharacterCode = $widthItem->getWidthForCharacterCode($characterCode)) !== null) {
                return $widthForCharacterCode;
            }
        }

        return null;
    }

    /**
     * Parses entries of the form "start [w1 w2 ...]" (consecutive widths) and
     * "start end width" (one width for a range).
     *
     * Returns null when nothing matches or when any matched entry is malformed.
     */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        $valueString = str_replace("\n", ' ', $valueString);
        if (preg_match_all('/(?<startingCID>[0-9]+)\s*(?<CIDS>[0-9]+\s*[0-9.]+|\[[0-9. ]+\])/', $valueString, $matches, PREG_SET_ORDER) <= 0) {
            return null;
        }

        $widths = [];
        foreach ($matches as $match) {
            if ((string) ($startingCID = (int) $match['startingCID']) !== $match['startingCID']) {
                return null;
            }

            if (str_starts_with($match['CIDS'], '[') && str_ends_with($match['CIDS'], ']')) {
                // Split on whitespace runs and drop empty tokens: the pattern admits leading, trailing and
                // repeated spaces, and an empty token would become a bogus 0.0 width that shifts all later ones.
                $widthTokens = preg_split('/\s+/', rtrim(ltrim($match['CIDS'], '['), ']'), -1, PREG_SPLIT_NO_EMPTY);
                if ($widthTokens === false) {
                    return null;
                }

                $widths[] = new ConsecutiveCIDWidth($startingCID, array_map(floatval(...), $widthTokens));
                continue;
            }

            // The pattern allows any whitespace run between the end CID and the width, so split accordingly.
            $arguments = preg_split('/\s+/', $match['CIDS'], -1, PREG_SPLIT_NO_EMPTY);
            if ($arguments === false || count($arguments) !== 2) {
                return null;
            }

            if ((string) ($endCID = (int) $arguments[0]) !== $arguments[0] || (string) ($width = (float) $arguments[1]) !== $arguments[1]) {
                return null;
            }

            $widths[] = new RangeCIDWidth($startingCID, $endCID, $width);
        }

        return new self(...$widths);
    }
}

View File

@ -0,0 +1,50 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Exception\RuntimeException;
/**
 * Three byte lengths parsed from an array of exactly three integers.
 *
 * @api
 */
class CrossReferenceStreamByteSizes implements DictionaryValue {
    public function __construct(
        public readonly int $lengthRecord1InBytes,
        public readonly int $lengthRecord2InBytes,
        public readonly int $lengthRecord3InBytes,
    ) {
    }

    /**
     * Sum of the three lengths.
     *
     * @throws RuntimeException when the sum is less than 1
     * @return int<1, max>
     */
    public function getTotalLengthInBytes(): int {
        $totalLength = $this->lengthRecord1InBytes + $this->lengthRecord2InBytes + $this->lengthRecord3InBytes;
        if ($totalLength < 1) {
            throw new RuntimeException(sprintf('Total length should not be less than 1, got %d', $totalLength));
        }

        return $totalLength;
    }

    /**
     * Parses a string such as "[1 2 1]".
     *
     * Returns null unless the string is wrapped in square brackets and holds exactly
     * three integers. The integers may be separated by any run of whitespace.
     */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        if (!str_starts_with($valueString, '[') || !str_ends_with($valueString, ']')) {
            return null;
        }

        // Split on whitespace runs rather than a single space, so newlines, tabs and
        // repeated spaces between the numbers do not cause a valid array to be rejected.
        $values = preg_split('/\s+/', rtrim(ltrim($valueString, '['), ']'), -1, PREG_SPLIT_NO_EMPTY);
        if ($values === false || count($values) !== 3) {
            return null;
        }

        foreach ($values as $value) {
            if ((string) (int) $value !== $value) {
                return null;
            }
        }

        return new self((int) $values[0], (int) $values[1], (int) $values[2]);
    }
}

View File

@ -0,0 +1,55 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryParser;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Exception\PdfParserException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
use PrinsFrank\PdfParser\Stream\InMemoryStream;
/** Array value whose items are dictionaries. */
class DictionaryArrayValue implements DictionaryValue {
    /** @var list<Dictionary> */
    public readonly array $dictionaries;

    /** @no-named-arguments */
    public function __construct(
        Dictionary... $dictionaries,
    ) {
        $this->dictionaries = $dictionaries;
    }

    /**
     * Parses an array of dictionaries.
     *
     * Returns null unless the input, ignoring spaces and line breaks, starts with
     * '[<<' or '[null' and ends with '>>]' or 'null]'.
     *
     * @throws PdfParserException
     */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        $compact = str_replace([' ', "\r", "\n"], '', $valueString);
        $hasValidStart = str_starts_with($compact, '[<<') || str_starts_with($compact, '[null');
        $hasValidEnd = str_ends_with($compact, '>>]') || str_ends_with($compact, 'null]');
        if (!$hasValidStart || !$hasValidEnd) {
            return null;
        }

        // Put a space between directly adjacent dictionaries so the '>> <<' split below finds the boundary.
        $separated = preg_replace('/(<<[^>]*>>)(?=<<[^>]*>>)/', '$1 $2', $valueString);
        if ($separated === null) {
            throw new RuntimeException('An error occurred while sanitizing dictionary array value');
        }

        $parsedDictionaries = [];
        // Three characters are cut from each end of the string before splitting.
        foreach (explode('>> <<', substr($separated, 3, -3)) as $chunk) {
            if ($chunk === '') {
                $parsedDictionaries[] = new Dictionary();
                continue;
            }

            $stream = new InMemoryStream('<<' . $chunk . '>>');
            $parsedDictionaries[] = DictionaryParser::parse($stream, 0, $stream->getSizeInBytes());
        }

        return new self(...$parsedDictionaries);
    }

    /** Combines the entries of all dictionaries, in order, into one new dictionary. */
    public function toSingleDictionary(): ?Dictionary {
        $combinedEntries = [];
        foreach ($this->dictionaries as $singleDictionary) {
            foreach ($singleDictionary->dictionaryEntries as $entry) {
                $combinedEntries[] = $entry;
            }
        }

        return new Dictionary(...$combinedEntries);
    }
}

View File

@ -0,0 +1,57 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array;
use Override;
use PrinsFrank\GlyphLists\AGlyphList;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\Item\DifferenceRange;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValueArray;
/** Array value that groups glyph names into ranges, each starting at an integer index. */
class DifferencesArrayValue implements DictionaryValue {
    /** @param list<DifferenceRange> $differenceRanges */
    public function __construct(
        private readonly array $differenceRanges,
    ) {
    }

    /**
     * Parses an array in which every integer starts a new range and every following
     * string names the next glyph of that range.
     *
     * Returns null when the input is not a plain array or holds an item that is
     * neither an integer nor a string.
     */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        $parsedArray = ArrayValue::fromValue($valueString);
        if ($parsedArray === null || $parsedArray instanceof ReferenceValueArray) {
            return null;
        }

        $ranges = [];
        $pendingGlyphs = [];
        $currentStart = null;
        foreach ($parsedArray->value as $item) {
            if (!is_int($item) && !is_string($item)) {
                return null;
            }

            if (is_string($item)) {
                $pendingGlyphs[] = AGlyphList::tryFrom(ltrim($item, '/'));
                continue;
            }

            // An integer closes the range collected so far, if any, and opens a new one.
            if ($currentStart !== null) {
                $ranges[] = new DifferenceRange($currentStart, $pendingGlyphs);
                $pendingGlyphs = [];
            }

            $currentStart = $item;
        }

        if ($currentStart !== null) {
            $ranges[] = new DifferenceRange($currentStart, $pendingGlyphs);
        }

        return new self($ranges);
    }

    /** Returns the glyph from the first range containing the index, or null when no range contains it. */
    public function getGlyph(int $int): ?AGlyphList {
        foreach ($this->differenceRanges as $range) {
            if (!$range->contains($int)) {
                continue;
            }

            return $range->getGlyph($int);
        }

        return null;
    }
}

View File

@ -0,0 +1,20 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\Item;
/** Widths for a run of consecutive character codes, starting at a given CID. */
class ConsecutiveCIDWidth {
    /** @param list<float> $widths */
    public function __construct(
        public readonly int $cidStart,
        public readonly array $widths,
    ) {
    }

    /** Returns the stored width divided by 1000, or null when the character code has no entry in this run. */
    public function getWidthForCharacterCode(int $characterCode): ?float {
        $offset = $characterCode - $this->cidStart;

        return array_key_exists($offset, $this->widths)
            ? $this->widths[$offset] / 1000
            : null;
    }
}

View File

@ -0,0 +1,33 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\Item;
use PrinsFrank\GlyphLists\AGlyphList;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
/** Run of glyphs assigned to consecutive indexes, beginning at a first index. */
class DifferenceRange {
    /** @param list<AGlyphList|null> $characters */
    public function __construct(
        private readonly int $firstIndex,
        private readonly array $characters,
    ) {
    }

    /** Tells whether the index lies between the first index (inclusive) and first index + glyph count (exclusive). */
    public function contains(int $index): bool {
        if ($index < $this->firstIndex) {
            return false;
        }

        return $index < $this->firstIndex + count($this->characters);
    }

    /**
     * Returns the glyph stored for the index, which may be null.
     *
     * @throws InvalidArgumentException when the index lies outside this range
     * @throws RuntimeException when no entry exists at the computed offset
     */
    public function getGlyph(int $index): ?AGlyphList {
        if ($this->contains($index) === false) {
            throw new InvalidArgumentException('This difference range does not contain index ' . $index);
        }

        $offset = $index - $this->firstIndex;
        if (array_key_exists($offset, $this->characters) === false) {
            throw new RuntimeException('Expected glyph to be present, but it was not');
        }

        return $this->characters[$offset];
    }
}

View File

@ -0,0 +1,20 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Array\Item;
/** One width shared by every character code from cidStart to cidEnd, both inclusive. */
class RangeCIDWidth {
    public function __construct(
        public readonly int $cidStart,
        public readonly int $cidEnd,
        public readonly float $width,
    ) {
    }

    /** Returns the width divided by 1000 when the character code is inside the range, otherwise null. */
    public function getWidthForCharacterCode(int $characterCode): ?float {
        $isInsideRange = $characterCode >= $this->cidStart && $characterCode <= $this->cidEnd;

        return $isInsideRange ? $this->width / 1000 : null;
    }
}

View File

@ -0,0 +1,27 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Boolean;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
/**
 * Boolean value parsed from the exact strings 'true' and 'false'.
 *
 * @api
 */
class BooleanValue implements DictionaryValue {
    public function __construct(
        public readonly bool $value,
    ) {
    }

    /** Returns an instance for 'true' or 'false' and null for any other string. */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        return match ($valueString) {
            'true' => new self(true),
            'false' => new self(false),
            default => null,
        };
    }
}

View File

@ -0,0 +1,64 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Date;
use DateTimeImmutable;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use ValueError;
/**
 * Date value parsed from a string that, once decoded, starts with 'D:'.
 *
 * @api
 */
class DateValue implements DictionaryValue {
public function __construct(
public readonly ?DateTimeImmutable $value
) {
}
/**
 * Parses a date string, decoding hexadecimal (<...>) and parenthesised ((...)) forms first.
 *
 * Returns null when the decoded string does not start with 'D:' or cannot be parsed
 * with the expected date format.
 *
 * @throws InvalidArgumentException when a <...> value is not an even-length hexadecimal string
 * @throws ParseFailureException when replacing octal escapes fails
 */
#[Override]
public static function fromValue(string $valueString): ?self {
// Hexadecimal form: strip the angle brackets and decode to raw bytes.
if (str_starts_with($valueString, '<') && str_ends_with($valueString, '>')) {
$valueString = substr($valueString, 1, -1);
if (!ctype_xdigit($valueString) || strlen($valueString) % 2 !== 0) {
throw new InvalidArgumentException(sprintf('String "%s" is not hexadecimal', substr($valueString, 0, 10)));
}
$valueString = hex2bin($valueString);
if ($valueString === false) {
return null;
}
}
// Parenthesised form: strip the parentheses and replace three-digit octal escapes (\ddd) with their character.
if (str_starts_with($valueString, '(') && str_ends_with($valueString, ')')) {
$valueString = preg_replace_callback(
'/\\\\([0-7]{3})/',
fn (array $matches) => mb_chr((int) octdec($matches[1])),
substr($valueString, 1, -1)
) ?? throw new ParseFailureException();
}
// No 'D:' prefix yet: retry after converting from UTF-16 to UTF-8 and give up if the prefix is still missing.
if (!str_starts_with($valueString, 'D:')) {
$valueString = mb_convert_encoding($valueString, 'UTF-8', 'UTF-16');
if ($valueString === false || !str_starts_with($valueString, 'D:')) {
return null;
}
}
try {
// Exactly 14 digits after 'D:' means no timezone part; otherwise a trailing offset is expected.
// Apostrophes are removed from the value before parsing.
$parsedDate = DateTimeImmutable::createFromFormat(
preg_match('/^D:\d{14}$/', $valueString) === 1 ? '\D\:YmdHis' : '\D\:YmdHisP',
str_replace("'", '', $valueString)
);
} catch (ValueError) {
return null;
}
if ($parsedDate === false) {
return null;
}
return new self($parsedDate);
}
}

View File

@ -0,0 +1,8 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue;
/** Contract for value types that can be built from the raw string form of a dictionary value. */
interface DictionaryValue {
/** Builds an instance from the raw value string; the nullable return lets implementations reject a string. */
public static function fromValue(string $valueString): ?self;
}

View File

@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Float;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
/**
 * Floating point value parsed from a plain decimal string.
 *
 * @api
 */
class FloatValue implements DictionaryValue {
    public function __construct(
        public readonly float $value
    ) {
    }

    /**
     * Accepts the string only when formatting the parsed float with the same number
     * of decimals reproduces the input exactly; otherwise returns null.
     */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        $valueAsFloat = (float) $valueString;

        // Number of characters after the last '.', or 0 when the string has no '.'.
        $decimals = (int) strpos(strrev($valueString), '.');

        // Explicit separators are required: number_format() groups thousands with ',' by default,
        // so every value with magnitude >= 1000 (e.g. "1234.5" -> "1,234.5") would fail the round-trip.
        if (number_format($valueAsFloat, $decimals, '.', '') !== $valueString) {
            return null;
        }

        return new self($valueAsFloat);
    }
}

View File

@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Integer;
use Override;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\DictionaryValue;
/**
 * Integer value parsed from a canonical integer string.
 *
 * @api
 */
class IntegerValue implements DictionaryValue {
    public function __construct(
        public readonly int $value
    ) {
    }

    /** Returns an instance only when casting to int and back to string reproduces the input; otherwise null. */
    #[Override]
    public static function fromValue(string $valueString): ?self {
        $parsed = (int) $valueString;

        return (string) $parsed === $valueString
            ? new self($parsed)
            : null;
    }
}

View File

@ -0,0 +1,8 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/**
 * String-backed enum with the two authentication event names 'DocOpen' and 'EFOpen'.
 * NOTE(review): presumably the values of an AuthEvent dictionary entry - confirm against the PDF specification.
 */
enum AuthEventNameValue: string implements NameValue {
case DocOpen = 'DocOpen';
case EFOpen = 'EFOpen';
}

View File

@ -0,0 +1,19 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of blend mode names; every backing value equals its case name. */
enum BlendModeNameValue: string implements NameValue {
case Normal = 'Normal';
case Compatible = 'Compatible';
case Multiply = 'Multiply';
case Screen = 'Screen';
case Overlay = 'Overlay';
case Darken = 'Darken';
case Lighten = 'Lighten';
case ColorDodge = 'ColorDodge';
case ColorBurn = 'ColorBurn';
case HardLight = 'HardLight';
case SoftLight = 'SoftLight';
case Difference = 'Difference';
case Exclusion = 'Exclusion';
}

View File

@ -0,0 +1,11 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of border styles; the backing values are single-letter codes, the case names spell them out. */
enum BorderStyleNameValue: string implements NameValue {
case Solid = 'S';
case Dashed = 'D';
case Beveled = 'B';
case Inset = 'I';
case Underline = 'U';
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/**
 * String-backed enum with the names 'None', 'V2' and 'AESV2'.
 * NOTE(review): "CFM" presumably stands for crypt filter method - confirm against the PDF specification.
 */
enum CFMNameValue: string implements NameValue {
case None = 'None';
case V2 = 'V2';
case AESV2 = 'AESV2';
}

View File

@ -0,0 +1,10 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of CIE-based color space names; every backing value equals its case name. */
enum CIEColorSpaceNameValue: string implements NameValue {
case CalGray = 'CalGray';
case CalRGB = 'CalRGB';
case Lab = 'Lab';
case ICCBased = 'ICCBased';
}

View File

@ -0,0 +1,19 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
use PrinsFrank\PdfParser\Document\Image\ColorSpace\Components;
/** String-backed enum of device color space names, each mapped to a Components case. */
enum DeviceColorSpaceNameValue: string implements NameValue {
case DeviceGray = 'DeviceGray';
case DeviceRGB = 'DeviceRGB';
case DeviceCMYK = 'DeviceCMYK';
/** Returns the Components case for this color space; the match covers every case of the enum. */
public function getComponents(): Components {
return match ($this) {
self::DeviceGray => Components::Gray,
self::DeviceRGB => Components::RGB,
self::DeviceCMYK => Components::CMYK,
};
}
}

View File

@ -0,0 +1,8 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/**
 * String-backed enum with the two direction names 'L2R' and 'R2L'.
 * NOTE(review): presumably left-to-right and right-to-left reading order - confirm against the PDF specification.
 */
enum DirectionNameValue: string implements NameValue {
case L2R = 'L2R';
case R2L = 'R2L';
}

View File

@ -0,0 +1,26 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
use PrinsFrank\PdfParser\Document\CMap\Registry\Adobe\Identity0;
use PrinsFrank\PdfParser\Document\Encoding\MacRoman;
use PrinsFrank\PdfParser\Document\Encoding\WinAnsi;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/** String-backed enum of encoding names, with a helper that converts text in that encoding to Unicode. */
enum EncodingNameValue: string implements NameValue {
case IdentityV = 'Identity-V';
case IdentityH = 'Identity-H';
case MacRomanEncoding = 'MacRomanEncoding';
case MacExpertEncoding = 'MacExpertEncoding';
case WinAnsiEncoding = 'WinAnsiEncoding';
/**
 * Converts the character group to Unicode using the converter that belongs to this encoding.
 *
 * Both identity encodings go through the Identity0 to-unicode CMap. MacExpertEncoding has
 * no converter here and ends up in the default arm.
 *
 * @throws ParseFailureException for an encoding without a converter
 */
public function decodeString(string $characterGroup): string {
return match ($this) {
self::IdentityH,
self::IdentityV => (new Identity0())->getToUnicodeCMap()->textToUnicode($characterGroup),
self::WinAnsiEncoding => WinAnsi::textToUnicode($characterGroup),
self::MacRomanEncoding => MacRoman::textToUnicode($characterGroup),
default => throw new ParseFailureException(sprintf('Unsupported encoding %s', $this->name)),
};
}
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum with the event names 'View', 'Print' and 'Export'. */
enum EventNameValue: string implements NameValue {
case View = 'View';
case Print = 'Print';
case Export = 'Export';
}

View File

@ -0,0 +1,75 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Integer\IntegerValue;
use PrinsFrank\PdfParser\Document\Document;
use PrinsFrank\PdfParser\Document\Filter\Decode\ASCII85Decode;
use PrinsFrank\PdfParser\Document\Filter\Decode\CCITTFaxDecode;
use PrinsFrank\PdfParser\Document\Filter\Decode\FlateDecode;
use PrinsFrank\PdfParser\Document\Filter\Decode\LZWFlatePredictorValue;
use PrinsFrank\PdfParser\Document\Image\ImageType;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
/** String-backed enum of filter names, with helpers to decode filtered content and to map a filter to an image type. */
enum FilterNameValue: string implements NameValue {
case ASCII_HEX_DECODE = 'ASCIIHexDecode';
case ASCII_85_DECODE = 'ASCII85Decode';
case LZW_DECODE = 'LZWDecode';
case FLATE_DECODE = 'FlateDecode';
case RUN_LENGTH_DECODE = 'RunLengthDecode';
case CCITT_FAX_DECODE = 'CCITTFaxDecode';
case JBIG2_DECODE = 'JBIG2Decode';
case DCT_DECODE = 'DCTDecode'; // Grayscale or color image data encoded in JPEG baseline format
case JPX_DECODE = 'JPXDecode';
case CRYPT = 'Crypt';
case ADOBE_PPK_LITE = 'Adobe.PPKLite';
case ADOBE_PUB_SEC = 'Adobe.PubSec';
case ENTRUST_PPKEF = 'Entrust.PPKEF';
// NOTE(review): the case name reads "CICI" while the backing value is 'CIC.SignIt'.
case CICI_SIGN_IT = 'CIC.SignIt';
case VERISIGN_PPKVS = 'Verisign.PPKVS';
/**
 * Decodes content according to this filter.
 *
 * - JPX, JBIG2 and DCT content is returned unchanged.
 * - Flate content is decoded with the predictor and column count from the decode parameters,
 *   falling back to LZWFlatePredictorValue::None and 1.
 * - CCITT fax content is passed to CCITTFaxDecode::addHeaderAndIFD() with columns, rows and K.
 *   Rows fall back to the HEIGHT entry of $dictionary.
 * - ASCII85 content is decoded.
 * - Every other filter raises an exception.
 *
 * @throws ParseFailureException when the filter has no decoder here, or when columns, rows or K are missing for CCITT fax
 */
public function decodeBinary(string $content, ?Dictionary $dictionary, ?Document $document): string {
// Null when no dictionary was passed in.
$decodeParams = $dictionary?->getSubDictionary($document, DictionaryKey::DECODE_PARMS);
return match($this) {
self::JPX_DECODE,
self::JBIG2_DECODE,
self::DCT_DECODE => $content, // Don't decode JPEG content
self::FLATE_DECODE => FlateDecode::decodeBinary(
$content,
$decodeParams !== null && ($predictorValue = LZWFlatePredictorValue::tryFrom((int) $decodeParams->getValueForKey(DictionaryKey::PREDICTOR, IntegerValue::class)?->value)) !== null
? $predictorValue
: LZWFlatePredictorValue::None,
$decodeParams?->getValueForKey(DictionaryKey::COLUMNS, IntegerValue::class)->value ?? 1
),
self::CCITT_FAX_DECODE => CCITTFaxDecode::addHeaderAndIFD(
$content,
// Throws when $decodeParams is null, so the un-guarded accesses below only run with decode parameters present.
$decodeParams?->getValueForKey(DictionaryKey::COLUMNS, IntegerValue::class)->value
?? throw new ParseFailureException('Missing columns'),
$decodeParams->getValueForKey(DictionaryKey::ROWS, IntegerValue::class)->value
?? $dictionary->getValueForKey(DictionaryKey::HEIGHT, IntegerValue::class)->value
?? throw new ParseFailureException('Missing rows'),
$decodeParams->getValueForKey(DictionaryKey::K, IntegerValue::class)->value
?? throw new ParseFailureException('Missing K'),
),
self::ASCII_85_DECODE => ASCII85Decode::decodeBinary($content),
default => throw new ParseFailureException(sprintf('Content "%.100s..." cannot be decoded for filter "%s"', $content, $this->name))
};
}
/** Returns the image type associated with this filter, or null for filters without one. */
public function getImageType(): ?ImageType {
return match ($this) {
self::LZW_DECODE => ImageType::TIFF,
self::FLATE_DECODE => ImageType::PNG,
self::RUN_LENGTH_DECODE => ImageType::RAW,
self::CCITT_FAX_DECODE => ImageType::TIFF_FAX,
self::DCT_DECODE => ImageType::JPEG,
self::JPX_DECODE => ImageType::JPEG2000,
self::JBIG2_DECODE => ImageType::JBIG2,
default => null,
};
}
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum with the intent names 'All', 'View' and 'Design'. */
enum IntentNameValue: string implements NameValue {
case All = 'All';
case View = 'View';
case Design = 'Design';
}

View File

@ -0,0 +1,8 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum with the list mode names 'AllPages' and 'VisiblePages'. */
enum ListModeNameValue: string implements NameValue {
case AllPages = 'AllPages';
case VisiblePages = 'VisiblePages';
}

View File

@ -0,0 +1,8 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/**
 * Marker interface without methods, implemented by the name value enums in this namespace.
 *
 * @api
 */
interface NameValue {
}

View File

@ -0,0 +1,10 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/**
 * String-backed enum of page mode names for the non-full-screen case.
 * Unlike PageModeNameValue it has no 'FullScreen' or 'UseAttachments' case.
 */
enum NonFullScreenPageModeNameValue: string implements NameValue {
case UseNone = 'UseNone';
case UseOutlines = 'UseOutlines';
case UseThumbs = 'UseThumbs';
case UseOC = 'UseOC';
}

View File

@ -0,0 +1,11 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/**
 * String-backed enum of numbering styles; the backing values are single-letter codes.
 * The codes are case-sensitive: 'R'/'r' and 'A'/'a' are distinct cases.
 */
enum NumberingStyleNameValue: string implements NameValue {
case DecimalArabic = 'D';
case UpperCaseRomanNumerals = 'R';
case LowerCaseRomanNumerals = 'r';
case UpperCaseLetters = 'A';
case LowerCaseLetters = 'a';
}

View File

@ -0,0 +1,12 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of page layout names; every backing value equals its case name. */
enum PageLayoutNameValue: string implements NameValue {
case SinglePage = 'SinglePage';
case OneColumn = 'OneColumn';
case TwoColumnLeft = 'TwoColumnLeft';
case TwoColumnRight = 'TwoColumnRight';
case TwoPageLeft = 'TwoPageLeft';
case TwoPageRight = 'TwoPageRight';
}

View File

@ -0,0 +1,12 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of page mode names; case names are upper snake case, backing values are the names themselves. */
enum PageModeNameValue: string implements NameValue {
case USE_NONE = 'UseNone';
case USE_OUTLINES = 'UseOutlines';
case USE_THUMBS = 'UseThumbs';
case FULL_SCREEN = 'FullScreen';
case USE_O_C = 'UseOC';
case USE_ATTACHMENTS = 'UseAttachments';
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of paper handling names (simplex and the two duplex flip variants). */
enum PaperHandlingNameValue: string implements NameValue {
case Simplex = 'Simplex';
case DuplexFlipShortEdge = 'DuplexFlipShortEdge';
case DuplexFlipLongEdge = 'DuplexFlipLongEdge';
}

View File

@ -0,0 +1,10 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of rendering intent names; every backing value equals its case name. */
enum RenderingIntentNameValue: string implements NameValue {
case AbsoluteColorimetric = 'AbsoluteColorimetric';
case RelativeColorimetric = 'RelativeColorimetric';
case Saturation = 'Saturation';
case Perceptual = 'Perceptual';
}

View File

@ -0,0 +1,7 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of security handler names; 'Standard' is the only case. */
enum SecurityHandlerNameValue: string implements NameValue {
case Standard = 'Standard';
}

View File

@ -0,0 +1,10 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of special color space names; every backing value equals its case name. */
enum SpecialColorSpaceNameValue: string implements NameValue {
case Pattern = 'Pattern';
case Indexed = 'Indexed';
case DeviceN = 'DeviceN';
case Separation = 'Separation';
}

View File

@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of subtype names; case names are upper snake case, backing values are the names themselves. */
enum SubtypeNameValue: string implements NameValue {
case CID_FONT_TYPE_0 = 'CIDFontType0';
case CID_FONT_TYPE_0_C = 'CIDFontType0C';
case CID_FONT_TYPE_2 = 'CIDFontType2';
case FORM = 'Form';
case IMAGE = 'Image';
case LINK = 'Link';
case STREAM = 'Stream';
case TRUE_TYPE = 'TrueType';
case TYPE_0 = 'Type0';
case TYPE_1 = 'Type1';
case TYPE_1_C = 'Type1C';
case TYPE_3 = 'Type3';
case XML = 'XML';
case TEXT = 'Text';
}

View File

@ -0,0 +1,15 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of tab order names; the backing values are single-letter codes. */
enum TabsNameValue: string implements NameValue {
case RowOrder = 'R';
case ColumnOrder = 'C';
case StructureOrder = 'S';
/** @since PDF2.0 */
case AnnotationsArrayOrder = 'A';
/** @since PDF2.0 */
case WidgetOrder = 'W';
}

View File

@ -0,0 +1,18 @@
<?php declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum of transition style names; every backing value equals its case name. */
enum TransitionStyleNameValue: string implements NameValue {
case Split = 'Split';
case Blinds = 'Blinds';
case Box = 'Box';
case Wipe = 'Wipe';
case Dissolve = 'Dissolve';
case Glitter = 'Glitter';
// NOTE(review): 'R' is the only single-letter style here; its meaning is not visible in this file - confirm against the PDF specification.
case R = 'R';
case Fly = 'Fly';
case Push = 'Push';
case Cover = 'Cover';
case Uncover = 'Uncover';
case Fade = 'Fade';
}

View File

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
/** String-backed enum with the three trapped states 'True', 'False' and 'Unknown'; these are names, not booleans. */
enum TrappedNameValue: string implements NameValue {
case TRUE = 'True';
case FALSE = 'False';
case UNKNOWN = 'Unknown';
}

View File

@ -0,0 +1,155 @@
<?php
declare(strict_types=1);
namespace PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name;
use PrinsFrank\PdfParser\Document\Object\Decorator\Catalog;
use PrinsFrank\PdfParser\Document\Object\Decorator\DecoratedObject;
use PrinsFrank\PdfParser\Document\Object\Decorator\EmbeddedFile;
use PrinsFrank\PdfParser\Document\Object\Decorator\FileSpecification;
use PrinsFrank\PdfParser\Document\Object\Decorator\Font;
use PrinsFrank\PdfParser\Document\Object\Decorator\GenericObject;
use PrinsFrank\PdfParser\Document\Object\Decorator\Page;
use PrinsFrank\PdfParser\Document\Object\Decorator\Pages;
use PrinsFrank\PdfParser\Document\Object\Decorator\XObject;
/**
 * String-backed enum of type names, with a lookup from a type to the decorator class used for it.
 *
 * Case names are upper snake case; names that start with a digit are prefixed with an underscore.
 */
enum TypeNameValue: string implements NameValue {
case _3_D = '3D';
case _3_D_ANIMATION_STYLE = '3DAnimationStyle';
case _3_D_B_G = '3DBG';
case _3_D_CROSS_SECTION = '3DCrossSection';
case _3_D_LIGHTING_SCHEME = '3DLightingScheme';
case _3_D_MEASURE = '3DMeasure';
case _3_D_NODE = '3DNode';
case _3_D_REF = '3DRef';
case _3_D_RENDER_MODE = '3DRenderMode';
case _3_D_VIEW = '3DView';
case ACTION = 'Action';
case ANNOT = 'Annot';
case BACKGROUND = 'Background';
case BEAD = 'Bead';
case BORDER = 'Border';
case C_I_D_FONT = 'CIDFont';
case C_MAP = 'CMap';
case CATALOG = 'Catalog';
case COLLECTION = 'Collection';
case COLLECTION_COLORS = 'CollectionColors';
case COLLECTION_FIELD = 'CollectionField';
case COLLECTION_ITEM = 'CollectionItem';
case COLLECTION_SCHEMA = 'CollectionSchema';
case COLLECTION_SORT = 'CollectionSort';
case COLLECTION_SPLIT = 'CollectionSplit';
case COLLECTION_SUB_ITEM = 'CollectionSubItem';
case CRYPT = 'Crypt';
case CRYPT_ALGORITHM = 'CryptAlgorithm';
case CRYPT_FILTER = 'CryptFilter';
case CRYPT_FILTER_DECODE_PARMS = 'CryptFilterDecodeParms';
case D_PART = 'DPart';
case D_PART_ROOT = 'DPartRoot';
case DEVELOPER_EXTENSIONS = 'DeveloperExtensions';
case DOC_TIME_STAMP = 'DocTimeStamp';
case DSS = 'DSS';
case EMBEDDED_FILE = 'EmbeddedFile';
case ENCODING = 'Encoding';
case ENCRYPTED_PAYLOAD = 'EncryptedPayload';
case EX_DATA = 'ExData';
case EXT_G_STATE = 'ExtGState';
case EXTENSIONS = 'Extensions';
case F_W_PARAMS = 'FWParams';
case FILE_SPEC = 'Filespec';
case FILL_SIGN_DATA = 'FillSignData';
case FIXED_PRINT = 'FixedPrint';
case FOLDER = 'Folder';
case FONT = 'Font';
case FONT_DESCRIPTOR = 'FontDescriptor';
case GEO_G_C_S = 'GEOGCS';
case GROUP = 'Group';
case HALF_TONE = 'Halftone';
case INLINE = 'Inline';
case LAYOUT = 'Layout';
case M_C_R = 'MCR';
case MARK_INFO = 'MarkInfo';
case MASK = 'Mask';
case MEASURE = 'Measure';
case MEDIA_CLIP = 'MediaClip';
case MEDIA_CRITERIA = 'MediaCriteria';
case MEDIA_DURATION = 'MediaDuration';
case MEDIA_OFFSET = 'MediaOffset';
case MEDIA_PERMISSIONS = 'MediaPermissions';
case MEDIA_PLAY_PARAMS = 'MediaPlayParams';
case MEDIA_PLAYER_INFO = 'MediaPlayerInfo';
case MEDIA_PLAYERS = 'MediaPlayers';
case MEDIA_SCREEN_PARAMS = 'MediaScreenParams';
case METADATA = 'Metadata';
case MIN_BIT_DEPTH = 'MinBitDepth';
case MIN_SCREEN_SIZE = 'MinScreenSize';
case NAMESPACE = 'Namespace';
case NAV_NODE = 'NavNode';
case NAVIGATOR = 'Navigator';
case NUMBER_FORMAT = 'NumberFormat';
case O_B_J_R = 'OBJR';
case O_C_G = 'OCG';
case O_C_M_D = 'OCMD';
case O_P_I = 'OPI';
case OBJ_STM = 'ObjStm';
case OUTLINES = 'Outlines';
case OUTPUT_INTENT = 'OutputIntent';
case PAGE = 'Page';
case PAGE_LABEL = 'PageLabel';
case PAGES = 'Pages';
case PAGINATION = 'Pagination';
case PATTERN = 'Pattern';
case PROJ_C_S = 'PROJCS';
case PT_DATA = 'PtData';
case RENDITION = 'Rendition';
case RESOURCE = 'Resource';
case REQ_HANDLER = 'ReqHandler';
case REQUIREMENT = 'Requirement';
case RICH_MEDIA_ACTIVATION = 'RichMediaActivation';
case RICH_MEDIA_ANIMATION = 'RichMediaAnimation';
case RICH_MEDIA_COMMAND = 'RichMediaCommand';
case RICH_MEDIA_CONFIGURATION = 'RichMediaConfiguration';
case RICH_MEDIA_CONTENT = 'RichMediaContent';
case RICH_MEDIA_DEACTIVATION = 'RichMediaDeactivation';
case RICH_MEDIA_INSTANCE = 'RichMediaInstance';
case RICH_MEDIA_POSITION = 'RichMediaPosition';
case RICH_MEDIA_PRESENTATION = 'RichMediaPresentation';
case RICH_MEDIA_SETTINGS = 'RichMediaSettings';
case RICH_MEDIA_WINDOW = 'RichMediaWindow';
case S_V = 'SV';
case S_V_CERT = 'SVCert';
case SIG = 'Sig';
case SIG_FIELD_LOCK = 'SigFieldLock';
case SIG_REF = 'SigRef';
case SLIDESHOW = 'Slideshow';
case SOFTWARE_IDENTIFIER = 'SoftwareIdentifier';
case SOUND = 'Sound';
case SPIDER_CONTENT_SET = 'SpiderContentSet';
case STREAM = 'Stream';
case STRUCT_ELEM = 'StructElem';
case STRUCT_TREE_ROOT = 'StructTreeRoot';
case TEMPLATE = 'Template';
case THREAD = 'Thread';
case TIMESPAN = 'Timespan';
case TRANS = 'Trans';
case TRANSFORM_PARAMS = 'TransformParams';
case VIEWER_PREFERENCES = 'ViewerPreferences';
case VIEWPORT = 'Viewport';
case VRI = 'VRI';
case X_OBJECT = 'XObject';
case X_REF = 'XRef';
/**
 * Returns the fully qualified class name of the decorator for this type.
 *
 * Only Catalog, EmbeddedFile, Filespec, Font, Page, Pages and XObject have a dedicated
 * decorator; every other type resolves to GenericObject.
 *
 * @return class-string<DecoratedObject>
 */
public function getDecoratorFQN(): string {
return match($this) {
TypeNameValue::CATALOG => Catalog::class,
TypeNameValue::EMBEDDED_FILE => EmbeddedFile::class,
TypeNameValue::FILE_SPEC => FileSpecification::class,
TypeNameValue::FONT => Font::class,
TypeNameValue::PAGE => Page::class,
TypeNameValue::PAGES => Pages::class,
TypeNameValue::X_OBJECT => XObject::class,
default => GenericObject::class,
};
}
}

Some files were not shown because too many files have changed in this diff Show More