$contentsObjects
     *
     * @throws ParseFailureException when an ET operator is encountered without a preceding BT
     */
    public static function parse(array $contentsObjects): ContentStream
    {
        $content = [];

        // Scanner state. All of it is kept across content streams on purpose: the character
        // history and the "inside a token" flags are not reset when the next stream starts.
        $inStringLiteral = $inResourceName = $inDictionary = false;
        // Set while the previous character inside a string literal was an unescaped backslash
        $stringLiteralEscapePending = false;
        $inArrayLevel = $inStringLevel = 0;
        $textObject = $previousChar = $secondToLastChar = $thirdToLastChar = $previousContentStream = $startPreviousOperandIndex = null;

        foreach ($contentsObjects as $contentsObject) {
            // Offset in the current stream where the operands of the next operator begin
            $startCurrentOperandIndex = 0;
            $contentStream = $contentsObject->getStream();
            $contentStreamSize = $contentStream->getSizeInBytes();
            for ($index = 0; $index < $contentStreamSize; $index++) {
                $char = $contentStream->read($index, 1);
                if ($inStringLiteral === true) {
                    // Inside "( ... )". Only looking at the previous character is not enough to
                    // decide whether ")" is escaped: in "(abc\\)" the backslash before ")" is
                    // itself escaped, so the ")" does terminate the literal. Track escape state.
                    // NOTE(review): balanced unescaped parentheses inside a literal are not tracked
                    // here; the first unescaped ")" still closes the literal.
                    if ($stringLiteralEscapePending === true) {
                        // This character is consumed by the preceding backslash
                        $stringLiteralEscapePending = false;
                    } elseif ($char === '\\') {
                        $stringLiteralEscapePending = true;
                    } elseif ($char === ')') {
                        $inStringLiteral = false;
                    }
                } elseif ($inResourceName === true) {
                    // A name started by "/" ends at the first of these delimiter characters
                    if (in_array($char, [' ', '<', '(', '/', "\r", "\n"], true) && $previousChar !== '\\') {
                        $inResourceName = false;
                    }
                } elseif ($inDictionary === true) {
                    // Inside "<< ... >>": wait for the closing ">>"
                    if ($char === '>' && $previousChar === '>' && $secondToLastChar !== '\\') {
                        $inDictionary = false;
                    }
                } elseif ($char === '[' && $previousChar !== '\\') {
                    $inArrayLevel++;
                } elseif ($char === '<' && $previousChar === '<' && $secondToLastChar !== '\\') {
                    $inDictionary = true;
                } elseif ($char === '<' && $previousChar !== '\\' && $contentStream->read($index + 1, 1) !== '<') {
                    // A single "<" (not followed by another "<") opens a "<...>" string
                    $inStringLevel++;
                } elseif ($char === '(' && $previousChar !== '\\') {
                    $inStringLiteral = true;
                    $stringLiteralEscapePending = false;
                } elseif ($char === '/' && $previousChar !== '\\') {
                    $inResourceName = true;
                } elseif ($inStringLevel > 0 || $inArrayLevel > 0) {
                    // Inside "<...>" or "[...]": only look for the matching closer, so characters
                    // in here are never interpreted as operators.
                    if ($inStringLevel > 0 && $char === '>' && $previousChar !== '\\') {
                        $inStringLevel--;
                    } elseif ($inArrayLevel > 0 && $char === ']' && $previousChar !== '\\') {
                        $inArrayLevel--;
                    }
                } elseif ($char === 'T' && $previousChar === 'B') {
                    // TextObjectOperator::BEGIN
                    $startCurrentOperandIndex = $index + 1;
                    $textObject = new TextObject();
                } elseif ($char === 'T' && $previousChar === 'E') {
                    // TextObjectOperator::END
                    $startCurrentOperandIndex = $index + 1;
                    if ($textObject === null) {
                        throw new ParseFailureException('Encountered TextObjectOperator::END without preceding TextObjectOperator::BEGIN');
                    }

                    $content[] = $textObject;
                    $textObject = null;
                } elseif (
                    $char === 'C'
                    && (
                        ($secondToLastChar === 'B' && ($previousChar === 'M' || $previousChar === 'D'))
                        || ($secondToLastChar === 'E' && $previousChar === 'M')
                    )
                ) {
                    // MarkedContentOperator::BeginMarkedContent, MarkedContentOperator::EndMarkedContent,
                    // MarkedContentOperator::BeginMarkedContentWithProperties
                    // These are skipped: no command is emitted, the operand window just moves past them.
                    $startCurrentOperandIndex = $index + 1;
                } elseif (
                    ($operator = self::getOperator($char, $previousChar, $secondToLastChar, $thirdToLastChar)) !== null
                    && (
                        ($nextChar = $contentStream->read($index + 1, 1)) === ''
                        || self::getOperator($nextChar, $char, $previousChar, $secondToLastChar) === null
                    )
                ) {
                    // Skip the current hit if the next iteration is also a valid operator
                    $operands = '';

                    // Operands left over at the end of the previous stream belong to this operator
                    if ($previousContentStream !== null && $startPreviousOperandIndex !== null && $startPreviousOperandIndex < $previousContentStream->getSizeInBytes()) {
                        $operands .= $previousContentStream->read($startPreviousOperandIndex, $previousContentStream->getSizeInBytes() - $startPreviousOperandIndex);
                        $startPreviousOperandIndex = null;
                    }

                    // Everything between the end of the previous operator and the start of this one
                    if (($operandLength = $index + 1 - $startCurrentOperandIndex - strlen($operator->value)) > 0) {
                        $operands .= $contentStream->read($startCurrentOperandIndex, $operandLength);
                    }

                    $command = new ContentStreamCommand($operator, trim($operands));
                    if ($textObject !== null) {
                        // Between BT and ET commands are collected on the open text object
                        $textObject->addContentStreamCommand($command);
                    } else {
                        $content[] = $command;
                    }

                    $startCurrentOperandIndex = $index + 1;
                }

                // Shift the three-character history window
                $thirdToLastChar = $secondToLastChar;
                $secondToLastChar = $previousChar;
                $previousChar = $char;
            }

            // Remember where unconsumed operands of this stream start, for the next stream
            $previousContentStream = $contentStream;
            $startPreviousOperandIndex = $startCurrentOperandIndex;
        }

        return new ContentStream(...$content);
    }

    /**
     * This method uses three maps instead of calling $enum::tryFrom for all possible enums
     * as operator retrieval happens possibly millions of times in a
single file.
     *
     * Resolves the operator that ends at $currentChar, trying the longest candidate first
     * (three characters, then two, then one). As soon as a candidate of some length matches,
     * the result is decided at that length: if the character directly before the candidate is
     * a backslash or a slash, null is returned and shorter candidates are not tried.
     *
     * Null arguments are concatenated as empty strings when building the lookup keys.
     *
     * NOTE(review): the PDF operators "b" and "sh" have no entry in these maps, and "b*" maps to
     * CLOSE_FILL_STROKE - confirm against the enum definitions whether that is intentional.
     *
     * @param string  $currentChar      last character of the candidate operator
     * @param ?string $previousChar     character before $currentChar, if any
     * @param ?string $secondToLastChar character two positions before $currentChar, if any
     * @param ?string $thirdToLastChar  character three positions before $currentChar, if any
     *
     * @return CompatibilityOperator|InlineImageOperator|MarkedContentOperator|TextObjectOperator|ClippingPathOperator|ColorOperator|GraphicsStateOperator|PathConstructionOperator|PathPaintingOperator|TextPositioningOperator|TextShowingOperator|TextStateOperator|Type3FontOperator|XObjectOperator|null
     *         the matched operator, or null when nothing matches or the match is preceded by "\" or "/"
     */
    public static function getOperator(string $currentChar, ?string $previousChar, ?string $secondToLastChar, ?string $thirdToLastChar): CompatibilityOperator|InlineImageOperator|MarkedContentOperator|TextObjectOperator|ClippingPathOperator|ColorOperator|GraphicsStateOperator|PathConstructionOperator|PathPaintingOperator|TextPositioningOperator|TextShowingOperator|TextStateOperator|Type3FontOperator|XObjectOperator|null
    {
        $threeLetterMatch = match ($secondToLastChar . $previousChar . $currentChar) {
            'BMC' => MarkedContentOperator::BeginMarkedContent,
            'BDC' => MarkedContentOperator::BeginMarkedContentWithProperties,
            'EMC' => MarkedContentOperator::EndMarkedContent,
            'SCN' => ColorOperator::SetStrokingParams,
            'scn' => ColorOperator::SetColorParams,
            default => null,
        };
        if ($threeLetterMatch !== null) {
            return in_array($thirdToLastChar, ['\\', '/'], true) ? null : $threeLetterMatch;
        }

        $twoLetterMatch = match ($previousChar . $currentChar) {
            'BX' => CompatibilityOperator::BeginCompatibilitySection,
            'EX' => CompatibilityOperator::EndCompatibilitySection,
            'BI' => InlineImageOperator::Begin,
            'ID' => InlineImageOperator::BeginImageData,
            'EI' => InlineImageOperator::End,
            // The marked-content point operator is "MP"; "MD" is not a PDF operator
            'MP' => MarkedContentOperator::Tag,
            'DP' => MarkedContentOperator::TagProperties,
            'BT' => TextObjectOperator::BEGIN,
            'ET' => TextObjectOperator::END,
            'W*' => ClippingPathOperator::INTERSECT_EVEN_ODD,
            'CS' => ColorOperator::SetName,
            'cs' => ColorOperator::SetNameNonStroking,
            'SC' => ColorOperator::SetStrokingColor,
            'sc' => ColorOperator::SetColor,
            'RG' => ColorOperator::SetStrokingColorDeviceRGB,
            'rg' => ColorOperator::SetColorDeviceRGB,
            'cm' => GraphicsStateOperator::ModifyCurrentTransformationMatrix,
            'ri' => GraphicsStateOperator::SetIntent,
            'gs' => GraphicsStateOperator::SetDictName,
            're' => PathConstructionOperator::RECTANGLE,
            'f*' => PathPaintingOperator::FILL_EVEN_ODD,
            'B*' => PathPaintingOperator::FILL_STROKE_EVEN_ODD,
            'b*' => PathPaintingOperator::CLOSE_FILL_STROKE,
            'Td' => TextPositioningOperator::MOVE_OFFSET,
            'TD' => TextPositioningOperator::MOVE_OFFSET_LEADING,
            'Tm' => TextPositioningOperator::SET_MATRIX,
            'T*' => TextPositioningOperator::NEXT_LINE,
            'Tj' => TextShowingOperator::SHOW,
            'TJ' => TextShowingOperator::SHOW_ARRAY,
            'Tc' => TextStateOperator::CHAR_SPACE,
            'Tw' => TextStateOperator::WORD_SPACE,
            'Tz' => TextStateOperator::SCALE,
            'TL' => TextStateOperator::LEADING,
            'Tf' => TextStateOperator::FONT_SIZE,
            'Tr' => TextStateOperator::RENDER,
            'Ts' => TextStateOperator::RISE,
            'd0' => Type3FontOperator::SetWidth,
            'd1' => Type3FontOperator::SetWidthAndBoundingBox,
            'Do' => XObjectOperator::Paint,
            default => null,
        };
        if ($twoLetterMatch !== null) {
            return in_array($secondToLastChar, ['\\', '/'], true) ? null : $twoLetterMatch;
        }

        $oneLetterMatch = match ($currentChar) {
            'W' => ClippingPathOperator::INTERSECT,
            'G' => ColorOperator::SetStrokingColorSpace,
            'g' => ColorOperator::SetColorSpace,
            'K' => ColorOperator::SetStrokingColorDeviceCMYK,
            'k' => ColorOperator::SetColorDeviceCMYK,
            'q' => GraphicsStateOperator::SaveCurrentStateToStack,
            'Q' => GraphicsStateOperator::RestoreMostRecentStateFromStack,
            'w' => GraphicsStateOperator::SetLineWidth,
            'J' => GraphicsStateOperator::SetLineCap,
            'j' => GraphicsStateOperator::SetLineJoin,
            'M' => GraphicsStateOperator::SetMiterJoin,
            'd' => GraphicsStateOperator::SetLineDash,
            'i' => GraphicsStateOperator::SetFlatness,
            'm' => PathConstructionOperator::MOVE,
            'l' => PathConstructionOperator::LINE,
            'c' => PathConstructionOperator::CURVE_BEZIER_123,
            'v' => PathConstructionOperator::CURVE_BEZIER_23,
            'y' => PathConstructionOperator::CURVE_BEZIER_13,
            'h' => PathConstructionOperator::CLOSE,
            'S' => PathPaintingOperator::STROKE,
            's' => PathPaintingOperator::CLOSE_STROKE,
            'f' => PathPaintingOperator::FILL,
            'F' => PathPaintingOperator::FILL_DEPRECATED,
            'B' => PathPaintingOperator::FILL_STROKE,
            'n' => PathPaintingOperator::END,
            '\'' => TextShowingOperator::MOVE_SHOW,
            '"' => TextShowingOperator::MOVE_SHOW_SPACING,
            default => null,
        };
        if ($oneLetterMatch !== null) {
            return in_array($previousChar, ['\\', '/'], true) ? null : $oneLetterMatch;
        }

        return null;
    }
}