diff --git a/admin/edit_kb_document.php b/admin/edit_kb_document.php index 4161e66..abbcb6d 100644 --- a/admin/edit_kb_document.php +++ b/admin/edit_kb_document.php @@ -1,5 +1,9 @@ parseFile($target_path); + $file_content = $pdf->getText(); } } } if ($id) { - $stmt = db()->prepare("UPDATE kb_documents SET title = ?, content = ?, tags = ?, product_id = ?, language = ?, is_active = ?, file_path = ? WHERE id = ?"); - $stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path, $id]); + $stmt = db()->prepare("UPDATE kb_documents SET title = ?, content = ?, tags = ?, product_id = ?, language = ?, is_active = ?, file_path = ?, file_content = ? WHERE id = ?"); + $stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path, $file_content, $id]); } else { - $stmt = db()->prepare("INSERT INTO kb_documents (title, content, tags, product_id, language, is_active, file_path) VALUES (?, ?, ?, ?, ?, ?, ?)"); - $stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path]); + $stmt = db()->prepare("INSERT INTO kb_documents (title, content, tags, product_id, language, is_active, file_path, file_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"); + $stmt->execute([$title, $content, $tags, $product_id, $language, $is_active, $file_path, $file_content]); } header('Location: kb_documents.php'); @@ -160,4 +170,4 @@ require_once __DIR__ . 
'/../includes/html_head.php'; - + \ No newline at end of file diff --git a/admin/order_details_error.log b/admin/order_details_error.log new file mode 100644 index 0000000..4aef25d --- /dev/null +++ b/admin/order_details_error.log @@ -0,0 +1 @@ +[09-Jan-2026 07:03:16 UTC] PHP Warning: Undefined variable $pageTitle in /home/ubuntu/executor/workspace/admin/order_details.php on line 133 diff --git a/chat_send.php b/chat_send.php index caf4789..57321bb 100644 --- a/chat_send.php +++ b/chat_send.php @@ -26,7 +26,7 @@ function search_kb($message) { $sql = "SELECT * FROM kb_documents WHERE is_active = 1 AND ("; $conditions = []; foreach ($terms as $term) { - $conditions[] = "title LIKE ? OR content LIKE ?"; + $conditions[] = "title LIKE ? OR content LIKE ? OR file_content LIKE ?"; } $sql .= implode(' OR ', $conditions) . ") LIMIT 3"; @@ -35,6 +35,7 @@ function search_kb($message) { foreach ($terms as $term) { $params[] = '%' . $term . '%'; $params[] = '%' . $term . '%'; + $params[] = '%' . $term . '%'; } $stmt->execute($params); return $stmt->fetchAll(); @@ -62,6 +63,9 @@ if (!empty($kb_documents)) { foreach ($kb_documents as $doc) { $system_prompt .= "- Title: " . $doc['title'] . "\n"; $system_prompt .= " Content: " . $doc['content'] . "\n"; + if (!empty($doc['file_content'])) { + $system_prompt .= " File Content: " . $doc['file_content'] . "\n"; + } } } diff --git a/db/migrations/036_add_file_content_to_kb_documents.sql b/db/migrations/036_add_file_content_to_kb_documents.sql new file mode 100644 index 0000000..0db133d --- /dev/null +++ b/db/migrations/036_add_file_content_to_kb_documents.sql @@ -0,0 +1 @@ +ALTER TABLE `kb_documents` ADD `file_content` TEXT NULL; \ No newline at end of file diff --git a/debug_price.log b/debug_price.log index 9b1b8de..60250af 100644 --- a/debug_price.log +++ b/debug_price.log @@ -12099,3 +12099,1099 @@ Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84 Found product price. 
Net: 233.2, Gross: 286.84 FINAL: Returning Net: 233.2, Gross: 286.84 --- +--- +START getEffectivePrice for product 1, client 1 +Client price query executed. Found: {"price_net":"837.40","price_gross":"1030.00"} +Found client price. Net: 837.4, Gross: 1030 +FINAL: Returning Net: 837.4, Gross: 1030 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 3, client 1 +Client price query executed. Found: {"price_net":"32.52","price_gross":"40.00"} +Found client price. Net: 32.52, Gross: 40 +FINAL: Returning Net: 32.52, Gross: 40 +--- +--- +START getEffectivePrice for product 4, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. 
Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client 1 +Client price query executed. Found: {"price_net":"837.40","price_gross":"1030.00"} +Found client price. Net: 837.4, Gross: 1030 +FINAL: Returning Net: 837.4, Gross: 1030 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 3, client 1 +Client price query executed. Found: {"price_net":"32.52","price_gross":"40.00"} +Found client price. Net: 32.52, Gross: 40 +FINAL: Returning Net: 32.52, Gross: 40 +--- +--- +START getEffectivePrice for product 4, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client 1 +Client price query executed. 
Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 3, client 1 +Client price query executed. Found: {"price_net":"32.52","price_gross":"40.00"} +Found client price. Net: 32.52, Gross: 40 +FINAL: Returning Net: 32.52, Gross: 40 +--- +--- +START getEffectivePrice for product 4, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. 
Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 1, client 1 +Client price query executed. Found: {"price_net":"837.40","price_gross":"1030.00"} +Found client price. Net: 837.4, Gross: 1030 +FINAL: Returning Net: 837.4, Gross: 1030 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 3, client 1 +Client price query executed. Found: {"price_net":"32.52","price_gross":"40.00"} +Found client price. Net: 32.52, Gross: 40 +FINAL: Returning Net: 32.52, Gross: 40 +--- +--- +START getEffectivePrice for product 4, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. 
Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. 
Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. 
Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. 
Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. 
Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. 
Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. 
Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. 
Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. 
Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client 1 +Client price query executed. Found: {"price_net":"837.40","price_gross":"1030.00"} +Found client price. Net: 837.4, Gross: 1030 +FINAL: Returning Net: 837.4, Gross: 1030 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 3, client 1 +Client price query executed. Found: {"price_net":"32.52","price_gross":"40.00"} +Found client price. Net: 32.52, Gross: 40 +FINAL: Returning Net: 32.52, Gross: 40 +--- +--- +START getEffectivePrice for product 4, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. 
Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client 1 +Client price query executed. Found: {"price_net":"837.40","price_gross":"1030.00"} +Found client price. Net: 837.4, Gross: 1030 +FINAL: Returning Net: 837.4, Gross: 1030 +--- +--- +START getEffectivePrice for product 2, client 1 +Client price query executed. Found: {"price_net":"1056.91","price_gross":"1300.00"} +Found client price. Net: 1056.91, Gross: 1300 +FINAL: Returning Net: 1056.91, Gross: 1300 +--- +--- +START getEffectivePrice for product 3, client 1 +Client price query executed. Found: {"price_net":"32.52","price_gross":"40.00"} +Found client price. Net: 32.52, Gross: 40 +FINAL: Returning Net: 32.52, Gross: 40 +--- +--- +START getEffectivePrice for product 4, client 1 +Client price query executed. 
Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client 1 +Client price query executed. Found: No +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- +--- +START getEffectivePrice for product 1, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1111.00","price_gross":"1366.53"} +Found product price. Net: 1111, Gross: 1366.53 +FINAL: Returning Net: 1111, Gross: 1366.53 +--- +--- +START getEffectivePrice for product 2, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"1318.05","price_gross":"1621.20"} +Found product price. Net: 1318.05, Gross: 1621.2 +FINAL: Returning Net: 1318.05, Gross: 1621.2 +--- +--- +START getEffectivePrice for product 3, client +Client price not found or not set, falling back to product price. 
+Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 4, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"9.95","price_gross":"12.24"} +Found product price. Net: 9.95, Gross: 12.24 +FINAL: Returning Net: 9.95, Gross: 12.24 +--- +--- +START getEffectivePrice for product 5, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"68.00","price_gross":"83.64"} +Found product price. Net: 68, Gross: 83.64 +FINAL: Returning Net: 68, Gross: 83.64 +--- +--- +START getEffectivePrice for product 6, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"171.60","price_gross":"211.07"} +Found product price. Net: 171.6, Gross: 211.07 +FINAL: Returning Net: 171.6, Gross: 211.07 +--- +--- +START getEffectivePrice for product 7, client +Client price not found or not set, falling back to product price. +Product price query executed. Found: {"price_net":"233.20","price_gross":"286.84"} +Found product price. 
Net: 233.2, Gross: 286.84 +FINAL: Returning Net: 233.2, Gross: 286.84 +--- diff --git a/includes/pdfparser/Document/CMap/Registry/Adobe/Identity0.php b/includes/pdfparser/Document/CMap/Registry/Adobe/Identity0.php new file mode 100644 index 0000000..6e919c7 --- /dev/null +++ b/includes/pdfparser/Document/CMap/Registry/Adobe/Identity0.php @@ -0,0 +1,20 @@ +getText(), $ordering->getText(), $supplement->value]) { + ['Adobe', 'Identity', 0] => new Identity0(), + default => null, + }; + } +} diff --git a/includes/pdfparser/Document/CMap/ToUnicode/BFChar.php b/includes/pdfparser/Document/CMap/ToUnicode/BFChar.php new file mode 100644 index 0000000..f790ce5 --- /dev/null +++ b/includes/pdfparser/Document/CMap/ToUnicode/BFChar.php @@ -0,0 +1,27 @@ +sourceCode; + } + + /** @throws ParseFailureException */ + public function toUnicode(int $characterCode): ?string { + if ($characterCode !== $this->sourceCode) { + throw new ParseFailureException(sprintf('This BFChar does not contain character code %d', $characterCode)); + } + + return CodePoint::toString($this->destinationString); + } +} diff --git a/includes/pdfparser/Document/CMap/ToUnicode/BFRange.php b/includes/pdfparser/Document/CMap/ToUnicode/BFRange.php new file mode 100644 index 0000000..89541f3 --- /dev/null +++ b/includes/pdfparser/Document/CMap/ToUnicode/BFRange.php @@ -0,0 +1,35 @@ + $destinationCodePoints */ + public function __construct( + public readonly int $sourceCodeStart, + public readonly int $sourceCodeEnd, + public readonly array $destinationCodePoints, + ) { + } + + public function containsCharacterCode(int $characterCode): bool { + return $characterCode >= $this->sourceCodeStart + && $characterCode <= $this->sourceCodeEnd; + } + + /** @throws ParseFailureException */ + public function toUnicode(int $characterCode): ?string { + if (count($this->destinationCodePoints) === 1) { + return CodePoint::toString( + dechex(((int) hexdec($this->destinationCodePoints[0])) + $characterCode - $this->sourceCodeStart), 
+ ); + } + + return CodePoint::toString( + $this->destinationCodePoints[$characterCode - $this->sourceCodeStart] + ?? throw new ParseFailureException(), + ); + } +} diff --git a/includes/pdfparser/Document/CMap/ToUnicode/CodePoint.php b/includes/pdfparser/Document/CMap/ToUnicode/CodePoint.php new file mode 100644 index 0000000..00fc414 --- /dev/null +++ b/includes/pdfparser/Document/CMap/ToUnicode/CodePoint.php @@ -0,0 +1,37 @@ +> 16) & 0xFFFF) >= 0xD800 + && $highSurrogate <= 0xDBFF + && ($lowSurrogate = $surrogateCodePoint & 0xFFFF) >= 0xDC00 + && $lowSurrogate <= 0xDFFF) { + $charCodepoint = (($highSurrogate - 0xD800) << 10) + ($lowSurrogate - 0xDC00) + 0x10000; + $i += 8; // Surrogate Pairs are 4 bytes long + } else { + $charCodepoint = (int) hexdec(substr($hexString, $i, 4)); + $i += 4; // Non surrogate pairs are 2 bytes long + } + + if (($char = mb_chr($charCodepoint)) === false) { + throw new ParseFailureException(); + } + + $chars[] = $char; + } + + return implode('', $chars); + } +} diff --git a/includes/pdfparser/Document/CMap/ToUnicode/CodeSpaceRange.php b/includes/pdfparser/Document/CMap/ToUnicode/CodeSpaceRange.php new file mode 100644 index 0000000..e1cb89a --- /dev/null +++ b/includes/pdfparser/Document/CMap/ToUnicode/CodeSpaceRange.php @@ -0,0 +1,11 @@ + */ + private readonly array $bfCharRangeInfo; + + /** + * @no-named-arguments + * + * @param list $codeSpaceRanges + * @param int<1, max> $byteSize + * @throws InvalidArgumentException + */ + public function __construct( + public readonly array $codeSpaceRanges, + public readonly int $byteSize, + BFRange|BFChar ...$bfCharRangeInfo, + ) { + $this->bfCharRangeInfo = $bfCharRangeInfo; + if ($this->byteSize < 1) { + throw new InvalidArgumentException(); + } + } + + /** @throws PdfParserException */ + public function textToUnicode(string $characterGroup): string { + return implode( + '', + array_map( + fn (string $character) => $this->charToUnicode((int) hexdec($character)) ?? 
'', + str_split($characterGroup, $this->byteSize * 2) + ) + ); + } + + /** @throws PdfParserException */ + protected function charToUnicode(int $characterCode): ?string { + $char = null; + foreach ($this->bfCharRangeInfo as $bfCharRangeInfo) { + if (!$bfCharRangeInfo->containsCharacterCode($characterCode)) { + continue; + } + + if (($char = $bfCharRangeInfo->toUnicode($characterCode)) !== "\0") { // Some characters map to NULL in one BFRange and to an actual character in another + return $char; + } + } + + if ($char === "\0") { + return $char; // Only return NULL when it is the only character this is mapped to + } + + if ($characterCode === 0) { + return ''; + } + + return null; + } +} diff --git a/includes/pdfparser/Document/CMap/ToUnicode/ToUnicodeCMapOperator.php b/includes/pdfparser/Document/CMap/ToUnicode/ToUnicodeCMapOperator.php new file mode 100644 index 0000000..0bd4d52 --- /dev/null +++ b/includes/pdfparser/Document/CMap/ToUnicode/ToUnicodeCMapOperator.php @@ -0,0 +1,12 @@ +firstPos(ToUnicodeCMapOperator::BeginCodeSpaceRange, $startOffset, $startOffset + $nrOfBytes) + ?? throw new ParseFailureException(sprintf('Missing %s', ToUnicodeCMapOperator::BeginCodeSpaceRange->value)); + $beginCodeSpaceRangePos += strlen(ToUnicodeCMapOperator::BeginCodeSpaceRange->value); + $endCodeSpaceRangePos = $stream->firstPos(ToUnicodeCMapOperator::EndCodeSpaceRange, $beginCodeSpaceRangePos, $startOffset + $nrOfBytes) + ?? 
throw new ParseFailureException(); + $codeSpaceRangeSectionString = $stream->read($beginCodeSpaceRangePos, $endCodeSpaceRangePos - $beginCodeSpaceRangePos); + $codeSpaceRanges = []; + $byteSize = null; + foreach (explode("\n", $codeSpaceRangeSectionString) as $codeSpaceRangeSectionStringLine) { + if (trim($codeSpaceRangeSectionStringLine) === '') { + continue; + } + + if (preg_match('/^\s*<\s*(?P[0-9a-fA-F]+)\s*>\s*<\s*(?P[0-9a-fA-F]+)\s*>\s*$/', $codeSpaceRangeSectionStringLine, $matchesSpaceRange) !== 1) { + throw new ParseFailureException('Unrecognized codespacerange format'); + } + + if (strlen($matchesSpaceRange['start']) !== strlen($matchesSpaceRange['end'])) { + throw new ParseFailureException(sprintf('Start(%s) and end(%s) of codespacerange don\'t have the same number of bytes', $matchesSpaceRange['start'], $matchesSpaceRange['end'])); + } + + if (($strlen = strlen($matchesSpaceRange['start'])) % 2 !== 0 || !is_int($byteSizeRange = $strlen / 2)) { + throw new ParseFailureException(sprintf('Codespaceranges must be an even number of hex digits, got %d', $strlen)); + } + + if ($byteSize !== null && $byteSizeRange !== $byteSize) { + throw new ParseFailureException(sprintf('Byte size of codespaceranges is inconsistent, expected %d, got %d', $byteSize, $byteSizeRange)); + } + + $byteSize = $byteSizeRange; + $codeSpaceRanges[] = new CodeSpaceRange((int) hexdec($matchesSpaceRange['start']), (int) hexdec($matchesSpaceRange['end'])); + } + + /** @var array> $bfCharRangeInfo where the first index is used to track the position of the element in the CMap */ + $bfCharRangeInfo = []; + $lastPos = $startOffset; + while (($beginBFCharPos = $stream->firstPos(ToUnicodeCMapOperator::BeginBFChar, $lastPos, $startOffset + $nrOfBytes)) !== null) { + $beginBFCharPos += strlen(ToUnicodeCMapOperator::BeginBFChar->value); + $endBFCharPos = $stream->firstPos(ToUnicodeCMapOperator::EndBFChar, $beginBFCharPos, $startOffset + $nrOfBytes) + ?? 
throw new ParseFailureException(); + if (preg_match_all('/\s*<(?P[^>]+)>\s*<(?P[^>]+)>\s*/', $stream->read($beginBFCharPos, $endBFCharPos - $beginBFCharPos), $matchesBFChar, PREG_SET_ORDER) === 0) { + throw new ParseFailureException('Unrecognized bfchar format'); + } + + foreach ($matchesBFChar as $matchBFChar) { + $bfCharRangeInfo[$beginBFCharPos][] = new BFChar((int) hexdec(trim($matchBFChar['source'])), trim($matchBFChar['destination'])); + } + $lastPos = $endBFCharPos; + } + + $lastPos = $startOffset; + while (($beginBFRangePos = $stream->firstPos(ToUnicodeCMapOperator::BeginBFRange, $lastPos, $startOffset + $nrOfBytes)) !== null) { + $endBFRangePos = $stream->firstPos(ToUnicodeCMapOperator::EndBFRange, $beginBFRangePos, $startOffset + $nrOfBytes) + ?? throw new ParseFailureException(); + if (preg_match_all('/\s*<(?P[^>]+)>\s*<(?P[^>]+)>\s*(?P(<[^>]+>)|(\[\s*(<[^>]+>\s*)+\]))/', $stream->read($beginBFRangePos, $endBFRangePos - $beginBFRangePos), $matchesBFRange, PREG_SET_ORDER) === 0) { + throw new ParseFailureException('Unrecognized bfrange format'); + } + + foreach ($matchesBFRange as $matchBFRange) { + $bfCharRangeInfo[$beginBFRangePos][] = new BFRange( + (int) hexdec(trim($matchBFRange['start'])), + (int) hexdec(trim($matchBFRange['end'])), + array_map( + fn (string $value) => trim($value), + explode('><', rtrim(ltrim(str_replace(' ', '', $matchBFRange['targetString']), '[<'), '>]')) + ) + ); + } + $lastPos = $endBFRangePos; + } + + ksort($bfCharRangeInfo); // Make sure that Char and Range are in order they occur in the CMap + return new ToUnicodeCMap( + $codeSpaceRanges, + $byteSize !== null ? 
$byteSize : 2, + ...array_merge(...$bfCharRangeInfo) + ); + } +} diff --git a/includes/pdfparser/Document/ContentStream/Command/ContentStreamCommand.php b/includes/pdfparser/Document/ContentStream/Command/ContentStreamCommand.php new file mode 100644 index 0000000..85edf12 --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/Command/ContentStreamCommand.php @@ -0,0 +1,28 @@ +multiplyWith( + new TransformationMatrix( + (float) $matrix[0], + (float) $matrix[1], + (float) $matrix[2], + (float) $matrix[3], + (float) $matrix[4], + (float) $matrix[5], + ) + ); + } + + return $transformationMatrix; + } +} diff --git a/includes/pdfparser/Document/ContentStream/Command/Operator/State/Interaction/InteractsWithTextState.php b/includes/pdfparser/Document/ContentStream/Command/Operator/State/Interaction/InteractsWithTextState.php new file mode 100644 index 0000000..fca117b --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/Command/Operator/State/Interaction/InteractsWithTextState.php @@ -0,0 +1,9 @@ +scaleX, + $transformationMatrix->shearX, + $transformationMatrix->shearY, + $transformationMatrix->scaleY, + $transformationMatrix->offsetX + (float) $offsets[0], + $transformationMatrix->offsetY + (float) $offsets[1] + ); + } + + if ($this === self::SET_MATRIX) { + $matrix = explode(' ', trim($operands)); + if (count($matrix) !== 6) { + throw new ParseFailureException(); + } + + return new TransformationMatrix((float) $matrix[0], (float) $matrix[1], (float) $matrix[2], (float) $matrix[3], (float) $matrix[4], (float) $matrix[5]); + } + + return $transformationMatrix; + } + + /** @throws ParseFailureException */ + #[Override] + public function applyToTextState(string $operands, ?TextState $textState): ?TextState { + if ($this === self::MOVE_OFFSET_LEADING) { + $offsets = explode(' ', trim($operands)); + if (count($offsets) !== 2) { + throw new ParseFailureException(); + } + + return new TextState( + $textState->fontName ?? null, + $textState->fontSize ?? 
null, + $textState->charSpace ?? 0, + $textState->wordSpace ?? 0, + $textState->scale ?? 100, + -1 * (float) $offsets[1], + $textState->render ?? 0, + $textState->rise ?? 0, + ); + } + + return $textState; + } +} diff --git a/includes/pdfparser/Document/ContentStream/Command/Operator/State/TextShowingOperator.php b/includes/pdfparser/Document/ContentStream/Command/Operator/State/TextShowingOperator.php new file mode 100644 index 0000000..d77a4a7 --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/Command/Operator/State/TextShowingOperator.php @@ -0,0 +1,53 @@ +fontName ?? null, + $textState->fontSize ?? null, + (float) $spacing[1], + (float) $spacing[0], + $textState->scale ?? 100, + $textState->leading ?? 0, + $textState->render ?? 0, + $textState->rise ?? 0, + ); + } + + return $textState; + } + + #[Override] + public function getPositionedTextElement(string $operands, TransformationMatrix $textMatrix, TransformationMatrix $globalTransformationMatrix, TextState $textState): PositionedTextElement { + return new PositionedTextElement( + $operands, + $globalTransformationMatrix->multiplyWith($textMatrix), + $textState + ); + } +} diff --git a/includes/pdfparser/Document/ContentStream/Command/Operator/State/TextStateOperator.php b/includes/pdfparser/Document/ContentStream/Command/Operator/State/TextStateOperator.php new file mode 100644 index 0000000..2618c23 --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/Command/Operator/State/TextStateOperator.php @@ -0,0 +1,124 @@ +fontName ?? null, + $textState->fontSize ?? null, + (float) $operands, + $textState->wordSpace ?? 0, + $textState->scale ?? 100, + $textState->leading ?? 0, + $textState->render ?? 0, + $textState->rise ?? 0, + ); + } + + if ($this === self::WORD_SPACE) { + return new TextState( + $textState->fontName ?? null, + $textState->fontSize ?? null, + $textState->charSpace ?? 0, + (float) $operands, + $textState->scale ?? 100, + $textState->leading ?? 0, + $textState->render ?? 
0, + $textState->rise ?? 0, + ); + } + + if ($this === self::SCALE) { + if (trim($operands) !== (string)($scale = (int) $operands) && trim($operands) !== (string)($scale = (float) $operands)) { + throw new ParseFailureException(sprintf('Invalid scale operand "%s" for scale operator', $operands)); + } + + return new TextState( + $textState->fontName ?? null, + $textState->fontSize ?? null, + $textState->charSpace ?? 0, + $textState->wordSpace ?? 0, + $scale, + $textState->leading ?? 0, + $textState->render ?? 0, + $textState->rise ?? 0, + ); + } + + if ($this === self::LEADING) { + return new TextState( + $textState->fontName ?? null, + $textState->fontSize ?? null, + $textState->charSpace ?? 0, + $textState->wordSpace ?? 0, + $textState->scale ?? 100, + (float) $operands, + $textState->render ?? 0, + $textState->rise ?? 0, + ); + } + + if ($this === self::FONT_SIZE) { + if (preg_match('/^\/(?[A-Za-z_0-9\.\-\+]+)\s+(?-?[0-9]+(\.[0-9]+)?)$/', $operands, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid font operand "%s" for Tf operator', substr($operands, 0, 200))); + } + + return new TextState( + DictionaryKey::tryFrom($matches['fontReference']) ?? new ExtendedDictionaryKey($matches['fontReference']), + (float) $matches['FontSize'], + $textState->charSpace ?? 0, + $textState->wordSpace ?? 0, + $textState->scale ?? 100, + $textState->leading ?? 0, + $textState->render ?? 0, + $textState->rise ?? 0, + ); + } + + if ($this === self::RENDER) { + return new TextState( + $textState->fontName ?? null, + $textState->fontSize ?? null, + $textState->charSpace ?? 0, + $textState->wordSpace ?? 0, + $textState->scale ?? 100, + $textState->leading ?? 0, + (int) $operands, + $textState->rise ?? 0, + ); + } + + return new TextState( + $textState->fontName ?? null, + $textState->fontSize ?? null, + $textState->charSpace ?? 0, + $textState->wordSpace ?? 0, + $textState->scale ?? 100, + $textState->leading ?? 0, + $textState->render ?? 
0, + (float) $operands, + ); + } +} diff --git a/includes/pdfparser/Document/ContentStream/Command/Operator/State/Type3FontOperator.php b/includes/pdfparser/Document/ContentStream/Command/Operator/State/Type3FontOperator.php new file mode 100644 index 0000000..8c93aee --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/Command/Operator/State/Type3FontOperator.php @@ -0,0 +1,13 @@ + */ + public readonly array $content; + + /** @no-named-arguments */ + public function __construct( + TextObject|ContentStreamCommand... $content + ) { + $this->content = $content; + } + + /** @return list */ + public function getPositionedTextElements(): array { + $positionedTextElements = $transformationStateStack = []; + $textState = null; // See table 103, Tf operator for initial value + $transformationMatrix = new TransformationMatrix(1, 0, 0, 1, 0, 0); // Identity matrix + foreach ($this->content as $content) { + if ($content instanceof ContentStreamCommand) { + if ($content->operator instanceof InteractsWithTextState) { + $textState = $content->operator->applyToTextState($content->operands, $textState); + } elseif ($content->operator === GraphicsStateOperator::SaveCurrentStateToStack) { + $transformationStateStack[] = clone $transformationMatrix; + } elseif ($content->operator === GraphicsStateOperator::RestoreMostRecentStateFromStack) { + $transformationMatrix = array_pop($transformationStateStack) + ?? 
throw new ParseFailureException(); + } elseif ($content->operator instanceof InteractsWithTransformationMatrix) { + $transformationMatrix = $content->operator->applyToTransformationMatrix($content->operands, $transformationMatrix); + } + + continue; + } + + $textMatrix = new TransformationMatrix(1, 0, 0, 1, 0, 0); // Identity matrix, See Table 106, Tm operator for initial value in text object + foreach ($content->contentStreamCommands as $contentStreamCommand) { + if ($contentStreamCommand->operator instanceof InteractsWithTextState) { + $textState = $contentStreamCommand->operator->applyToTextState($contentStreamCommand->operands, $textState); + } + + if ($contentStreamCommand->operator instanceof InteractsWithTransformationMatrix) { + $textMatrix = $contentStreamCommand->operator->applyToTransformationMatrix($contentStreamCommand->operands, $textMatrix); + } + + if ($contentStreamCommand->operator instanceof ProducesPositionedTextElements && $textState !== null) { + $positionedTextElements[] = $contentStreamCommand->operator->getPositionedTextElement($contentStreamCommand->operands, $textMatrix, $transformationMatrix, $textState); + } + } + } + + usort( + $positionedTextElements, + static function (PositionedTextElement $a, PositionedTextElement $b): int { + if (($differenceY = $b->absoluteMatrix->offsetY <=> $a->absoluteMatrix->offsetY) !== 0) { + return $differenceY; + } + + return $a->absoluteMatrix->offsetX <=> $b->absoluteMatrix->offsetX; + } + ); + + return $positionedTextElements; + } + + /** @throws PdfParserException */ + public function getText(Document $document, Page $page): string { + $text = ''; + $previousPositionedTextElement = null; + foreach ($this->getPositionedTextElements() as $positionedTextElement) { + if ($previousPositionedTextElement !== null) { + if ($previousPositionedTextElement->absoluteMatrix->offsetY !== $positionedTextElement->absoluteMatrix->offsetY) { + $text .= "\n"; + } elseif (($positionedTextElement->absoluteMatrix->offsetX 
- $previousPositionedTextElement->absoluteMatrix->offsetX - $positionedTextElement->getFont($document, $page)->getWidthForChars($previousPositionedTextElement->getCodePoints(), $previousPositionedTextElement->textState, $previousPositionedTextElement->absoluteMatrix)) >= ($previousPositionedTextElement->textState->fontSize ?? 10) * $previousPositionedTextElement->absoluteMatrix->scaleX * 0.40) { + $text .= ' '; + } + } + + $text .= $positionedTextElement->getText($document, $page); + $previousPositionedTextElement = $positionedTextElement; + } + + return $text; + } +} diff --git a/includes/pdfparser/Document/ContentStream/ContentStreamParser.php b/includes/pdfparser/Document/ContentStream/ContentStreamParser.php new file mode 100644 index 0000000..b069870 --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/ContentStreamParser.php @@ -0,0 +1,217 @@ + $contentsObjects + * @throws ParseFailureException + */ + public static function parse(array $contentsObjects): ContentStream { + $content = []; + $inStringLiteral = $inResourceName = $inDictionary = false; + $inArrayLevel = $inStringLevel = 0; + $textObject = $previousChar = $secondToLastChar = $thirdToLastChar = $previousContentStream = $startPreviousOperandIndex = null; + foreach ($contentsObjects as $contentsObject) { + $startCurrentOperandIndex = 0; + $contentStream = $contentsObject->getStream(); + $contentStreamSize = $contentStream->getSizeInBytes(); + for ($index = 0; $index < $contentStreamSize; $index++) { + $char = $contentStream->read($index, 1); + if ($inStringLiteral === true) { + if ($char === ')' && $previousChar !== '\\') { + $inStringLiteral = false; + } + } elseif ($inResourceName === true) { + if (in_array($char, [' ', '<', '(', '/', "\r", "\n"], true) && $previousChar !== '\\') { + $inResourceName = false; + } + } elseif ($inDictionary === true) { + if ($char === '>' && $previousChar === '>' && $secondToLastChar !== '\\') { + $inDictionary = false; + } + } elseif ($char === '[' && 
$previousChar !== '\\') { + $inArrayLevel++; + } elseif ($char === '<' && $previousChar === '<' && $secondToLastChar !== '\\') { + $inDictionary = true; + } elseif ($char === '<' && $previousChar !== '\\' && $contentStream->read($index + 1, 1) !== '<') { + $inStringLevel++; + } elseif ($char === '(' && $previousChar !== '\\') { + $inStringLiteral = true; + } elseif ($char === '/' && $previousChar !== '\\') { + $inResourceName = true; + } elseif ($inStringLevel > 0 || $inArrayLevel > 0) { + if ($inStringLevel > 0 && $char === '>' && $previousChar !== '\\') { + $inStringLevel--; + } elseif ($inArrayLevel > 0 && $char === ']' && $previousChar !== '\\') { + $inArrayLevel--; + } + } elseif ($char === 'T' && $previousChar === 'B') { // TextObjectOperator::BEGIN + $startCurrentOperandIndex = $index + 1; + $textObject = new TextObject(); + } elseif ($char === 'T' && $previousChar === 'E') { // TextObjectOperator::END + $startCurrentOperandIndex = $index + 1; + if ($textObject === null) { + throw new ParseFailureException('Encountered TextObjectOperator::END without preceding TextObjectOperator::BEGIN'); + } + + $content[] = $textObject; + $textObject = null; + } elseif ($char === 'C' + && (($secondToLastChar === 'B' && ($previousChar === 'M' || $previousChar === 'D')) || ($secondToLastChar === 'E' && $previousChar === 'M'))) { // MarkedContentOperator::BeginMarkedContent, MarkedContentOperator::EndMarkedContent, MarkedContentOperator::BeginMarkedContentWithProperties + $startCurrentOperandIndex = $index + 1; + } elseif (($operator = self::getOperator($char, $previousChar, $secondToLastChar, $thirdToLastChar)) !== null + && (($nextChar = $contentStream->read($index + 1, 1)) === '' || self::getOperator($nextChar, $char, $previousChar, $secondToLastChar) === null)) { // Skip the current hit if the next iteration is also a valid operator + $operands = ''; + if ($previousContentStream !== null && $startPreviousOperandIndex !== null && $startPreviousOperandIndex < 
$previousContentStream->getSizeInBytes()) { + $operands .= $previousContentStream->read($startPreviousOperandIndex, $previousContentStream->getSizeInBytes() - $startPreviousOperandIndex); + $startPreviousOperandIndex = null; + } + if (($operandLength = $index + 1 - $startCurrentOperandIndex - strlen($operator->value)) > 0) { + $operands .= $contentStream->read($startCurrentOperandIndex, $operandLength); + } + + $command = new ContentStreamCommand($operator, trim($operands)); + if ($textObject !== null) { + $textObject->addContentStreamCommand($command); + } else { + $content[] = $command; + } + + $startCurrentOperandIndex = $index + 1; + } + + $thirdToLastChar = $secondToLastChar; + $secondToLastChar = $previousChar; + $previousChar = $char; + } + + $previousContentStream = $contentStream; + $startPreviousOperandIndex = $startCurrentOperandIndex; + } + + return new ContentStream(...$content); + } + + /** + * This method uses three maps instead of calling $enum::tryFrom for all possible enums + * as operator retrieval happens possibly millions of times in a single file + */ + public static function getOperator(string $currentChar, ?string $previousChar, ?string $secondToLastChar, ?string $thirdToLastChar): CompatibilityOperator|InlineImageOperator|MarkedContentOperator|TextObjectOperator|ClippingPathOperator|ColorOperator|GraphicsStateOperator|PathConstructionOperator|PathPaintingOperator|TextPositioningOperator|TextShowingOperator|TextStateOperator|Type3FontOperator|XObjectOperator|null { + $threeLetterMatch = match ($secondToLastChar . $previousChar . $currentChar) { + 'BMC' => MarkedContentOperator::BeginMarkedContent, + 'BDC' => MarkedContentOperator::BeginMarkedContentWithProperties, + 'EMC' => MarkedContentOperator::EndMarkedContent, + 'SCN' => ColorOperator::SetStrokingParams, + 'scn' => ColorOperator::SetColorParams, + default => null, + }; + if ($threeLetterMatch !== null) { + return in_array($thirdToLastChar, ['\\', '/'], true) ? 
null : $threeLetterMatch; + } + + $twoLetterMatch = match ($previousChar . $currentChar) { + 'BX' => CompatibilityOperator::BeginCompatibilitySection, + 'EX' => CompatibilityOperator::EndCompatibilitySection, + 'BI' => InlineImageOperator::Begin, + 'ID' => InlineImageOperator::BeginImageData, + 'EI' => InlineImageOperator::End, + 'MD' => MarkedContentOperator::Tag, + 'DP' => MarkedContentOperator::TagProperties, + 'BT' => TextObjectOperator::BEGIN, + 'ET' => TextObjectOperator::END, + 'W*' => ClippingPathOperator::INTERSECT_EVEN_ODD, + 'CS' => ColorOperator::SetName, + 'cs' => ColorOperator::SetNameNonStroking, + 'SC' => ColorOperator::SetStrokingColor, + 'sc' => ColorOperator::SetColor, + 'RG' => ColorOperator::SetStrokingColorDeviceRGB, + 'rg' => ColorOperator::SetColorDeviceRGB, + 'cm' => GraphicsStateOperator::ModifyCurrentTransformationMatrix, + 'ri' => GraphicsStateOperator::SetIntent, + 'gs' => GraphicsStateOperator::SetDictName, + 're' => PathConstructionOperator::RECTANGLE, + 'f*' => PathPaintingOperator::FILL_EVEN_ODD, + 'B*' => PathPaintingOperator::FILL_STROKE_EVEN_ODD, + 'b*' => PathPaintingOperator::CLOSE_FILL_STROKE, + 'Td' => TextPositioningOperator::MOVE_OFFSET, + 'TD' => TextPositioningOperator::MOVE_OFFSET_LEADING, + 'Tm' => TextPositioningOperator::SET_MATRIX, + 'T*' => TextPositioningOperator::NEXT_LINE, + 'Tj' => TextShowingOperator::SHOW, + 'TJ' => TextShowingOperator::SHOW_ARRAY, + 'Tc' => TextStateOperator::CHAR_SPACE, + 'Tw' => TextStateOperator::WORD_SPACE, + 'Tz' => TextStateOperator::SCALE, + 'TL' => TextStateOperator::LEADING, + 'Tf' => TextStateOperator::FONT_SIZE, + 'Tr' => TextStateOperator::RENDER, + 'Ts' => TextStateOperator::RISE, + 'd0' => Type3FontOperator::SetWidth, + 'd1' => Type3FontOperator::SetWidthAndBoundingBox, + 'Do' => XObjectOperator::Paint, + default => null, + }; + if ($twoLetterMatch !== null) { + return in_array($secondToLastChar, ['\\', '/'], true) ? 
null : $twoLetterMatch; + } + + $oneLetterMatch = match ($currentChar) { + 'W' => ClippingPathOperator::INTERSECT, + 'G' => ColorOperator::SetStrokingColorSpace, + 'g' => ColorOperator::SetColorSpace, + 'K' => ColorOperator::SetStrokingColorDeviceCMYK, + 'k' => ColorOperator::SetColorDeviceCMYK, + 'q' => GraphicsStateOperator::SaveCurrentStateToStack, + 'Q' => GraphicsStateOperator::RestoreMostRecentStateFromStack, + 'w' => GraphicsStateOperator::SetLineWidth, + 'J' => GraphicsStateOperator::SetLineCap, + 'j' => GraphicsStateOperator::SetLineJoin, + 'M' => GraphicsStateOperator::SetMiterJoin, + 'd' => GraphicsStateOperator::SetLineDash, + 'i' => GraphicsStateOperator::SetFlatness, + 'm' => PathConstructionOperator::MOVE, + 'l' => PathConstructionOperator::LINE, + 'c' => PathConstructionOperator::CURVE_BEZIER_123, + 'v' => PathConstructionOperator::CURVE_BEZIER_23, + 'y' => PathConstructionOperator::CURVE_BEZIER_13, + 'h' => PathConstructionOperator::CLOSE, + 'S' => PathPaintingOperator::STROKE, + 's' => PathPaintingOperator::CLOSE_STROKE, + 'f' => PathPaintingOperator::FILL, + 'F' => PathPaintingOperator::FILL_DEPRECATED, + 'B' => PathPaintingOperator::FILL_STROKE, + 'n' => PathPaintingOperator::END, + '\'' => TextShowingOperator::MOVE_SHOW, + '"' => TextShowingOperator::MOVE_SHOW_SPACING, + default => null, + }; + + if ($oneLetterMatch !== null) { + return in_array($previousChar, ['\\', '/'], true) ? 
null : $oneLetterMatch; + } + + return null; + } +} diff --git a/includes/pdfparser/Document/ContentStream/Object/TextObject.php b/includes/pdfparser/Document/ContentStream/Object/TextObject.php new file mode 100644 index 0000000..da1248c --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/Object/TextObject.php @@ -0,0 +1,22 @@ + */ + public array $contentStreamCommands = []; + + public function addContentStreamCommand(ContentStreamCommand $textOperator): self { + $this->contentStreamCommands[] = $textOperator; + + return $this; + } + + public function isEmpty(): bool { + return $this->contentStreamCommands === []; + } +} diff --git a/includes/pdfparser/Document/ContentStream/PositionedText/PositionedTextElement.php b/includes/pdfparser/Document/ContentStream/PositionedText/PositionedTextElement.php new file mode 100644 index 0000000..bc8e061 --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/PositionedText/PositionedTextElement.php @@ -0,0 +1,106 @@ +textState->fontName === null) { + throw new ParseFailureException('Unable to locate font for text element'); + } + + return $page->getFontDictionary()?->getObjectForReference($document, $this->textState->fontName, Font::class) + ?? 
throw new ParseFailureException(sprintf('Unable to locate font with reference "/%s"', $this->textState->fontName->value)); + } + + /** @throws ParseFailureException */ + public function getText(Document $document, Page $page): string { + if (($result = preg_match_all('/(?(<(\\\\>|[^>])*>)|(\((\\\\\)|[^)])*\)))(?-?[0-9]+(\.[0-9]+)?)?/', $this->rawTextContent, $matches, PREG_SET_ORDER)) === false) { + throw new ParseFailureException(sprintf('Error with regex')); + } elseif ($result === 0) { + throw new ParseFailureException(sprintf('Operands "%s" is not in a recognized format', $this->rawTextContent)); + } + + $string = ''; + $font = $this->getFont($document, $page); + foreach ($matches as $match) { + if (str_starts_with($match['chars'], '(') && str_ends_with($match['chars'], ')')) { + $unescapedChars = LiteralStringEscapeCharacter::unescapeCharacters(substr($match['chars'], 1, -1)); + if (preg_match('/^\\\\\d{3}$/', substr($match['chars'], 1, -1)) === 1 && ($glyph = $font->getDifferences()?->getGlyph((int) octdec(substr($match['chars'], 2, -1)))) !== null) { + $chars = $glyph->getChar(); + } elseif (strlen($unescapedChars) === 1 && ($glyph = $font->getDifferences()?->getGlyph(ord($unescapedChars))) !== null) { + $chars = $glyph->getChar(); + } elseif (in_array($encoding = $font->getEncoding(), [EncodingNameValue::MacExpertEncoding, EncodingNameValue::WinAnsiEncoding], true)) { + $chars = $encoding->decodeString($unescapedChars); + } elseif (($toUnicodeCMap = $font->getToUnicodeCMap() ?? $font->getToUnicodeCMapDescendantFont()) !== null) { + $chars = $toUnicodeCMap->textToUnicode(bin2hex($unescapedChars)); + } elseif ($encoding !== null) { + $chars = $encoding->decodeString($unescapedChars); + } else { + $chars = $unescapedChars; + } + + $string .= $chars; + } elseif (str_starts_with($match['chars'], '<') && str_ends_with($match['chars'], '>')) { + $chars = substr($match['chars'], 1, -1); + if (($toUnicodeCMap = $font->getToUnicodeCMap() ?? 
$font->getToUnicodeCMapDescendantFont()) !== null) { + $string .= $toUnicodeCMap->textToUnicode($chars); + } elseif (($encoding = $font->getEncoding()) !== null) { + $string .= $encoding->decodeString(implode('', array_map(fn (string $character) => mb_chr((int) hexdec($character)), str_split($chars, 2)))); + } else { + throw new ParseFailureException('Unable to use CMap or decode string to retrieve characters for text object'); + } + } else { + throw new ParseFailureException(sprintf('Unrecognized character group format "%s"', $match['chars'])); + } + + if (isset($match['offset']) && (float) $match['offset'] < -100) { + $string .= ' '; + } + } + + return $string; + } + + /** @return list */ + public function getCodePoints(): array { + $codePoints = []; + if (($result = preg_match_all('/(?(<(\\\\>|[^>])*>)|(\((\\\\\)|[^)])*\)))(?-?[0-9]+(\.[0-9]+)?)?/', $this->rawTextContent, $matches, PREG_SET_ORDER)) === false) { + throw new ParseFailureException(sprintf('Error with regex')); + } elseif ($result === 0) { + throw new ParseFailureException(sprintf('Operands "%s" is not in a recognized format', $this->rawTextContent)); + } + + foreach ($matches as $match) { + if (str_starts_with($match['chars'], '(') && str_ends_with($match['chars'], ')')) { + $chars = str_replace(['\(', '\)', '\n', '\r'], ['(', ')', "\n", "\r"], substr($match['chars'], 1, -1)); + $chars = preg_replace_callback('/\\\\([0-7]{3})/', fn (array $matches) => mb_chr((int) octdec($matches[1])), $chars) + ?? throw new ParseFailureException(); + foreach (str_split($chars) as $char) { + $codePoints[] = ord($char); + } + } elseif (str_starts_with($match['chars'], '<') && str_ends_with($match['chars'], '>')) { + foreach (str_split(substr($match['chars'], 1, -1), 4) as $char) { + $codePoints[] = is_int($codePoint = hexdec($char)) ? 
$codePoint : throw new ParseFailureException(); + } + } else { + throw new ParseFailureException(sprintf('Unrecognized character group format "%s"', $match['chars'])); + } + } + + return $codePoints; + } +} diff --git a/includes/pdfparser/Document/ContentStream/PositionedText/TextState.php b/includes/pdfparser/Document/ContentStream/PositionedText/TextState.php new file mode 100644 index 0000000..c5741d0 --- /dev/null +++ b/includes/pdfparser/Document/ContentStream/PositionedText/TextState.php @@ -0,0 +1,20 @@ +scaleX * $other->scaleX + $this->shearX * $other->shearY, + $this->scaleX * $other->shearX + $this->shearX * $other->scaleY, + $this->shearY * $other->scaleX + $this->scaleY * $other->shearY, + $this->shearY * $other->shearX + $this->scaleY * $other->scaleY, + $this->offsetX * $other->scaleX + $this->offsetY * $other->shearY + $other->offsetX, + $this->offsetX * $other->shearX + $this->offsetY * $other->scaleY + $other->offsetY, + ); + } +} diff --git a/includes/pdfparser/Document/CrossReference/CrossReferenceSourceParser.php b/includes/pdfparser/Document/CrossReference/CrossReferenceSourceParser.php new file mode 100644 index 0000000..fbfab3b --- /dev/null +++ b/includes/pdfparser/Document/CrossReference/CrossReferenceSourceParser.php @@ -0,0 +1,92 @@ +lastPos(Marker::EOF, 0) + ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::EOF->value)); + $startXrefMarkerPos = $stream->lastPos(Marker::START_XREF, $stream->getSizeInBytes() - $eofMarkerPos) + ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::START_XREF->value)); + $startByteOffset = $stream->getStartOfNextLine($startXrefMarkerPos, $stream->getSizeInBytes()) + ?? throw new ParseFailureException('Expected a carriage return or line feed after startxref marker, none found'); + $endByteOffset = $stream->getEndOfCurrentLine($startByteOffset, $stream->getSizeInBytes()) + ?? 
throw new ParseFailureException('Expected a carriage return or line feed after the byte offset, none found'); + + $byteOffsetLastCrossReferenceSection = trim($stream->read($startByteOffset, $endByteOffset - $startByteOffset)); + if ($byteOffsetLastCrossReferenceSection !== (string)(int) $byteOffsetLastCrossReferenceSection) { + throw new ParseFailureException(sprintf('Invalid byte offset last crossReference section "%s", "%s"', $byteOffsetLastCrossReferenceSection, $stream->read($startXrefMarkerPos, $stream->getSizeInBytes() - $startXrefMarkerPos))); + } + + $byteOffsetLastCrossReferenceSection = (int) $byteOffsetLastCrossReferenceSection; + if ($byteOffsetLastCrossReferenceSection > $stream->getSizeInBytes()) { + throw new ParseFailureException(sprintf('Invalid byte offset: position of last crossReference section %d is greater than total size of stream %d. Should this be %d?', $byteOffsetLastCrossReferenceSection, $stream->getSizeInBytes(), $stream->lastPos(Marker::XREF, $stream->getSizeInBytes() - $startXrefMarkerPos) ?? $stream->lastPos(Marker::OBJ, $stream->getSizeInBytes() - $startXrefMarkerPos) ?? 0)); + } + + $eolPosByteOffset = $stream->getEndOfCurrentLine($byteOffsetLastCrossReferenceSection, $stream->getSizeInBytes()) + ?? 
throw new ParseFailureException('Expected a newline after byte offset for last cross reference stream'); + + $crossReferenceType = self::getCrossReferenceType($stream, $byteOffsetLastCrossReferenceSection, $eolPosByteOffset); + if ($crossReferenceType === null) { // Try to recover from an invalid byte offset crossReference section + $lastPosXrefSection = $stream->lastPos(Marker::XREF, $stream->getSizeInBytes() - $startXrefMarkerPos); + $lastPosObject = $stream->lastPos(Marker::OBJ, $stream->getSizeInBytes() - $startXrefMarkerPos); + if ($lastPosXrefSection === null && $lastPosObject === null) { + throw new ParseFailureException(sprintf('Unable to determine cross reference type for start line "%s" of crossReference source, and no other crossReference table or stream was found.', $stream->read($byteOffsetLastCrossReferenceSection, $eolPosByteOffset - $byteOffsetLastCrossReferenceSection))); + } + + $lastPossibleXrefSectionPos = $lastPosObject === null ? $lastPosXrefSection : ($lastPosXrefSection === null ? $lastPosObject : max($lastPosXrefSection, $lastPosObject)); + $eolStartXrefSectionPos = $stream->getEndOfCurrentLine($lastPossibleXrefSectionPos, $stream->getSizeInBytes()) + ?? throw new ParseFailureException(sprintf('Unable to determine cross reference type for start line "%s" of crossReference source, and no other crossReference table or stream was found.', $stream->read($startByteOffset, $endByteOffset - $startByteOffset))); + $crossReferenceType = self::getCrossReferenceType($stream, $lastPossibleXrefSectionPos, $eolStartXrefSectionPos) + ?? throw new ParseFailureException(sprintf('Unable to determine cross reference type for start line "%s" of crossReference source, and no other crossReference table or stream was found.', $stream->read($startByteOffset, $endByteOffset - $startByteOffset))); + } + + $endCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table + ? 
($stream->firstPos(Marker::START_XREF, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::START_XREF->value))) + : ($stream->firstPos(Marker::END_OBJ, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::END_OBJ->value))); + $currentCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table + ? CrossReferenceTableParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset) + : CrossReferenceStreamParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset); + $crossReferenceSections = [$currentCrossReferenceSection]; + while (($previous = $currentCrossReferenceSection->dictionary->getValueForKey(DictionaryKey::PREV, IntegerValue::class)) !== null && $previous->value !== 0) { + $eolPosByteOffset = $stream->getEndOfCurrentLine($previous->value + 1, $stream->getSizeInBytes()) + ?? throw new ParseFailureException('Expected a newline after byte offset for cross reference stream'); + $endCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table + ? $stream->firstPos(Marker::START_XREF, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate startxref') + : $stream->firstPos(Marker::END_OBJ, $eolPosByteOffset, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate endobj'); + + $currentCrossReferenceSection = $crossReferenceType === CrossReferenceType::Table + ? CrossReferenceTableParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset) + : CrossReferenceStreamParser::parse($stream, $eolPosByteOffset, $endCrossReferenceSection - $eolPosByteOffset); + $crossReferenceSections[] = $currentCrossReferenceSection; + } + + return new CrossReferenceSource(... 
$crossReferenceSections); + } + + private static function getCrossReferenceType(Stream $stream, int $byteOffsetLastCrossReferenceSection, int $byteOffsetEndOfCurrentLine): ?CrossReferenceType { + $startCrossReferenceContent = trim($stream->read($byteOffsetLastCrossReferenceSection, $byteOffsetEndOfCurrentLine - $byteOffsetLastCrossReferenceSection)); + if ($startCrossReferenceContent === Marker::XREF->value) { + return CrossReferenceType::Table; + } + + if (preg_match('/^[0-9]*\s*[0-9]*\s*obj$/', $startCrossReferenceContent) === 1) { + return CrossReferenceType::Stream; + } + + return null; + } +} diff --git a/includes/pdfparser/Document/CrossReference/CrossReferenceType.php b/includes/pdfparser/Document/CrossReference/CrossReferenceType.php new file mode 100644 index 0000000..0a92742 --- /dev/null +++ b/includes/pdfparser/Document/CrossReference/CrossReferenceType.php @@ -0,0 +1,8 @@ + Where the first is the newest incremental update and the last one is the oldest */ + private readonly array $crossReferenceSections; + + /** @no-named-arguments */ + public function __construct( + CrossReferenceSection... 
$crossReferenceSections, + ) { + $this->crossReferenceSections = $crossReferenceSections; + } + + public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null { + foreach ($this->crossReferenceSections as $crossReferenceSection) { + $crossReferenceEntry = $crossReferenceSection->getCrossReferenceEntry($objNumber); + if ($crossReferenceEntry !== null) { + return $crossReferenceEntry; + } + } + + return null; + } + + public function getReferenceForKey(DictionaryKey $dictionaryKey): ?ReferenceValue { + return $this->getValueForKey($dictionaryKey, ReferenceValue::class); + } + + /** + * @template T of DictionaryValue|NameValue|Dictionary + * @param class-string $valueType + * @return T + */ + public function getValueForKey(DictionaryKey $dictionaryKey, string $valueType): DictionaryValue|Dictionary|NameValue|null { + foreach ($this->crossReferenceSections as $crossReferenceSection) { + $valueForKey = $crossReferenceSection->dictionary->getValueForKey($dictionaryKey, $valueType); + if ($valueForKey !== null) { + return $valueForKey; + } + } + + return null; + } + + public function getFirstId(): string { + $value = $this->getValueForKey(DictionaryKey::ID, ArrayValue::class)->value[0] + ?? 
throw new ParseFailureException('Unable to retrieve first id from cross reference source'); + if (!is_string($value)) { + throw new ParseFailureException('First id is not a string'); + } + + if (!str_starts_with($value, '<') || !str_ends_with($value, '>')) { + throw new ParseFailureException('Unsupported first id format, expected ""'); + } + + $firstId = hex2bin(substr($value, 1, -1)); + if ($firstId === false) { + throw new ParseFailureException('Unable to retrieve binary value from first id'); + } + + return $firstId; + } +} diff --git a/includes/pdfparser/Document/CrossReference/Source/Section/CrossReferenceSection.php b/includes/pdfparser/Document/CrossReference/Source/Section/CrossReferenceSection.php new file mode 100644 index 0000000..c0261c1 --- /dev/null +++ b/includes/pdfparser/Document/CrossReference/Source/Section/CrossReferenceSection.php @@ -0,0 +1,32 @@ + */ + public readonly array $crossReferenceSubSections; + + /** @no-named-arguments */ + public function __construct( + public readonly Dictionary $dictionary, + CrossReferenceSubSection... 
$crossReferenceSubSections, + ) { + $this->crossReferenceSubSections = $crossReferenceSubSections; + } + + public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null { + foreach ($this->crossReferenceSubSections as $crossReferenceSubSection) { + if ($crossReferenceSubSection->containsObject($objNumber)) { + return $crossReferenceSubSection->getCrossReferenceEntry($objNumber); + } + } + + return null; + } +} diff --git a/includes/pdfparser/Document/CrossReference/Source/Section/SubSection/CrossReferenceSubSection.php b/includes/pdfparser/Document/CrossReference/Source/Section/SubSection/CrossReferenceSubSection.php new file mode 100644 index 0000000..620e352 --- /dev/null +++ b/includes/pdfparser/Document/CrossReference/Source/Section/SubSection/CrossReferenceSubSection.php @@ -0,0 +1,54 @@ + */ + public array $crossReferenceEntries = []; + + /** + * @phpstan-assert int<0, max> $nrOfEntries + * + * @throws InvalidArgumentException + * + * @no-named-arguments + */ + public function __construct( + public readonly int $firstObjectNumber, + public readonly int $nrOfEntries, + CrossReferenceEntryInUseObject|CrossReferenceEntryFreeObject|CrossReferenceEntryCompressed... $crossReferenceEntries + ) { + if ($this->nrOfEntries < 0) { + throw new InvalidArgumentException('$nrOfEntries should be a positive number'); + } + + $this->crossReferenceEntries = $crossReferenceEntries; + } + + public function containsObject(int $objNumber): bool { + return $objNumber >= $this->firstObjectNumber + && $objNumber < $this->firstObjectNumber + $this->nrOfEntries; + } + + /** @throws RuntimeException */ + public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null { + if (self::containsObject($objNumber) === false) { + return null; + } + + $object = $this->crossReferenceEntries[$objNumber - $this->firstObjectNumber] + ?? 
throw new RuntimeException(sprintf('Object with key %d not found', $objNumber - $this->firstObjectNumber)); + if ($object instanceof CrossReferenceEntryFreeObject) { + throw new RuntimeException('Cross reference entry for object should point to either a compressed or uncompressed entry, not a free object nr'); + } + + return $object; + } +} diff --git a/includes/pdfparser/Document/CrossReference/Source/Section/SubSection/Entry/CrossReferenceEntryCompressed.php b/includes/pdfparser/Document/CrossReference/Source/Section/SubSection/Entry/CrossReferenceEntryCompressed.php new file mode 100644 index 0000000..983085d --- /dev/null +++ b/includes/pdfparser/Document/CrossReference/Source/Section/SubSection/Entry/CrossReferenceEntryCompressed.php @@ -0,0 +1,21 @@ + $startPos + * @phpstan-assert int<1, max> $nrOfBytes + * + * @throws PdfParserException + */ + /* Parses the cross reference stream located in the given byte range: reads the stream dictionary, requires type XRef and a W entry, decodes the stream content into fixed-width rows of three big-endian integer fields, and groups the resulting entries into subsections according to Index (or 0..Size when Index is absent). Throws ParseFailureException when the type, W entry, stream marker, end-of-stream marker, field types or entry type are not as expected. */ public static function parse(Stream $stream, int $startPos, int $nrOfBytes): CrossReferenceSection { + $dictionary = DictionaryParser::parse($stream, $startPos, $nrOfBytes); + if ($dictionary->getType() !== TypeNameValue::X_REF) { + throw new ParseFailureException('Expected stream of type xref'); + } + + $wValue = $dictionary->getValueForKey(DictionaryKey::W, CrossReferenceStreamByteSizes::class) + ?? throw new ParseFailureException('Cross reference streams should have a dictionary entry for "W"'); + $startStream = $stream->getStartNextLineAfter(Marker::STREAM, $startPos, $startPos + $nrOfBytes) + ?? 
throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::STREAM->value)); + + if (($length = $dictionary->getValueForKey(DictionaryKey::LENGTH, IntegerValue::class)?->value) === null) { + $endStream = $stream->lastPos(Marker::END_STREAM, $stream->getSizeInBytes() - $startPos + $nrOfBytes); /* NOTE(review): if lastPos() takes an offset from the end of the stream, the intended bound is probably getSizeInBytes() - ($startPos + $nrOfBytes); confirm against Stream::lastPos before changing */ + if ($endStream === null || $endStream > ($startPos + $nrOfBytes)) { + throw new ParseFailureException(sprintf('Expected end of stream content marked by %s, none found', Marker::END_STREAM->value)); + } + + $length = $endStream - $startStream - 1; + } + + $entries = []; + $hexContent = bin2hex(CompressedObjectContentParser::parseBinary($stream, $startStream, $length, $dictionary)->toString()); + foreach (str_split($hexContent, $wValue->getTotalLengthInBytes() * self::HEX_CHARS_IN_BYTE) as $referenceRow) { + $field1 = $wValue->lengthRecord1InBytes === 0 ? CrossReferenceStreamType::UNCOMPRESSED_OBJECT->value : hexdec(substr($referenceRow, 0, $wValue->lengthRecord1InBytes * self::HEX_CHARS_IN_BYTE)); /* a zero-width type field means the field is absent and the type defaults to 1, an in-use uncompressed object (ISO 32000-1, 7.5.8.2); hexdec('') would otherwise yield 0 and mark every row as free */ + $field2 = hexdec(substr($referenceRow, $wValue->lengthRecord1InBytes * self::HEX_CHARS_IN_BYTE, $wValue->lengthRecord2InBytes * self::HEX_CHARS_IN_BYTE)); + $field3 = hexdec(substr($referenceRow, ($wValue->lengthRecord1InBytes + $wValue->lengthRecord2InBytes) * self::HEX_CHARS_IN_BYTE, $wValue->lengthRecord3InBytes * self::HEX_CHARS_IN_BYTE)); + if (!is_int($field1) || !is_int($field2) || !is_int($field3)) { + throw new ParseFailureException(sprintf('Field 1, 2 and 3 in cross reference entries should be int, got %s, %s and %s', gettype($field1), gettype($field2), gettype($field3))); + } + + $entries[] = match (CrossReferenceStreamType::tryFrom($field1)) { + CrossReferenceStreamType::LINKED_LIST_FREE_OBJECT => new CrossReferenceEntryFreeObject($field2, $field3), + CrossReferenceStreamType::UNCOMPRESSED_OBJECT => new CrossReferenceEntryInUseObject($field2, $field3), + CrossReferenceStreamType::COMPRESSED_OBJECT => new CrossReferenceEntryCompressed($field2, $field3), + null => throw new ParseFailureException(sprintf('Unrecognized CrossReferenceStream type 
"%s"', $field1)), + }; + } + + /** @var list<int> $startObjNrOfItemsArray where all even items are the start object number and all odd items are the number of objects */ + $startObjNrOfItemsArray = $dictionary->getValueForKey(DictionaryKey::INDEX, ArrayValue::class)->value + ?? [0, $dictionary->getValueForKey(DictionaryKey::SIZE, IntegerValue::class)->value ?? throw new ParseFailureException('Cross reference streams should have either an index or a size, neither was found')]; + + $crossReferenceSubSections = []; + foreach (array_chunk($startObjNrOfItemsArray, 2) as $startNrNrOfObjects) { + /** @phpstan-ignore offsetAccess.notFound, offsetAccess.notFound */ + $crossReferenceSubSections[] = new CrossReferenceSubSection($startNrNrOfObjects[0], $startNrNrOfObjects[1], ... array_slice($entries, 0, $startNrNrOfObjects[1])); + $entries = array_slice($entries, $startNrNrOfObjects[1]); + } + + return new CrossReferenceSection($dictionary, ... $crossReferenceSubSections); + } +} diff --git a/includes/pdfparser/Document/CrossReference/Stream/CrossReferenceStreamType.php b/includes/pdfparser/Document/CrossReference/Stream/CrossReferenceStreamType.php new file mode 100644 index 0000000..d27717c --- /dev/null +++ b/includes/pdfparser/Document/CrossReference/Stream/CrossReferenceStreamType.php @@ -0,0 +1,11 @@ +firstPos(Marker::TRAILER, $startPos, $startPos + $nrOfBytes) + ?? throw new ParseFailureException('Unable to locate trailer for crossReferenceTable'); + $dictionary = DictionaryParser::parse($stream, $startTrailerPos + Marker::TRAILER->length(), $nrOfBytes - ($startTrailerPos + Marker::TRAILER->length() - $startPos)); + + $firstObjectNumber = $nrOfEntries = null; + $crossReferenceSubSections = $crossReferenceEntries = []; + $content = trim($stream->read($startPos, $startTrailerPos - $startPos)); + $content = str_replace([WhitespaceCharacter::CARRIAGE_RETURN->value, WhitespaceCharacter::LINE_FEED->value . 
WhitespaceCharacter::LINE_FEED->value], WhitespaceCharacter::LINE_FEED->value, $content); + foreach (explode(WhitespaceCharacter::LINE_FEED->value, $content) as $line) { + $sections = explode(WhitespaceCharacter::SPACE->value, trim($line)); + switch (count($sections)) { + case 2: + if ($firstObjectNumber !== null && $nrOfEntries !== null) { + $crossReferenceSubSections[] = new CrossReferenceSubSection($firstObjectNumber, $nrOfEntries, ... $crossReferenceEntries); // Use previous objectNr and nrOfEntries + } + $crossReferenceEntries = []; + $firstObjectNumber = (int) $sections[0]; + $nrOfEntries = (int) $sections[1]; + break; + case 3: + $crossReferenceEntries[] = match (CrossReferenceTableInUseOrFree::tryFrom(trim($sections[2]))) { + CrossReferenceTableInUseOrFree::IN_USE => new CrossReferenceEntryInUseObject((int) $sections[0], (int) $sections[1]), + CrossReferenceTableInUseOrFree::FREE => new CrossReferenceEntryFreeObject((int) $sections[0], (int) $sections[1]), + null => throw new ParseFailureException(sprintf('Unrecognized crossReference table record type %s', trim($sections[2]))) + }; + break; + default: + throw new ParseFailureException(sprintf('Invalid line "%s", 2 or 3 sections expected, %d found', substr(trim($line), 0, 30), count($sections))); + } + } + + if ($firstObjectNumber !== null && $nrOfEntries !== null) { + $crossReferenceSubSections[] = new CrossReferenceSubSection($firstObjectNumber, $nrOfEntries, ... $crossReferenceEntries); + } + + return new CrossReferenceSection($dictionary, ... $crossReferenceSubSections); + } +} diff --git a/includes/pdfparser/Document/Dictionary/Dictionary.php b/includes/pdfparser/Document/Dictionary/Dictionary.php new file mode 100644 index 0000000..54084af --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/Dictionary.php @@ -0,0 +1,143 @@ + */ + public readonly array $dictionaryEntries; + + /** @no-named-arguments */ + public function __construct( + DictionaryEntry... 
$dictionaryEntries + ) { + $this->dictionaryEntries = $dictionaryEntries; + } + + /** Returns the value of the first entry whose key matches, or null when no entry matches. + * @template T of DictionaryValue|NameValue|Dictionary + * @param class-string<T> $valueType + * @return T|null (an InvalidArgumentException is thrown when the stored value is not an instance of $valueType) + */ + public function getValueForKey(DictionaryKey|ExtendedDictionaryKey $dictionaryKey, string $valueType): DictionaryValue|Dictionary|NameValue|null { + foreach ($this->dictionaryEntries as $dictionaryEntry) { + if (($dictionaryKey instanceof DictionaryKey && $dictionaryEntry->key === $dictionaryKey) + || ($dictionaryKey instanceof ExtendedDictionaryKey && $dictionaryEntry->key instanceof ExtendedDictionaryKey && $dictionaryEntry->key->value === $dictionaryKey->value)) { + $value = $dictionaryEntry->value; + if (is_a($value, $valueType) === false) { + throw new InvalidArgumentException(sprintf('Expected value for key %s to be of type %s, got %s', $dictionaryKey->value, $valueType, get_class($value))); + } + + return $value; + } + } + + return null; + } + + /** @return class-string|null class name of the value stored under the key, or null when the key is absent */ + public function getTypeForKey(DictionaryKey $dictionaryKey): ?string { + foreach ($this->dictionaryEntries as $dictionaryEntry) { + if ($dictionaryEntry->key === $dictionaryKey) { + return $dictionaryEntry->value::class; + } + } + + return null; + } + + /** Resolves the value under the key to a Dictionary: a direct Dictionary is returned as-is, a DictionaryArrayValue via toSingleDictionary(), and a ReferenceValue through the document (which must then be non-null). Returns null when the key is absent; throws ParseFailureException for any other value type. */ public function getSubDictionary(?Document $document, DictionaryKey $dictionaryKey): ?Dictionary { + $subDictionaryType = $this->getTypeForKey($dictionaryKey); + if ($subDictionaryType === null) { + return null; + } + + if ($subDictionaryType === Dictionary::class) { + return $this->getValueForKey($dictionaryKey, Dictionary::class) ?? throw new RuntimeException(); + } + + if ($subDictionaryType === DictionaryArrayValue::class) { + return ($this->getValueForKey($dictionaryKey, DictionaryArrayValue::class) ?? 
throw new RuntimeException())->toSingleDictionary(); + } + + if ($subDictionaryType === ReferenceValue::class) { + if ($document === null) { + throw new ParseFailureException('Document is required to get subDictionary for reference'); + } + + return ($this->getObjectForReference($document, $dictionaryKey) ?? throw new ParseFailureException()) + ->getDictionary(); + } + + throw new ParseFailureException(sprintf('Invalid type "%s" for subDictionary with key %s', $subDictionaryType, $dictionaryKey->name)); + } + + /** Resolves the ReferenceValue stored under the key through $document->getObject(); returns null when the key is absent and throws ParseFailureException when getObject() returns null. + * @template T of DecoratedObject + * @param class-string<T>|null $expectedDecoratorFQN + * @return ($expectedDecoratorFQN is null ? DecoratedObject : T) + */ + public function getObjectForReference(Document $document, DictionaryKey|ExtendedDictionaryKey $dictionaryKey, ?string $expectedDecoratorFQN = null): ?DecoratedObject { + $reference = $this->getValueForKey($dictionaryKey, ReferenceValue::class); + if ($reference === null) { + return null; + } + + return $document->getObject($reference->objectNumber, $expectedDecoratorFQN) + ?? throw new ParseFailureException(); + } + + /** Resolves every reference of the ReferenceValueArray stored under the key through $document->getObject(); returns an empty list when the key is absent and throws ParseFailureException as soon as one lookup returns null. + * @template T of DecoratedObject + * @param class-string<T>|null $expectedDecoratorFQN + * @return ($expectedDecoratorFQN is null ? list<DecoratedObject> : list<T>) + */ + public function getObjectsForReference(Document $document, DictionaryKey|ExtendedDictionaryKey $dictionaryKey, ?string $expectedDecoratorFQN = null): array { + $references = $this->getValueForKey($dictionaryKey, ReferenceValueArray::class); + if ($references === null) { + return []; + } + + $objects = []; + foreach ($references->referenceValues as $referenceValue) { + $objects[] = $document->getObject($referenceValue->objectNumber, $expectedDecoratorFQN) + ?? 
throw new ParseFailureException(); + } + + return $objects; + } + + /** Returns the Type entry; when the Type key itself holds a Dictionary, the Type entry of that nested dictionary is returned instead. */ public function getType(): ?TypeNameValue { + if ($this->getTypeForKey(DictionaryKey::TYPE) === Dictionary::class) { + return $this->getValueForKey(DictionaryKey::TYPE, Dictionary::class) + ?->getValueForKey(DictionaryKey::TYPE, TypeNameValue::class); + } + + return $this->getValueForKey(DictionaryKey::TYPE, TypeNameValue::class); + } + + /** Returns the Subtype entry; when the Subtype key itself holds a Dictionary, the Subtype entry of that nested dictionary is returned instead. */ public function getSubType(): ?SubtypeNameValue { + if ($this->getTypeForKey(DictionaryKey::SUBTYPE) === Dictionary::class) { + return $this->getValueForKey(DictionaryKey::SUBTYPE, Dictionary::class) + ?->getValueForKey(DictionaryKey::SUBTYPE, SubtypeNameValue::class); + } + + return $this->getValueForKey(DictionaryKey::SUBTYPE, SubtypeNameValue::class); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryEntry/DictionaryEntry.php b/includes/pdfparser/Document/Dictionary/DictionaryEntry/DictionaryEntry.php new file mode 100644 index 0000000..f9784c3 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryEntry/DictionaryEntry.php @@ -0,0 +1,19 @@ + $dictionaryValue + * @throws PdfParserException + */ + /* Builds an entry for the key string, using a known DictionaryKey when tryFromKeyString() recognises it and an ExtendedDictionaryKey otherwise. NOTE(review): the return type is nullable but the visible body never returns null. */ public static function fromKeyValuePair(string $keyString, string|array $dictionaryValue): ?DictionaryEntry { + $dictionaryKey = DictionaryKey::tryFromKeyString($keyString) + ?? 
ExtendedDictionaryKey::fromKeyString($keyString); + + return new DictionaryEntry($dictionaryKey, self::getValue($dictionaryKey, $dictionaryValue)); + } + + /** Resolves a raw value to a value object, trying in order: (1) an array becomes a Dictionary when the key allows Dictionary or ArrayValue; (2) a string shaped like N N R becomes a ReferenceValue under the same condition; (3) the first allowed BackedEnum type whose tryFrom() accepts the normalized string; (4) the first allowed DictionaryValue type other than TextStringValue whose fromValue() returns non-null; (5) TextStringValue when the key allows it. Throws ParseFailureException when nothing matches. + * @param string|array $value + * @throws PdfParserException + */ + protected static function getValue(DictionaryKey|ExtendedDictionaryKey $dictionaryKey, string|array $value): Dictionary|DictionaryValue|NameValue { + $allowedValueTypes = $dictionaryKey->getValueTypes(); + if ((in_array(Dictionary::class, $allowedValueTypes, true) || in_array(ArrayValue::class, $allowedValueTypes, true)) + && is_array($value)) { + return DictionaryFactory::fromArray($value); + } + + if ((in_array(Dictionary::class, $allowedValueTypes, true) || in_array(ArrayValue::class, $allowedValueTypes, true)) + && is_string($value) + && preg_match('/^[0-9]+ [0-9]+ R$/', $value) === 1 + && ($referenceValue = ReferenceValue::fromValue($value)) !== null) { + return $referenceValue; + } + + foreach ($allowedValueTypes as $allowedValueType) { + if (is_a($allowedValueType, BackedEnum::class, true) + && is_string($value) + && ($resolvedValue = $allowedValueType::tryFrom(NameValueNormalizer::normalize($value))) !== null) { + return $resolvedValue; + } + } + + foreach ($allowedValueTypes as $allowedValueType) { + if (!is_a($allowedValueType, DictionaryValue::class, true) + || $allowedValueType === TextStringValue::class) { // TextStrings accept everything, so we check that last + continue; + } + + if (!is_string($value) || ($valueObject = $allowedValueType::fromValue($value)) === null) { + continue; + } + + return $valueObject; + } + + if (in_array(TextStringValue::class, $allowedValueTypes, true) && is_string($value)) { + return TextStringValue::fromValue($value); + } + + throw new ParseFailureException(sprintf('Value "%s" for dictionary key %s could not be parsed to a valid value type', is_array($value) ? 
'array()' : $value, $dictionaryKey->value)); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryFactory.php b/includes/pdfparser/Document/Dictionary/DictionaryFactory.php new file mode 100644 index 0000000..8ad36d5 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryFactory.php @@ -0,0 +1,34 @@ + $dictionaryArray + * @throws PdfParserException + */ + /* Builds a Dictionary from a key-to-value map. Every value must be a string or a non-list array, otherwise InvalidArgumentException is thrown; each pair is converted by DictionaryEntryFactory::fromKeyValuePair(). */ public static function fromArray(array $dictionaryArray): Dictionary { + $dictionaryEntries = []; + foreach ($dictionaryArray as $keyString => $value) { + if (!is_string($value) && (!is_array($value) || array_is_list($value))) { + throw new InvalidArgumentException(sprintf('values should be either strings or non-list array, %s given', gettype($value))); + } + + /** @var non-empty-array|string $value */ + $dictionaryEntry = DictionaryEntryFactory::fromKeyValuePair($keyString, $value); + if ($dictionaryEntry === null) { /* NOTE(review): fromKeyValuePair() has a nullable return type; this skip only matters if it can actually return null - confirm */ + continue; + } + + $dictionaryEntries[] = $dictionaryEntry; + } + + return new Dictionary(... 
$dictionaryEntries); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryKey/DictionaryKey.php b/includes/pdfparser/Document/Dictionary/DictionaryKey/DictionaryKey.php new file mode 100644 index 0000000..dde3515 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryKey/DictionaryKey.php @@ -0,0 +1,1271 @@ + [Dictionary::class, BooleanValue::class, DictionaryArrayValue::class, IntegerValue::class, TextStringValue::class], + self::AA, self::BS => [Dictionary::class], + self::AC => [TextStringValue::class, ArrayValue::class], + self::AF => [DictionaryArrayValue::class, ReferenceValueArray::class, ReferenceValue::class], + self::ACCURATE_SCREENS => [BooleanValue::class], + self::ACRO_FORM => [Dictionary::class], + self::ACTION => [TextStringValue::class], + self::ADD_REV_INFO => [BooleanValue::class], + self::ADDITIONAL_STREAMS => [ArrayValue::class], + self::AFTER => [TextStringValue::class], + self::AFTER_PERMS_READY => [TextStringValue::class], + self::AIS => [BooleanValue::class, TextStringValue::class], + self::ALT => [ArrayValue::class], + self::ALTERNATE => [ArrayValue::class, TextStringValue::class], + self::ALTERNATE_IMAGES => [IntegerValue::class], + self::ALTERNATE_PRESENTATIONS => [TextStringValue::class], + self::ALTERNATES => [DictionaryArrayValue::class], + self::AN => [Dictionary::class], + self::ANGLE => [IntegerValue::class, FloatValue::class], + self::ANNOTATION => [Dictionary::class], + self::ANNOTATIONS => [IntegerValue::class], + self::ANNOTS => [ReferenceValue::class, ReferenceValueArray::class, DictionaryArrayValue::class], + self::ANTI_ALIAS => [BooleanValue::class], + self::AP => [DictionaryArrayValue::class], + self::APREF => [Dictionary::class], + self::ART_BOX => [Rectangle::class], + self::AS => [DictionaryArrayValue::class, TextStringValue::class], + self::ASCENT => [IntegerValue::class, FloatValue::class], + self::ASPECT => [ArrayValue::class], + self::ATTESTATION => [TextStringValue::class], + 
self::AUTH_EVENT => [AuthEventNameValue::class], + self::AUTHOR => [TextStringValue::class], + self::AVG_WIDTH => [IntegerValue::class, FloatValue::class], + self::B => [ArrayValue::class, BooleanValue::class, Dictionary::class, IntegerValue::class], + self::BACKGROUND => [ArrayValue::class], + self::BASE => [TextStringValue::class], + self::BASE_ENCODING => [EncodingNameValue::class], + self::BASE_FONT => [TextStringValue::class], + self::BASE_STATE => [TextStringValue::class], + self::BASE_VERSION => [Version::class], + self::BBOX => [Rectangle::class], + self::BC => [ArrayValue::class], + self::BE => [Dictionary::class], + self::BEFORE => [TextStringValue::class], + self::BG => [TextStringValue::class, ArrayValue::class, Dictionary::class], + self::BG2 => [TextStringValue::class], + self::BI => [Dictionary::class], + self::BITS_PER_COMPONENT => [IntegerValue::class], + self::BITS_PER_COORDINATE => [IntegerValue::class], + self::BITS_PER_FLAG => [IntegerValue::class], + self::BITS_PER_SAMPLE => [IntegerValue::class], + self::BLACK_IS1 => [BooleanValue::class], + self::BLACK_POINT => [ArrayValue::class], + self::BLEED_BOX => [Rectangle::class], + self::BM => [BlendModeNameValue::class], + self::BORDER => [ArrayValue::class], + self::BOUNDS => [ArrayValue::class], + self::BOX_COLOR_INFO => [Dictionary::class], + self::BU => [TextStringValue::class], + self::BYTE_RANGE => [ArrayValue::class], + self::C => [Dictionary::class, ArrayValue::class, IntegerValue::class, FloatValue::class, BooleanValue::class], + self::C0 => [ArrayValue::class], + self::C1 => [ArrayValue::class], + self::C2W => [ArrayValue::class], + self::CA => [IntegerValue::class, FloatValue::class, TextStringValue::class], + self::CA_L => [IntegerValue::class, FloatValue::class], + self::CAP => [BooleanValue::class], + self::CAP_HEIGHT => [IntegerValue::class, FloatValue::class], + self::CATEGORY => [ArrayValue::class], + self::CENTER_WINDOW => [BooleanValue::class], + self::CERT => [Dictionary::class, 
ArrayValue::class, TextStringValue::class], + self::CF => [Dictionary::class], + self::CFM => [CFMNameValue::class], + self::CHANGES => [ArrayValue::class], + self::CHAR_PROCS => [Dictionary::class], + self::CHAR_SET => [TextStringValue::class], + self::CHECK_SUM => [TextStringValue::class], + self::CI => [Dictionary::class], + self::CIDSET => [TextStringValue::class], + self::CIDSYSTEM_INFO => [Dictionary::class, DictionaryArrayValue::class], + self::CIDTO_GIDMAP => [TextStringValue::class], + self::CL => [ArrayValue::class], + self::CLR_F => [IntegerValue::class], + self::CLR_FF => [TextStringValue::class], + self::CMAP_NAME => [TextStringValue::class], + self::CO => [ArrayValue::class, TextStringValue::class, IntegerValue::class, FloatValue::class], + self::CO_ => [TextStringValue::class], + self::COLLECTION => [Dictionary::class], + self::COLOR_SPACE => [DeviceColorSpaceNameValue::class, CIEColorSpaceNameValue::class, SpecialColorSpaceNameValue::class, Dictionary::class, ArrayValue::class], + self::COLOR_TRANSFORM => [IntegerValue::class], + self::COLORANTS => [Dictionary::class], + self::COLORS => [IntegerValue::class], + self::COLUMNS => [IntegerValue::class], + self::COMPANY => [TextStringValue::class], + self::COMPONENTS => [ArrayValue::class], + self::CONFIGS => [DictionaryArrayValue::class], + self::CONTACT_INFO => [TextStringValue::class], + self::CONTENT_TYPE_ID => [TextStringValue::class], + self::CONTENTS => [ReferenceValue::class, ReferenceValueArray::class, TextStringValue::class], + self::COORDS => [ArrayValue::class], + self::COUNT => [IntegerValue::class], + self::CP => [TextStringValue::class], + self::CREATION_DATE => [DateValue::class, ReferenceValue::class, TextStringValue::class], + self::CREATOR => [IntegerValue::class, TextStringValue::class], + self::CREATOR_INFO => [Dictionary::class], + self::CROP_BOX => [Rectangle::class, ReferenceValue::class], + self::CS => [TextStringValue::class, ArrayValue::class], + self::CS_L => 
[TextStringValue::class], + self::CT => [TextStringValue::class], + self::CV => [IntegerValue::class, FloatValue::class], + self::CYX => [IntegerValue::class, FloatValue::class], + self::D => [TextStringValue::class, DateValue::class, IntegerValue::class, FloatValue::class, ArrayValue::class, Dictionary::class], + self::DA => [TextStringValue::class], + self::DAMAGED_ROWS_BEFORE_ERROR => [IntegerValue::class], + self::DATA => [TextStringValue::class], + self::DECODE => [ArrayValue::class], + self::DECODE_PARMS => [Dictionary::class, DictionaryArrayValue::class], + self::DEFAULT => [Dictionary::class, TextStringValue::class], + self::DEFAULT_FOR_PRINTING => [BooleanValue::class], + self::DESC => [TextStringValue::class], + self::DESCENDANT_FONTS => [ReferenceValueArray::class, Dictionary::class, DictionaryArrayValue::class], + self::DESCENT => [IntegerValue::class, FloatValue::class], + self::DEST => [TextStringValue::class, ArrayValue::class], + self::DESTS => [Dictionary::class, TextStringValue::class], + self::DEV_DEP_GS_BG => [IntegerValue::class], + self::DEV_DEP_GS_FL => [IntegerValue::class], + self::DEV_DEP_GS_HT => [IntegerValue::class], + self::DEV_DEP_GS_OP => [IntegerValue::class], + self::DEV_DEP_GS_TR => [IntegerValue::class], + self::DEV_DEP_GS_UCR => [IntegerValue::class], + self::DI => [IntegerValue::class, FloatValue::class, TextStringValue::class], + self::DIFFERENCES => [DifferencesArrayValue::class, TextStringValue::class], + self::DIGEST_METHOD => [ArrayValue::class, TextStringValue::class], + self::DIRECTION => [DirectionNameValue::class], + self::DIS => [TextStringValue::class], + self::DISPLAY_DOC_TITLE => [BooleanValue::class, ReferenceValue::class], + self::DL => [IntegerValue::class], + self::DM => [TextStringValue::class], + self::DOC => [ArrayValue::class], + self::DOC_CHECKSUM => [TextStringValue::class], + self::DOC_MDP => [Dictionary::class], + self::DOCUMENT => [ArrayValue::class], + self::DOMAIN => [ArrayValue::class], + self::DOS 
=> [TextStringValue::class], + self::DOT_GAIN => [Dictionary::class], + self::DP => [Dictionary::class], + self::DR => [Dictionary::class], + self::DS => [TextStringValue::class, Dictionary::class], + self::DUPLEX => [PaperHandlingNameValue::class], + self::DUR => [IntegerValue::class, FloatValue::class], + self::DURATION => [TextStringValue::class], + self::DV => [TextStringValue::class], + self::DW => [IntegerValue::class], + self::DW2 => [ArrayValue::class], + self::E => [BooleanValue::class, Dictionary::class, TextStringValue::class], + self::EA => [BooleanValue::class], + self::EARLY_CHANGE => [IntegerValue::class], + self::EF => [Dictionary::class, ReferenceValue::class], + self::EFF => [TextStringValue::class], + self::EMBEDDED_FDFS => [DictionaryArrayValue::class], + self::EMBEDDED_FILES => [Dictionary::class], + self::ENCODE => [ArrayValue::class], + self::ENCODED_BYTE_ALIGN => [BooleanValue::class], + self::ENCODING => [EncodingNameValue::class, Dictionary::class], + self::ENCRYPT => [Dictionary::class], + self::ENCRYPT_METADATA => [BooleanValue::class], + self::ENCRYPTION_REVISION => [IntegerValue::class], + self::END_OF_BLOCK => [BooleanValue::class], + self::END_OF_LINE => [BooleanValue::class], + self::EVENT => [EventNameValue::class], + self::EX_DATA => [Dictionary::class], + self::EXPORT => [Dictionary::class], + self::EXT_GSTATE => [Dictionary::class], + self::EXTEND => [ArrayValue::class], + self::EXTENDS => [TextStringValue::class], + self::EXTENSION_LEVEL => [IntegerValue::class], + self::EXTENSIONS => [Dictionary::class], + self::EXTERNAL_OPIDICTS => [IntegerValue::class], + self::EXTERNAL_REF_XOBJECTS => [IntegerValue::class], + self::EXTERNAL_STREAMS => [IntegerValue::class], + self::F => [TextStringValue::class, IntegerValue::class, FloatValue::class, Dictionary::class], + self::FB => [BooleanValue::class], + self::FC => [ArrayValue::class, TextStringValue::class], + self::FD => [Dictionary::class, BooleanValue::class], + self::FDECODE_PARMS 
=> [Dictionary::class, DictionaryArrayValue::class], + self::FDF => [Dictionary::class], + self::FF => [IntegerValue::class], + self::FFILTER => [FilterNameValue::class, ArrayValue::class], + self::FIELDS => [ReferenceValue::class, ReferenceValueArray::class, DictionaryArrayValue::class], + self::FILTER => [FilterNameValue::class, SecurityHandlerNameValue::class, ArrayValue::class], + self::FIRST => [IntegerValue::class, Dictionary::class], + self::FIRST_CHAR => [IntegerValue::class], + self::FIT_WINDOW => [BooleanValue::class], + self::FIXED_PRINT => [Dictionary::class], + self::FL => [IntegerValue::class, FloatValue::class], + self::FLAGS => [IntegerValue::class], + self::FO => [Dictionary::class], + self::FONT => [Dictionary::class, ArrayValue::class], + self::FONT_BBOX => [Rectangle::class, ReferenceValue::class], + self::FONT_DESCRIPTOR => [Dictionary::class], + self::FONT_FAMILY => [TextStringValue::class], + self::FONT_FAUXING => [DictionaryArrayValue::class], + self::FONT_FILE => [TextStringValue::class], + self::FONT_FILE2 => [TextStringValue::class], + self::FONT_FILE3 => [TextStringValue::class], + self::FONT_MATRIX => [ArrayValue::class], + self::FONT_NAME => [TextStringValue::class], + self::FONT_STRETCH => [TextStringValue::class], + self::FONT_WEIGHT => [IntegerValue::class, FloatValue::class], + self::FORM => [ArrayValue::class], + self::FORM_TYPE => [IntegerValue::class], + self::FOV => [IntegerValue::class, FloatValue::class], + self::FREQUENCY => [IntegerValue::class, FloatValue::class], + self::FS => [TextStringValue::class], + self::FT => [TextStringValue::class], + self::FUNCTION => [TextStringValue::class], + self::FUNCTION_TYPE => [IntegerValue::class], + self::FUNCTIONS => [ArrayValue::class], + self::FWPOSITION => [ArrayValue::class], + self::FWSCALE => [ArrayValue::class], + self::G => [ReferenceValue::class, TextStringValue::class], + self::GAMMA => [IntegerValue::class, FloatValue::class, ArrayValue::class], + self::GO_TO_REMOTE_ACTIONS 
=> [IntegerValue::class], + self::GROUP => [Dictionary::class], + self::GS => [TextStringValue::class], + self::GS_L => [TextStringValue::class], + self::H => [TextStringValue::class, IntegerValue::class, FloatValue::class, BooleanValue::class, ArrayValue::class], + self::HALFTONE_NAME => [TextStringValue::class], + self::HALFTONE_TYPE => [IntegerValue::class], + self::HEIGHT => [IntegerValue::class], + self::HEIGHT2 => [IntegerValue::class], + self::HELV => [TextStringValue::class], + self::HI => [BooleanValue::class], + self::HIDE_ANNOTATION_ACTIONS => [IntegerValue::class], + self::HIDE_MENUBAR => [BooleanValue::class], + self::HIDE_TOOLBAR => [BooleanValue::class], + self::HIDE_WINDOW_UI => [BooleanValue::class], + self::HT => [Dictionary::class, TextStringValue::class], + self::I => [BooleanValue::class, Dictionary::class, IntegerValue::class, FloatValue::class, TextStringValue::class], + self::IC => [ArrayValue::class], + self::ID => [ArrayValue::class, TextStringValue::class], + self::IDTREE => [Dictionary::class], + self::IDENTITY => [TextStringValue::class], + self::IDS => [TextStringValue::class], + self::IF => [Dictionary::class], + self::IM => [TextStringValue::class], + self::IMAGE => [TextStringValue::class], + self::IMAGE_B => [TextStringValue::class], + self::IMAGE_C => [TextStringValue::class], + self::IMAGE_I => [TextStringValue::class], + self::IMAGE_MASK => [BooleanValue::class], + self::IN => [TextStringValue::class], + self::IN_DESIGN => [TextStringValue::class, Dictionary::class], + self::INDEX => [ArrayValue::class], + self::INFO => [Dictionary::class, TextStringValue::class], + self::INK_LIST => [ArrayValue::class], + self::INTENT => [IntentNameValue::class, RenderingIntentNameValue::class, ArrayValue::class], + self::INTERPOLATE => [BooleanValue::class], + self::IRT => [Dictionary::class], + self::IS_MAP => [BooleanValue::class], + self::ISSUER => [ArrayValue::class], + self::IT => [TextStringValue::class], + self::ITALIC_ANGLE => 
[IntegerValue::class, FloatValue::class], + self::IV => [BooleanValue::class], + self::IX => [TextStringValue::class], + self::JAVA_SCRIPT => [TextStringValue::class, Dictionary::class], + self::JAVA_SCRIPT_ACTIONS => [IntegerValue::class], + self::JBIG2GLOBALS => [TextStringValue::class], + self::JS => [TextStringValue::class], + self::K => [IntegerValue::class, BooleanValue::class, Dictionary::class, DictionaryArrayValue::class], + self::KEY_USAGE => [ArrayValue::class], + self::KEYWORDS => [TextStringValue::class], + self::KIDS => [ReferenceValueArray::class], + self::L => [Rectangle::class, ArrayValue::class], + self::LANG => [TextStringValue::class], + self::LANGUAGE => [Dictionary::class], + self::LAST => [Dictionary::class], + self::LAST_CHAR => [IntegerValue::class], + self::LAST_MODIFIED => [DateValue::class], + self::LAUNCH_ACTIONS => [IntegerValue::class], + self::LC => [IntegerValue::class], + self::LE => [TextStringValue::class, ArrayValue::class], + self::LEADING => [IntegerValue::class, FloatValue::class], + self::LEGAL => [Dictionary::class], + self::LEGAL_ATTESTATION => [Dictionary::class], + self::LENGTH => [IntegerValue::class, ReferenceValue::class], + self::LENGTH1 => [IntegerValue::class], + self::LENGTH2 => [IntegerValue::class], + self::LENGTH3 => [IntegerValue::class], + self::LEVEL1 => [TextStringValue::class], + self::LI => [BooleanValue::class], + self::LIMITS => [ArrayValue::class], + self::LINEARIZED => [IntegerValue::class, FloatValue::class], + self::LIST_MODE => [ListModeNameValue::class], + self::LJ => [IntegerValue::class], + self::LL => [IntegerValue::class, FloatValue::class], + self::LLE => [IntegerValue::class, FloatValue::class], + self::LLO => [IntegerValue::class, FloatValue::class], + self::LOCATION => [TextStringValue::class], + self::LOCK => [Dictionary::class], + self::LOCKED => [ArrayValue::class], + self::LS => [Dictionary::class], + self::LW => [IntegerValue::class, FloatValue::class], + self::M => 
[TextStringValue::class, DateValue::class, IntegerValue::class, ArrayValue::class], + self::MA => [DictionaryArrayValue::class], + self::MAC => [TextStringValue::class, Dictionary::class], + self::MARK_INFO => [Dictionary::class], + self::MARKED => [BooleanValue::class], + self::MASK => [TextStringValue::class, ArrayValue::class], + self::MATRIX => [ArrayValue::class], + self::MATTE => [ArrayValue::class], + self::MAX_LEN => [IntegerValue::class], + self::MAX_WIDTH => [IntegerValue::class, FloatValue::class], + self::MCAF => [DictionaryArrayValue::class], + self::MD5 => [TextStringValue::class], + self::MDP => [Dictionary::class], + self::MEASURE => [Dictionary::class], + self::MEDIA_BOX => [Rectangle::class, ReferenceValue::class], + self::METADATA => [TextStringValue::class], + self::MH => [Dictionary::class], + self::MISSING_WIDTH => [IntegerValue::class, FloatValue::class], + self::MIX => [BooleanValue::class], + self::MIXING_HINTS => [Dictionary::class], + self::MK => [Dictionary::class], + self::ML => [IntegerValue::class, FloatValue::class], + self::MOD_DATE => [DateValue::class, ReferenceValue::class], + self::MODE => [TextStringValue::class], + self::MOVIE => [Dictionary::class], + self::MOVIE_ACTIONS => [IntegerValue::class], + self::MS => [TextStringValue::class], + self::MSG => [TextStringValue::class], + self::MU => [DictionaryArrayValue::class], + self::N => [IntegerValue::class, FloatValue::class, TextStringValue::class, Dictionary::class], + self::NA => [Dictionary::class, ArrayValue::class], + self::NAME => [TextStringValue::class, TextStringValue::class], + self::NAMES => [Dictionary::class, ArrayValue::class], + self::NEED_APPEARANCES => [BooleanValue::class], + self::NEEDS_RENDERING => [BooleanValue::class], + self::NEW_WINDOW => [BooleanValue::class], + self::NEXT => [Dictionary::class, DictionaryArrayValue::class], + self::NM => [TextStringValue::class], + self::NON_EMBEDDED_FONTS => [IntegerValue::class], + self::NON_FULL_SCREEN_PAGE_MODE => 
[NonFullScreenPageModeNameValue::class], + self::NP => [BooleanValue::class], + self::NR => [BooleanValue::class], + self::NU => [DictionaryArrayValue::class], + self::NUM_COPIES => [IntegerValue::class], + self::NUMS => [TextStringValue::class], + self::O => [TextStringValue::class, IntegerValue::class, FloatValue::class, Dictionary::class, ArrayValue::class, BooleanValue::class], + self::OB => [TextStringValue::class], + self::OC => [Dictionary::class], + self::OCGS => [ReferenceValueArray::class, DictionaryArrayValue::class], + self::OCPROPERTIES => [Dictionary::class], + self::OFF => [ArrayValue::class], + self::OID => [ArrayValue::class], + self::ON => [ReferenceValueArray::class, ArrayValue::class], + self::ON_INSTANTIATE => [TextStringValue::class], + self::OP => [BooleanValue::class, IntegerValue::class], + self::OP_L => [TextStringValue::class], + self::OPEN => [BooleanValue::class], + self::OPEN_ACTION => [Dictionary::class, ArrayValue::class], + self::OPERATION => [TextStringValue::class], + self::OPI => [Dictionary::class], + self::OPM => [IntegerValue::class], + self::OPT => [ArrayValue::class], + self::OPTIONAL_CONTENT => [BooleanValue::class], + self::ORDER => [IntegerValue::class, ArrayValue::class], + self::ORDERING => [TextStringValue::class], + self::OS => [ArrayValue::class, IntegerValue::class, FloatValue::class], + self::OUTLINES => [Dictionary::class], + self::OUTPUT_INTENTS => [ReferenceValue::class, ReferenceValueArray::class, DictionaryArrayValue::class], + self::OVERLAY_TEXT => [TextStringValue::class], + self::P => [IntegerValue::class, TextStringValue::class, VisibilityPolicyNameValue::class, Dictionary::class, BooleanValue::class, ArrayValue::class, DictionaryArrayValue::class], + self::PA => [Dictionary::class], + self::PAGE => [IntegerValue::class, TextStringValue::class], + self::PAGE_ELEMENT => [Dictionary::class], + self::PAGE_LABELS => [TextStringValue::class, ArrayValue::class], + self::PAGE_LAYOUT => 
[PageLayoutNameValue::class], + self::PAGE_MODE => [PageModeNameValue::class], + self::PAGES => [Dictionary::class, TextStringValue::class, DictionaryArrayValue::class], + self::PAINT_TYPE => [IntegerValue::class], + self::PARAMS => [Dictionary::class], + self::PARENT => [Dictionary::class], + self::PARENT_TREE => [Dictionary::class], + self::PATTERN => [Dictionary::class, TextStringValue::class], + self::PATTERN_TYPE => [IntegerValue::class], + self::PC => [Dictionary::class, IntegerValue::class, ArrayValue::class], + self::PDF => [TextStringValue::class], + self::PDFDOC_ENCODING => [TextStringValue::class], + self::PERMS => [Dictionary::class], + self::PI => [Dictionary::class], + self::PICK_TRAY_BY_PDFSIZE => [BooleanValue::class], + self::PID => [Dictionary::class], + self::PIECE_INFO => [Dictionary::class], + self::PL => [Dictionary::class], + self::PO => [Dictionary::class, IntegerValue::class, FloatValue::class], + self::POPUP => [Dictionary::class], + self::POSTER => [BooleanValue::class, TextStringValue::class], + self::PREDICTOR => [IntegerValue::class], + self::PRES_STEPS => [Dictionary::class], + self::PRESERVE_RB => [BooleanValue::class], + self::PREV => [IntegerValue::class, Dictionary::class], + self::PRINT => [Dictionary::class], + self::PRINT_AREA => [TextStringValue::class], + self::PRINT_CLIP => [TextStringValue::class], + self::PRINT_PAGE_RANGE => [TextStringValue::class, ArrayValue::class], + self::PRINT_SCALING => [TextStringValue::class], + self::PRINTING_ORDER => [ArrayValue::class], + self::PRIVATE => [TextStringValue::class], + self::PROC_SET => [ArrayValue::class, ReferenceValue::class], + self::PROCESS => [Dictionary::class], + self::PRODUCER => [TextStringValue::class], + self::PROP_AUTH_TIME => [IntegerValue::class], + self::PROP_AUTH_TYPE => [TextStringValue::class], + self::PROP_BUILD => [Dictionary::class], + self::PROPERTIES => [Dictionary::class], + self::PS => [TextStringValue::class, IntegerValue::class, FloatValue::class], + 
self::PT_DATA => [DictionaryArrayValue::class], + self::PTEX_FULLBANNER => [TextStringValue::class], + self::PV => [Dictionary::class], + self::PZ => [FloatValue::class], + self::Q => [IntegerValue::class], + self::QUAD_POINTS => [ArrayValue::class], + self::R => [StandardSecurityHandlerRevision::class, Rectangle::class, Dictionary::class, TextStringValue::class, ArrayValue::class], + self::RANGE => [ArrayValue::class], + self::RATE => [IntegerValue::class, FloatValue::class], + self::RBGROUPS => [ArrayValue::class], + self::RC => [TextStringValue::class, IntegerValue::class, FloatValue::class], + self::RD => [Rectangle::class, TextStringValue::class], + self::REASON => [TextStringValue::class], + self::REASONS => [ArrayValue::class], + self::RECIPIENTS => [ArrayValue::class, TextStringValue::class], + self::RECT => [Rectangle::class], + self::REF => [Dictionary::class], + self::REFERENCE => [DictionaryArrayValue::class], + self::REGISTRY => [TextStringValue::class], + self::RENAME => [BooleanValue::class], + self::RENDITIONS => [TextStringValue::class], + self::REPEAT => [BooleanValue::class], + self::REQUIREMENTS => [DictionaryArrayValue::class], + self::RES_FORK => [TextStringValue::class], + self::RESOURCE => [TextStringValue::class], + self::RESOURCES => [Dictionary::class], + self::RF => [Dictionary::class], + self::RH => [Dictionary::class, DictionaryArrayValue::class], + self::RI => [RenderingIntentNameValue::class, TextStringValue::class], + self::RIGHTS_WATCHMARK => [TextStringValue::class], + self::RM => [Dictionary::class], + self::RO => [TextStringValue::class], + self::ROOT => [Dictionary::class], + self::ROTATE => [IntegerValue::class], + self::ROWS => [IntegerValue::class], + self::RT => [TextStringValue::class, IntegerValue::class], + self::RV => [TextStringValue::class], + self::S => [TextStringValue::class, ArrayValue::class, NumberingStyleNameValue::class, TransitionStyleNameValue::class, BorderStyleNameValue::class, BooleanValue::class], + 
self::SA => [BooleanValue::class, ArrayValue::class], + self::SCHEMA => [Dictionary::class], + self::SCRIPT => [TextStringValue::class], + self::SE => [Dictionary::class], + self::SEPARATION_INFO => [Dictionary::class], + self::SET_F => [IntegerValue::class], + self::SET_FF => [TextStringValue::class], + self::SHADING => [Dictionary::class, TextStringValue::class], + self::SHADING_TYPE => [IntegerValue::class], + self::SHOW_CONTROLS => [BooleanValue::class], + self::SI => [DictionaryArrayValue::class], + self::SIG_FLAGS => [IntegerValue::class], + self::SIGNATURE => [ArrayValue::class], + self::SIZE => [IntegerValue::class, ArrayValue::class], + self::SM => [IntegerValue::class, FloatValue::class], + self::SMASK => [Dictionary::class, TextStringValue::class], + self::SMASK_IN_DATA => [IntegerValue::class], + self::SOLIDITIES => [Dictionary::class], + self::SORT => [Dictionary::class], + self::SOUND => [TextStringValue::class], + self::SOUND_ACTIONS => [IntegerValue::class], + self::SOURCE_MODIFIED => [DateValue::class], + self::SP => [Dictionary::class], + self::SPIDER_INFO => [Dictionary::class], + self::SPOT_FUNCTION => [TextStringValue::class], + self::SS => [IntegerValue::class, FloatValue::class, TextStringValue::class], + self::ST => [IntegerValue::class], + self::START => [TextStringValue::class], + self::START_RESOURCE => [TextStringValue::class], + self::STATE => [TextStringValue::class, ArrayValue::class], + self::STATE_MODEL => [TextStringValue::class], + self::STATUS => [TextStringValue::class], + self::STEM_H => [IntegerValue::class, FloatValue::class], + self::STEM_V => [IntegerValue::class, FloatValue::class, ReferenceValue::class], + self::STM_F => [TextStringValue::class], + self::STR_F => [TextStringValue::class], + self::STRUCT_PARENT => [IntegerValue::class], + self::STRUCT_PARENTS => [IntegerValue::class], + self::STRUCT_TREE_ROOT => [Dictionary::class], + self::STYLE => [Dictionary::class], + self::SUB_FILTER => [TextStringValue::class, 
ArrayValue::class], + self::SUBJ => [TextStringValue::class], + self::SUBJECT => [ArrayValue::class, TextStringValue::class], + self::SUBJECT_DN => [DictionaryArrayValue::class], + self::SUBTYPE => [SubtypeNameValue::class, TextStringValue::class], + self::SUPPLEMENT => [IntegerValue::class], + self::SUSPECTS => [BooleanValue::class], + self::SV => [Dictionary::class], + self::SW => [TextStringValue::class], + self::SY => [TextStringValue::class], + self::SYNCHRONOUS => [BooleanValue::class], + self::T => [Dictionary::class, TextStringValue::class, DictionaryArrayValue::class, BooleanValue::class], + self::TA => [Dictionary::class], + self::TABS => [TabsNameValue::class, TextStringValue::class], + self::TARGET => [TextStringValue::class], + self::TB => [BooleanValue::class], + self::TC => [TextStringValue::class], + self::TEMPLATE_INSTANTIATED => [TextStringValue::class], + self::TEMPLATES => [TextStringValue::class, DictionaryArrayValue::class], + self::TEXT => [TextStringValue::class], + self::TF => [TextStringValue::class], + self::THREADS => [DictionaryArrayValue::class], + self::THREE_DA => [Dictionary::class, TextStringValue::class], + self::THREE_DB => [Rectangle::class], + self::THREE_DD => [TextStringValue::class], + self::THREE_DI => [BooleanValue::class], + self::THREE_DV => [TextStringValue::class, Dictionary::class], + self::THUMB => [TextStringValue::class], + self::TILING_TYPE => [IntegerValue::class], + self::TIME_STAMP => [Dictionary::class], + self::TITLE => [TextStringValue::class], + self::TK => [BooleanValue::class], + self::TM => [TextStringValue::class, IntegerValue::class, FloatValue::class], + self::TO_UNICODE => [ReferenceValue::class], + self::TP => [IntegerValue::class], + self::TPL => [TextStringValue::class], + self::TR => [TextStringValue::class, ArrayValue::class], + self::TR2 => [TextStringValue::class], + self::TRANS => [Dictionary::class], + self::TRANSFER_FUNCTION => [TextStringValue::class], + self::TRANSFORM_METHOD => 
[TextStringValue::class], + self::TRANSFORM_PARAMS => [Dictionary::class], + self::TRAPPED => [TrappedNameValue::class], + self::TREF => [Dictionary::class], + self::TRIM_BOX => [Rectangle::class], + self::TRUE_TYPE_FONTS => [IntegerValue::class], + self::TT => [ArrayValue::class], + self::TU => [TextStringValue::class], + self::TYPE => [TypeNameValue::class, Dictionary::class], + self::U => [TextStringValue::class, Dictionary::class], + self::U3DPATH => [ArrayValue::class, TextStringValue::class], + self::UC => [BooleanValue::class], + self::UCR => [TextStringValue::class], + self::UCR2 => [TextStringValue::class], + self::UF => [TextStringValue::class], + self::UNIX => [TextStringValue::class], + self::UR3 => [Dictionary::class], + self::URI => [Dictionary::class, TextStringValue::class], + self::URIACTIONS => [IntegerValue::class], + self::URL => [TextStringValue::class], + self::URLS => [TextStringValue::class], + self::URLTYPE => [TextStringValue::class], + self::USAGE => [Dictionary::class], + self::USE_CMAP => [TextStringValue::class], + self::USER => [Dictionary::class], + self::USER_PROPERTIES => [BooleanValue::class], + self::USER_UNIT => [FloatValue::class], + self::V => [SecurityAlgorithm::class, FloatValue::class, BooleanValue::class, Dictionary::class, TextStringValue::class, ArrayValue::class], + self::VA => [DictionaryArrayValue::class], + self::VE => [ArrayValue::class], + self::VERIKET_CLASSIFICATION => [TextStringValue::class], + self::VERSION => [Version::class], + self::VERTICES => [ArrayValue::class], + self::VERTICES_PER_ROW => [IntegerValue::class], + self::VIEW => [Dictionary::class, ViewNameValue::class], + self::VIEW_AREA => [TextStringValue::class], + self::VIEW_CLIP => [TextStringValue::class], + self::VIEWER_PREFERENCES => [Dictionary::class], + self::VOLUME => [IntegerValue::class, FloatValue::class], + self::VP => [Dictionary::class, DictionaryArrayValue::class, ReferenceValueArray::class, ArrayValue::class], + self::W => 
[CrossReferenceStreamByteSizes::class, CIDFontWidths::class, IntegerValue::class, FloatValue::class, ReferenceValue::class], + self::W2 => [ArrayValue::class], + self::WC => [Dictionary::class], + self::WHITE_POINT => [ArrayValue::class], + self::WIDTH => [IntegerValue::class], + self::WIDTH2 => [IntegerValue::class], + self::WIDTHS => [ArrayValue::class], + self::WIN => [Dictionary::class], + self::WM => [TextStringValue::class], + self::WMODE => [IntegerValue::class], + self::WP => [Dictionary::class], + self::WS => [Dictionary::class], + self::X => [Dictionary::class, ArrayValue::class], + self::XFA => [TextStringValue::class, ArrayValue::class], + self::XHEIGHT => [IntegerValue::class, FloatValue::class], + self::XN => [TextStringValue::class], + self::XOBJECT => [Dictionary::class], + self::XREF_STM => [IntegerValue::class], + self::XSQUARE => [IntegerValue::class], + self::XSTEP => [IntegerValue::class, FloatValue::class], + self::XYZ => [TextStringValue::class], + self::Y => [ArrayValue::class], + self::YSQUARE => [IntegerValue::class], + self::YSTEP => [IntegerValue::class, FloatValue::class], + self::Z => [Dictionary::class], + self::ZA_DB => [ReferenceValue::class, TextStringValue::class], + self::ZOOM => [Dictionary::class], + }; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryKey/DictionaryKeyInterface.php b/includes/pdfparser/Document/Dictionary/DictionaryKey/DictionaryKeyInterface.php new file mode 100644 index 0000000..6fe2fa5 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryKey/DictionaryKeyInterface.php @@ -0,0 +1,12 @@ +> */ + public function getValueTypes(): array; +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryKey/ExtendedDictionaryKey.php b/includes/pdfparser/Document/Dictionary/DictionaryKey/ExtendedDictionaryKey.php new file mode 100644 index 0000000..1a24471 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryKey/ExtendedDictionaryKey.php @@ -0,0 +1,26 @@ + */ + 
private array $nestingContext = []; + + /** @var array */ + private array $keyBuffer = []; + + /** @var array */ + private array $valueBuffer = []; + + public function __construct() { + $this->currentLevel = ''; + } + + public function incrementNesting(): self { + $this->currentLevel = (string) ($this->keyBuffer[$this->currentLevel] ?? (int) $this->currentLevel + 1); + + return $this; + } + + public function decrementNesting(): self { + array_pop($this->nestingContext); + $this->currentLevel = (string) array_key_last($this->nestingContext); + + return $this; + } + + public function setContext(DictionaryParseContext $dictionaryParseContext): self { + $this->nestingContext[$this->currentLevel] = $dictionaryParseContext; + + return $this; + } + + public function getContext(): DictionaryParseContext { + return $this->nestingContext[$this->currentLevel] ?? DictionaryParseContext::ROOT; + } + + public function getKeyBuffer(): InfiniteBuffer { + return $this->keyBuffer[$this->currentLevel] ??= new InfiniteBuffer(); + } + + public function addToKeyBuffer(string $char): self { + $this->getKeyBuffer()->addChar($char); + + return $this; + } + + public function removeFromKeyBuffer(int $nChars = 1): self { + $this->getKeyBuffer()->removeChar($nChars); + + return $this; + } + + public function getValueBuffer(): InfiniteBuffer { + return $this->valueBuffer[$this->currentLevel] ??= new InfiniteBuffer(); + } + + public function addToValueBuffer(string $char): self { + $this->getValueBuffer()->addChar($char); + + return $this; + } + + public function removeFromValueBuffer(int $nChars = 1): self { + $this->getValueBuffer()->removeChar($nChars); + + return $this; + } + + /** @return list */ + public function getKeysFromRoot(): array { + $keysFromRoot = []; + foreach ($this->keyBuffer as $keyBuffer) { + $keyBufferString = (string) $keyBuffer; + if ($keyBufferString === '') { + continue; + } + + $keysFromRoot[] = $keyBufferString; + } + + return $keysFromRoot; + } + + public function 
flush(): self { + ($this->valueBuffer[$this->currentLevel] ?? null)?->flush(); + ($this->keyBuffer[$this->currentLevel] ?? null)?->flush(); + + return $this; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryParser.php b/includes/pdfparser/Document/Dictionary/DictionaryParser.php new file mode 100644 index 0000000..735b1ba --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryParser.php @@ -0,0 +1,112 @@ + $startPos + * @phpstan-assert int<1, max> $nrOfBytes + * + * @throws PdfParserException + */ + public static function parse(Stream $stream, int $startPos, int $nrOfBytes): Dictionary { + $dictionaryArray = []; + $rollingCharBuffer = new RollingCharBuffer(6); + $nestingContext = (new NestingContext())->setContext(DictionaryParseContext::ROOT); + $arrayNestingLevel = 0; + foreach ($stream->chars($startPos, $nrOfBytes) as $char) { + $rollingCharBuffer->next($char); + if ($char === DelimiterCharacter::LESS_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter() === DelimiterCharacter::LESS_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter(2) !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) { + if ($nestingContext->getContext() === DictionaryParseContext::KEY) { + $nestingContext->removeFromKeyBuffer(); + } + + $nestingContext->setContext(DictionaryParseContext::DICTIONARY)->incrementNesting()->setContext(DictionaryParseContext::DICTIONARY); + } elseif ($char === DelimiterCharacter::LESS_THAN_SIGN->value && $nestingContext->getContext() === DictionaryParseContext::KEY) { + $nestingContext->setContext(DictionaryParseContext::VALUE); + } elseif ($char === DelimiterCharacter::GREATER_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter() === DelimiterCharacter::GREATER_THAN_SIGN->value && $rollingCharBuffer->getPreviousCharacter(2) !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && 
$nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) { + $nestingContext->removeFromValueBuffer(); + self::flush($dictionaryArray, $nestingContext); + $nestingContext->decrementNesting()->flush(); + } elseif ($char === DelimiterCharacter::SOLIDUS->value && $rollingCharBuffer->getPreviousCharacter() !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) { + if ($nestingContext->getContext() === DictionaryParseContext::DICTIONARY) { + $nestingContext->setContext(DictionaryParseContext::KEY); + } elseif ($nestingContext->getContext() === DictionaryParseContext::VALUE) { + self::flush($dictionaryArray, $nestingContext); + $nestingContext->setContext(DictionaryParseContext::KEY); + } elseif ($nestingContext->getContext() === DictionaryParseContext::KEY || $nestingContext->getContext() === DictionaryParseContext::KEY_VALUE_SEPARATOR) { + $nestingContext->setContext(DictionaryParseContext::VALUE); + } + } elseif ($char === WhitespaceCharacter::LINE_FEED->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) { + if ($nestingContext->getContext() === DictionaryParseContext::KEY) { + $nestingContext->setContext(DictionaryParseContext::KEY_VALUE_SEPARATOR); + } elseif ($nestingContext->getContext() === DictionaryParseContext::VALUE) { + self::flush($dictionaryArray, $nestingContext); + } elseif ($nestingContext->getContext() === DictionaryParseContext::COMMENT) { + $nestingContext->setContext(DictionaryParseContext::DICTIONARY); + } + } elseif (WhitespaceCharacter::tryFrom($char) !== null && $nestingContext->getContext() === DictionaryParseContext::KEY) { + $nestingContext->setContext(DictionaryParseContext::KEY_VALUE_SEPARATOR); + } elseif ($char === DelimiterCharacter::LEFT_PARENTHESIS->value && (in_array($nestingContext->getContext(), [DictionaryParseContext::KEY, DictionaryParseContext::KEY_VALUE_SEPARATOR, 
DictionaryParseContext::VALUE], true))) { + $nestingContext->setContext(DictionaryParseContext::VALUE_IN_PARENTHESES); + } elseif ($char === DelimiterCharacter::RIGHT_PARENTHESIS->value && $rollingCharBuffer->getPreviousCharacter() !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() === DictionaryParseContext::VALUE_IN_PARENTHESES) { + $nestingContext->setContext(DictionaryParseContext::VALUE); + } elseif ($char === DelimiterCharacter::LEFT_SQUARE_BRACKET->value && (in_array($nestingContext->getContext(), [DictionaryParseContext::KEY, DictionaryParseContext::KEY_VALUE_SEPARATOR, DictionaryParseContext::VALUE, DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS], true))) { + $nestingContext->setContext(DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS); + $arrayNestingLevel++; + } elseif ($char === DelimiterCharacter::RIGHT_SQUARE_BRACKET->value && $nestingContext->getContext() === DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS) { + $arrayNestingLevel--; + if ($arrayNestingLevel === 0) { + $nestingContext->setContext(DictionaryParseContext::VALUE); + } + } elseif (trim($char) !== '' && $nestingContext->getContext() === DictionaryParseContext::KEY_VALUE_SEPARATOR) { + $nestingContext->setContext(DictionaryParseContext::VALUE); + } elseif ($char === DelimiterCharacter::PERCENT_SIGN->value && $rollingCharBuffer->getPreviousCharacter() !== LiteralStringEscapeCharacter::REVERSE_SOLIDUS->value && $nestingContext->getContext() !== DictionaryParseContext::VALUE_IN_PARENTHESES) { + $nestingContext->setContext(DictionaryParseContext::COMMENT); + } + + match ($nestingContext->getContext()) { + DictionaryParseContext::KEY => $nestingContext->addToKeyBuffer($char), + DictionaryParseContext::VALUE_IN_PARENTHESES, + DictionaryParseContext::VALUE_IN_SQUARE_BRACKETS, + DictionaryParseContext::VALUE => $nestingContext->addToValueBuffer($char), + default => null, + }; + } + + return DictionaryFactory::fromArray($dictionaryArray); + } + + /** @param 
array $dictionaryArray */ + private static function flush(array &$dictionaryArray, NestingContext $nestingContext): void { + if ($nestingContext->getValueBuffer()->isEmpty() || $nestingContext->getKeyBuffer()->isEmpty()) { + return; + } + + $dictionaryArrayPointer = &$dictionaryArray; + $keys = $nestingContext->getKeysFromRoot(); + foreach ($keys as $index => $key) { + if ($key === (string) $nestingContext->getKeyBuffer() && $index === array_key_last($keys)) { + break; + } + + /** @phpstan-ignore offsetAccess.nonOffsetAccessible */ + $dictionaryArrayPointer = &$dictionaryArrayPointer[trim($key)]; + } + + /** @phpstan-ignore offsetAccess.nonOffsetAccessible */ + $dictionaryArrayPointer[(string) $nestingContext->getKeyBuffer()] = trim((string) $nestingContext->getValueBuffer()); + $nestingContext->flush(); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/ArrayValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/ArrayValue.php new file mode 100644 index 0000000..00914ac --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/ArrayValue.php @@ -0,0 +1,69 @@ + $value */ + public function __construct( + public readonly array $value + ) { + } + + #[Override] + /** @throws PdfParserException */ + public static function fromValue(string $valueString): null|self|ReferenceValueArray { + $valueString = trim($valueString); + if (!str_starts_with($valueString, '[') || !str_ends_with($valueString, ']')) { + return null; + } + + $valueString = preg_replace('/(<[^>]*>)(?=<[^>]*>)/', '$1 $2', $valueString) + ?? throw new RuntimeException('An error occurred while sanitizing array value'); + $valueString = str_replace(['/', "\n"], [' /', ' '], rtrim(ltrim($valueString, '[ '), ' ]')); + $valueString = preg_replace('/\s+/', ' ', $valueString) + ?? 
throw new RuntimeException('An error occurred while removing duplicate spaces from array value'); + $values = explode(' ', $valueString); + if (count($values) % 3 === 0 && array_key_exists(2, $values) && $values[2] === 'R') { + return ReferenceValueArray::fromValue($valueString); + } + + $array = []; + foreach ($values as $value) { + if (str_starts_with($value, '[') && str_ends_with($value, ']')) { + $array[] = self::fromValue($value); + } elseif ((string) (int) $value === $value) { + $array[] = (int) $value; + } elseif ($value !== '') { + $array[] = $value; + } + } + + return new self($array); + } + + public function toString(): string { + $string = ''; + foreach ($this->value as $value) { + $string .= ' ' . match (true) { + is_int($value), + is_float($value), + is_string($value) => $value, + $value instanceof ArrayValue => $value->toString(), + $value instanceof ReferenceValueArray => implode(' ', array_map(fn (ReferenceValue $referenceValue) => $referenceValue->objectNumber . ' R', $value->referenceValues)), + default => throw new ParseFailureException('Unsupported array value type: ' . gettype($value)), + }; + } + + return '[' . trim($string) . 
']'; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/CIDFontWidths.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/CIDFontWidths.php new file mode 100644 index 0000000..0dd3da0 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/CIDFontWidths.php @@ -0,0 +1,65 @@ + */ + private readonly array $widths; + + /** @no-named-arguments */ + public function __construct( + ConsecutiveCIDWidth|RangeCIDWidth ...$widths, + ) { + $this->widths = $widths; + } + + public function getWidthForCharacter(int $characterCode): ?float { + foreach ($this->widths as $widthItem) { + if (($widthForCharacterCode = $widthItem->getWidthForCharacterCode($characterCode)) !== null) { + return $widthForCharacterCode; + } + } + + return null; + } + + #[Override] + public static function fromValue(string $valueString): ?self { + $valueString = str_replace("\n", ' ', $valueString); + if (preg_match_all('/(?[0-9]+)\s*(?[0-9]+\s*[0-9.]+|\[[0-9. ]+\])/', $valueString, $matches, PREG_SET_ORDER) <= 0) { + return null; + } + + $widths = []; + foreach ($matches as $match) { + if ((string) ($startingCID = (int) $match['startingCID']) !== $match['startingCID']) { + return null; + } + + if (str_starts_with($match['CIDS'], '[') && str_ends_with($match['CIDS'], ']')) { + $widths[] = new ConsecutiveCIDWidth($startingCID, array_map('floatval', explode(' ', rtrim(ltrim($match['CIDS'], '['), ']')))); + + continue; + } + + $arguments = explode(' ', $match['CIDS']); + if (count($arguments) !== 2) { + return null; + } + + if ((string)($endCID = (int) $arguments[0]) !== $arguments[0] || (string)($width = (float) $arguments[1]) !== $arguments[1]) { + return null; + } + + $widths[] = new RangeCIDWidth($startingCID, $endCID, $width); + } + + return new self(... 
$widths); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/CrossReferenceStreamByteSizes.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/CrossReferenceStreamByteSizes.php new file mode 100644 index 0000000..c2af98b --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/CrossReferenceStreamByteSizes.php @@ -0,0 +1,50 @@ + + */ + public function getTotalLengthInBytes(): int { + $totalLength = $this->lengthRecord1InBytes + $this->lengthRecord2InBytes + $this->lengthRecord3InBytes; + if ($totalLength < 1) { + throw new RuntimeException(sprintf('Total length should not be less than 1, got %d', $totalLength)); + } + + return $totalLength; + } + + #[Override] + public static function fromValue(string $valueString): ?self { + if (!str_starts_with($valueString, '[') || !str_ends_with($valueString, ']')) { + return null; + } + + $values = explode(' ', trim(rtrim(ltrim($valueString, '['), ']'))); + if (count($values) !== 3) { + return null; + } + + if ((string) (int) trim($values[0]) !== trim($values[0]) + || (string) (int) trim($values[1]) !== trim($values[1]) + || (string) (int) trim($values[2]) !== trim($values[2])) { + return null; + } + + return new self((int) $values[0], (int) $values[1], (int) $values[2]); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/DictionaryArrayValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/DictionaryArrayValue.php new file mode 100644 index 0000000..5700928 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/DictionaryArrayValue.php @@ -0,0 +1,55 @@ + */ + public readonly array $dictionaries; + + /** @no-named-arguments */ + public function __construct( + Dictionary... 
$dictionaries, + ) { + $this->dictionaries = $dictionaries; + } + + #[Override] + /** @throws PdfParserException */ + public static function fromValue(string $valueString): ?self { + $valueStringWithoutSpaces = str_replace([' ', "\r", "\n"], '', $valueString); + if ((str_starts_with($valueStringWithoutSpaces, '[<<') === false && str_starts_with($valueStringWithoutSpaces, '[null') === false) + || (str_ends_with($valueStringWithoutSpaces, '>>]') === false && str_ends_with($valueStringWithoutSpaces, 'null]') === false)) { + return null; + } + + $dictionaryEntries = []; + $valueString = preg_replace('/(<<[^>]*>>)(?=<<[^>]*>>)/', '$1 $2', $valueString) + ?? throw new RuntimeException('An error occurred while sanitizing dictionary array value'); + foreach (explode('>> <<', substr($valueString, 3, -3)) as $dictionaryValueString) { + $dictionaryEntries[] = $dictionaryValueString === '' + ? new Dictionary() + : DictionaryParser::parse($memoryStream = new InMemoryStream('<<' . $dictionaryValueString . '>>'), 0, $memoryStream->getSizeInBytes()); + } + + return new self(... $dictionaryEntries); + } + + public function toSingleDictionary(): ?Dictionary { + $dictionaryEntries = []; + foreach ($this->dictionaries as $dictionary) { + foreach ($dictionary->dictionaryEntries as $dictionaryEntry) { + $dictionaryEntries[] = $dictionaryEntry; + } + } + + return new Dictionary(... 
$dictionaryEntries); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/DifferencesArrayValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/DifferencesArrayValue.php new file mode 100644 index 0000000..567f5cb --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/DifferencesArrayValue.php @@ -0,0 +1,57 @@ + $differenceRanges */ + public function __construct( + private readonly array $differenceRanges, + ) { + } + + #[Override] + public static function fromValue(string $valueString): ?self { + if (($arrayValue = ArrayValue::fromValue($valueString)) === null || $arrayValue instanceof ReferenceValueArray) { + return null; + } + + $startIndex = null; + $characters = $differenceRanges = []; + foreach ($arrayValue->value as $arrayValueItem) { + if (is_int($arrayValueItem)) { + if ($startIndex !== null) { + $differenceRanges[] = new DifferenceRange($startIndex, $characters); + $characters = []; + } + + $startIndex = $arrayValueItem; + } elseif (is_string($arrayValueItem)) { + $characters[] = AGlyphList::tryFrom(ltrim($arrayValueItem, '/')); + } else { + return null; + } + } + + if ($startIndex !== null) { + $differenceRanges[] = new DifferenceRange($startIndex, $characters); + } + + return new self($differenceRanges); + } + + public function getGlyph(int $int): ?AGlyphList { + foreach ($this->differenceRanges as $differenceRange) { + if ($differenceRange->contains($int)) { + return $differenceRange->getGlyph($int); + } + } + + return null; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/ConsecutiveCIDWidth.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/ConsecutiveCIDWidth.php new file mode 100644 index 0000000..2397b8e --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/ConsecutiveCIDWidth.php @@ -0,0 +1,20 @@ + $widths */ + public function __construct( + public readonly int $cidStart, + public 
readonly array $widths, + ) { + } + + public function getWidthForCharacterCode(int $characterCode): ?float { + if (array_key_exists($characterCode - $this->cidStart, $this->widths) === false) { + return null; + } + + return $this->widths[$characterCode - $this->cidStart] / 1000; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/DifferenceRange.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/DifferenceRange.php new file mode 100644 index 0000000..6dfca8b --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/DifferenceRange.php @@ -0,0 +1,33 @@ + $characters */ + public function __construct( + private readonly int $firstIndex, + private readonly array $characters, + ) { + } + + public function contains(int $index): bool { + return $index >= $this->firstIndex + && $index < $this->firstIndex + count($this->characters); + } + + public function getGlyph(int $index): ?AGlyphList { + if (!$this->contains($index)) { + throw new InvalidArgumentException('This difference range does not contain index ' . 
$index); + } + + if (!array_key_exists($index - $this->firstIndex, $this->characters)) { + throw new RuntimeException('Expected glyph to be present, but it was not'); + } + + return $this->characters[$index - $this->firstIndex]; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/RangeCIDWidth.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/RangeCIDWidth.php new file mode 100644 index 0000000..b11549c --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Array/Item/RangeCIDWidth.php @@ -0,0 +1,20 @@ +cidStart || $characterCode > $this->cidEnd) { + return null; + } + + return $this->width / 1000; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Boolean/BooleanValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Boolean/BooleanValue.php new file mode 100644 index 0000000..7e44699 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Boolean/BooleanValue.php @@ -0,0 +1,27 @@ +')) { + $valueString = substr($valueString, 1, -1); + if (!ctype_xdigit($valueString) || strlen($valueString) % 2 !== 0) { + throw new InvalidArgumentException(sprintf('String "%s" is not hexadecimal', substr($valueString, 0, 10))); + } + + $valueString = hex2bin($valueString); + if ($valueString === false) { + return null; + } + } + + if (str_starts_with($valueString, '(') && str_ends_with($valueString, ')')) { + $valueString = preg_replace_callback( + '/\\\\([0-7]{3})/', + fn (array $matches) => mb_chr((int) octdec($matches[1])), + substr($valueString, 1, -1) + ) ?? throw new ParseFailureException(); + } + + if (!str_starts_with($valueString, 'D:')) { + $valueString = mb_convert_encoding($valueString, 'UTF-8', 'UTF-16'); + if ($valueString === false || !str_starts_with($valueString, 'D:')) { + return null; + } + } + + try { + $parsedDate = DateTimeImmutable::createFromFormat( + preg_match('/^D:\d{14}$/', $valueString) === 1 ? 
'\D\:YmdHis' : '\D\:YmdHisP', + str_replace("'", '', $valueString) + ); + } catch (ValueError) { + return null; + } + + if ($parsedDate === false) { + return null; + } + + return new self($parsedDate); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/DictionaryValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/DictionaryValue.php new file mode 100644 index 0000000..59bc3b5 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/DictionaryValue.php @@ -0,0 +1,8 @@ + Components::Gray, + self::DeviceRGB => Components::RGB, + self::DeviceCMYK => Components::CMYK, + }; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/DirectionNameValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/DirectionNameValue.php new file mode 100644 index 0000000..076432d --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/DirectionNameValue.php @@ -0,0 +1,8 @@ + (new Identity0())->getToUnicodeCMap()->textToUnicode($characterGroup), + self::WinAnsiEncoding => WinAnsi::textToUnicode($characterGroup), + self::MacRomanEncoding => MacRoman::textToUnicode($characterGroup), + default => throw new ParseFailureException(sprintf('Unsupported encoding %s', $this->name)), + }; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/EventNameValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/EventNameValue.php new file mode 100644 index 0000000..02dfee1 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/EventNameValue.php @@ -0,0 +1,9 @@ +getSubDictionary($document, DictionaryKey::DECODE_PARMS); + + return match($this) { + self::JPX_DECODE, + self::JBIG2_DECODE, + self::DCT_DECODE => $content, // Don't decode JPEG content + self::FLATE_DECODE => FlateDecode::decodeBinary( + $content, + $decodeParams !== null && ($predictorValue = LZWFlatePredictorValue::tryFrom((int) 
$decodeParams->getValueForKey(DictionaryKey::PREDICTOR, IntegerValue::class)?->value)) !== null + ? $predictorValue + : LZWFlatePredictorValue::None, + $decodeParams?->getValueForKey(DictionaryKey::COLUMNS, IntegerValue::class)->value ?? 1 + ), + self::CCITT_FAX_DECODE => CCITTFaxDecode::addHeaderAndIFD( + $content, + $decodeParams?->getValueForKey(DictionaryKey::COLUMNS, IntegerValue::class)->value + ?? throw new ParseFailureException('Missing columns'), + $decodeParams->getValueForKey(DictionaryKey::ROWS, IntegerValue::class)->value + ?? $dictionary->getValueForKey(DictionaryKey::HEIGHT, IntegerValue::class)->value + ?? throw new ParseFailureException('Missing rows'), + $decodeParams->getValueForKey(DictionaryKey::K, IntegerValue::class)->value + ?? throw new ParseFailureException('Missing K'), + ), + self::ASCII_85_DECODE => ASCII85Decode::decodeBinary($content), + default => throw new ParseFailureException(sprintf('Content "%.100s..." cannot be decoded for filter "%s"', $content, $this->name)) + }; + } + + public function getImageType(): ?ImageType { + return match ($this) { + self::LZW_DECODE => ImageType::TIFF, + self::FLATE_DECODE => ImageType::PNG, + self::RUN_LENGTH_DECODE => ImageType::RAW, + self::CCITT_FAX_DECODE => ImageType::TIFF_FAX, + self::DCT_DECODE => ImageType::JPEG, + self::JPX_DECODE => ImageType::JPEG2000, + self::JBIG2_DECODE => ImageType::JBIG2, + default => null, + }; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/IntentNameValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/IntentNameValue.php new file mode 100644 index 0000000..d61fbf0 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/IntentNameValue.php @@ -0,0 +1,9 @@ + */ + public function getDecoratorFQN(): string { + return match($this) { + TypeNameValue::CATALOG => Catalog::class, + TypeNameValue::EMBEDDED_FILE => EmbeddedFile::class, + TypeNameValue::FILE_SPEC => FileSpecification::class, + 
TypeNameValue::FONT => Font::class, + TypeNameValue::PAGE => Page::class, + TypeNameValue::PAGES => Pages::class, + TypeNameValue::X_OBJECT => XObject::class, + default => GenericObject::class, + }; + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/ViewNameValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/ViewNameValue.php new file mode 100644 index 0000000..e904398 --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/Name/ViewNameValue.php @@ -0,0 +1,9 @@ + */ + public readonly array $referenceValues; + + /** @no-named-arguments */ + public function __construct(ReferenceValue ...$referenceValues) { + $this->referenceValues = $referenceValues; + } + + #[Override] + /** @throws ParseFailureException */ + public static function fromValue(string $valueString): ?self { + if (!str_starts_with($valueString, '[') || !str_ends_with($valueString, ']')) { + return null; + } + + $valueString = preg_replace('/\s+/', ' ', $valueString) + ?? throw new ParseFailureException('An unexpected error occurred while sanitizing reference value array'); + $valueString = trim(rtrim(ltrim($valueString, '['), ']')); + if (str_starts_with($valueString, '<<') && str_ends_with($valueString, '>>')) { + return null; + } + + if ($valueString === '') { + return new self(); + } + + $referenceParts = explode(' ', $valueString); + $nrOfReferenceParts = count($referenceParts); + if ($nrOfReferenceParts % 3 !== 0) { + return null; + } + + $referenceValues = []; + for ($i = 0; $i < $nrOfReferenceParts; $i += 3) { + /** @phpstan-ignore offsetAccess.notFound, offsetAccess.notFound, offsetAccess.notFound */ + $string = $referenceParts[$i] . ' ' . $referenceParts[$i + 1] . ' ' . $referenceParts[$i + 2]; + + $referenceValues[] = ReferenceValue::fromValue($string) + ?? throw new ParseFailureException(sprintf('Could not parse reference value "%s" at index %d in "%s"', $string, $i, $valueString)); + } + + return new self(... 
$referenceValues); + } +} diff --git a/includes/pdfparser/Document/Dictionary/DictionaryValue/TextString/TextStringValue.php b/includes/pdfparser/Document/Dictionary/DictionaryValue/TextString/TextStringValue.php new file mode 100644 index 0000000..a8b2a5f --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/DictionaryValue/TextString/TextStringValue.php @@ -0,0 +1,57 @@ +textStringValue, '(') && str_ends_with($this->textStringValue, ')')) { + return preg_replace_callback( + '/\\\\([0-7]{3})/', + fn (array $matches) => mb_chr((int) octdec($matches[1])), + str_replace(['\(', '\)', '\n', '\r'], ['(', ')', "\n", "\r"], substr($this->textStringValue, 1, -1)) + ) ?? throw new ParseFailureException(); + } + + if (str_starts_with($this->textStringValue, '<') && str_ends_with($this->textStringValue, '>')) { + $string = substr($this->textStringValue, 1, -1); + if (str_starts_with($string, 'FEFF')) { + $string = substr($string, 4); + } + + return implode( + '', + array_map( + fn (string $character) => mb_chr((int) hexdec($character)), + str_split($string, 4) + ) + ); + } + + if (str_starts_with($this->textStringValue, '/')) { + return preg_replace_callback( + '/#([0-9A-F]{2})/', + fn (array $matches) => chr((int) hexdec($matches[1])), + $this->textStringValue, + ) ?? 
throw new ParseFailureException(); + } + + throw new ParseFailureException(sprintf('Unrecognized format %s', $this->textStringValue)); + } + + #[Override] + public static function fromValue(string $valueString): self { + return new self($valueString); + } +} diff --git a/includes/pdfparser/Document/Dictionary/Normalization/NameValueNormalizer.php b/includes/pdfparser/Document/Dictionary/Normalization/NameValueNormalizer.php new file mode 100644 index 0000000..2dfe7eb --- /dev/null +++ b/includes/pdfparser/Document/Dictionary/Normalization/NameValueNormalizer.php @@ -0,0 +1,25 @@ + */ + private readonly array $pages; + + /** @var array */ + private array $objectCache = []; + + public function __construct( + public readonly Stream $stream, + public readonly Version $version, + public readonly CrossReferenceSource $crossReferenceSource, + public ?StandardSecurity $security, + ) { + if ($this->getEncryptDictionary() !== null) { + throw new NotImplementedException('Encrypted documents are not supported yet'); + } + } + + /** @throws PdfParserException */ + public function getInformationDictionary(): ?InformationDictionary { + $infoReference = $this->crossReferenceSource->getReferenceForKey(DictionaryKey::INFO); + if ($infoReference === null) { + return null; + } + + return $this->getObject($infoReference->objectNumber, InformationDictionary::class); + } + + public function getEncryptDictionary(): ?EncryptDictionary { + $infoReference = $this->crossReferenceSource->getReferenceForKey(DictionaryKey::ENCRYPT); + if ($infoReference === null) { + return null; + } + + return $this->getObject($infoReference->objectNumber, EncryptDictionary::class); + } + + /** @throws PdfParserException */ + public function getCatalog(): Catalog { + $rootReference = $this->crossReferenceSource->getReferenceForKey(DictionaryKey::ROOT) + ?? throw new ParseFailureException('Unable to locate root for document.'); + $catalog = $this->getObject($rootReference->objectNumber, Catalog::class) + ?? 
throw new ParseFailureException(sprintf('Document references object %d as root, but object couln\'t be located', $rootReference->objectNumber)); + if (!$catalog instanceof Catalog) { + throw new RuntimeException('Catalog should be a catalog item'); + } + + return $catalog; + } + + /** + * @template T of DecoratedObject + * @param class-string|null $expectedDecoratorFQN + * @throws PdfParserException + * @return ($expectedDecoratorFQN is null ? list : list) + */ + public function getObjectsByDictionaryKey(Dictionary $dictionary, DictionaryKey $dictionaryKey, ?string $expectedDecoratorFQN = null): array { + $dictionaryValueType = $dictionary->getTypeForKey($dictionaryKey); + if ($dictionaryValueType === ReferenceValue::class) { + return [$this->getObject($dictionary->getValueForKey($dictionaryKey, ReferenceValue::class)->objectNumber ?? throw new ParseFailureException(), $expectedDecoratorFQN) ?? throw new ParseFailureException()]; + } elseif ($dictionaryValueType === ReferenceValueArray::class) { + return array_map( + fn (ReferenceValue $referenceValue) => $this->getObject($referenceValue->objectNumber, $expectedDecoratorFQN) ?? throw new ParseFailureException(), + $dictionary->getValueForKey($dictionaryKey, ReferenceValueArray::class)->referenceValues ?? throw new ParseFailureException(), + ); + } + + throw new ParseFailureException(sprintf('Dictionary value with key "%s" is of type "%s", expected referencevalue(array)', $dictionaryKey->name, $dictionaryValueType ?? 'null')); + } + + /** + * @template T of DecoratedObject + * @param class-string|null $expectedDecoratorFQN + * @throws PdfParserException + * @return ($expectedDecoratorFQN is null ? 
DecoratedObject : T) + */ + public function getObject(int $objectNumber, ?string $expectedDecoratorFQN = null): ?DecoratedObject { + if (array_key_exists($objectNumber, $this->objectCache)) { + return $this->objectCache[$objectNumber]; + } + + $crossReferenceEntry = $this->crossReferenceSource->getCrossReferenceEntry($objectNumber); + if ($crossReferenceEntry === null) { + return null; + } + + if ($crossReferenceEntry instanceof CrossReferenceEntryCompressed) { + $parentObject = $this->getObject($crossReferenceEntry->storedInStreamWithObjectNumber) + ?? throw new RuntimeException(sprintf('Parent object for %d with number %d doesn\'t exist', $objectNumber, $crossReferenceEntry->storedInStreamWithObjectNumber)); + + if (!$parentObject->objectItem instanceof UncompressedObject) { + throw new RuntimeException('Parents for stream items shouldn\'t be stream items themselves'); + } + + $objectItem = $parentObject->objectItem->getCompressedObject($objectNumber, $this); + } else { + $objectItem = UncompressedObjectParser::parseObject($crossReferenceEntry, $objectNumber, $this->stream); + } + + return $this->objectCache[$objectNumber] = DecoratedObjectFactory::forItem($objectItem, $this, $expectedDecoratorFQN); + } + + /** @throws PdfParserException */ + public function getPage(int $pageNumber): ?Page { + return $this->getPages()[$pageNumber - 1] ?? null; + } + + /** @throws PdfParserException */ + public function getNumberOfPages(): int { + return count($this->getPages()); + } + + /** + * @throws PdfParserException + * @return list + */ + public function getPages(): array { + return $this->pages ??= $this->getCatalog() + ->getPagesRoot() + ->getPageItems(); + } + + /** + * @param ?string $pageSeparator an optional string to put between text of different pages + * @throws PdfParserException + */ + public function getText(?string $pageSeparator = null): string { + $text = ''; + foreach ($this->getPages() as $page) { + $text .= ($pageSeparator !== null ? 
$pageSeparator : '') + . $page->getText(); + } + + return $text; + } + + /** + * @throws PdfParserException + * @return list + */ + public function getImages(): array { + $images = []; + foreach ($this->getPages() as $page) { + $images = [... $images, ...$page->getImages()]; + } + + return $images; + } +} diff --git a/includes/pdfparser/Document/Encoding/Encoding.php b/includes/pdfparser/Document/Encoding/Encoding.php new file mode 100644 index 0000000..42a25f9 --- /dev/null +++ b/includes/pdfparser/Document/Encoding/Encoding.php @@ -0,0 +1,8 @@ +')) { + $string = substr($string, 2, -2); + } + + $string = preg_replace('/\s+/', '', $string) + ?? throw new RuntimeException('An unexpected error occurred while sanitizing ASCII85 string'); + $length = strlen($string); + $decoded = $block = ''; + for ($i = 0; $i < $length; ++$i) { + $char = $string[$i]; + if ($char === 'z') { + $decoded .= "\0\0\0\0"; + continue; + } + + $block .= $char; + if (strlen($block) === 5) { + $value = 0; + for ($j = 0; $j < 5; ++$j) { + $value = $value * 85 + (ord($block[$j]) - 33); + } + + $decoded .= pack('N', $value); + $block = ''; + } + } + + if ($block !== '') { + $padding = 5 - strlen($block); + $block = str_pad($block, 5, 'u'); + $value = 0; + for ($i = 0; $i < 5; ++$i) { + $value = $value * 85 + (ord($block[$i]) - 33); + } + + $binaryData = pack('N', $value); + $decoded .= substr($binaryData, 0, 4 - $padding); + } + + return $decoded; + } +} diff --git a/includes/pdfparser/Document/Filter/Decode/CCITTFaxDecode.php b/includes/pdfparser/Document/Filter/Decode/CCITTFaxDecode.php new file mode 100644 index 0000000..c9df473 --- /dev/null +++ b/includes/pdfparser/Document/Filter/Decode/CCITTFaxDecode.php @@ -0,0 +1,52 @@ += 0 ? 
3 : 4), + self::createIfdEntry(TiffTag::PhotometricInterpretation, 3, 1, 0), + self::createIfdEntry(TiffTag::RowsPerStrip, 3, 1, $rows), + self::createIfdEntry(TiffTag::StripByteCounts, 4, 1, strlen($rawData)), + ]; + + $ifdEntries[] = self::createIfdEntry(TiffTag::StripOffsets, 4, 1, 8 + 2 + (12 * (count($ifdEntries) + 1)) + 4); + + return self::BYTE_ORDER_LITTLE_ENDIAN + . pack("v", self::MAGIC_NUMBER_TIFF) + . pack("V", self::IFD_OFFSET_IN_BYTES) + . pack("v", count($ifdEntries)) + . implode('', $ifdEntries) + . pack("V", self::END_OF_IFD_OFFSET) + . $rawData; + } + + /** + * @param int<3,4> $type + * @param int<1, max> $count + */ + private static function createIfdEntry(TiffTag $tiffTag, int $type, int $count, int $value): string { + $entry = pack("v", $tiffTag->value) . pack("v", $type) . pack("V", $count); + + if ($type === 3 && $count === 1) { + return $entry . pack("v", $value) . "\x00\x00"; + } elseif ($type === 4 || ($type === 3 && $count > 1)) { + return $entry . pack("V", $value); + } else { + throw new ParseFailureException("Unsupported IFD entry type or count."); + } + } +} diff --git a/includes/pdfparser/Document/Filter/Decode/FlateDecode.php b/includes/pdfparser/Document/Filter/Decode/FlateDecode.php new file mode 100644 index 0000000..ff1ae06 --- /dev/null +++ b/includes/pdfparser/Document/Filter/Decode/FlateDecode.php @@ -0,0 +1,87 @@ +value); + } + + $hexTable = array_map(fn (string $row) => str_split($row, 2), str_split(bin2hex($decodedValue), ($columns + 1) * 2)); + $decodedValue = ''; + foreach ($hexTable as $rowIndex => $row) { + if (!is_array($row) || !array_is_list($row) || count($row) < 2) { + throw new RuntimeException(sprintf('Expected at least 2 items per row, got %d', count($row))); + } + + if (!is_int($algorithmNumber = hexdec($row[0]))) { + throw new ParseFailureException(sprintf('Expected algorithm number to be an integer, got %s', $algorithmNumber)); + } + + $rowAlgorithm = PNGPredictorAlgorithm::tryFrom($algorithmNumber) + ?? 
throw new ParseFailureException(sprintf('Unrecognized row algorithm %d', $algorithmNumber)); + if ($rowAlgorithm === PNGPredictorAlgorithm::None) { + $decodedValue .= implode('', array_slice($row, 1)); + + continue; + } + + if ($rowAlgorithm === PNGPredictorAlgorithm::Up) { + if ($rowIndex === 0) { + $decodedValue .= implode('', array_slice($row, 1)); + + continue; + } + + foreach ($row as $columnIndex => $columnValue) { + /** @phpstan-ignore offsetAccess.notFound, offsetAccess.notFound */ + $hexTable[$rowIndex][$columnIndex] = str_pad(dechex((hexdec($columnValue) + hexdec($hexTable[$rowIndex - 1][$columnIndex])) % 256), 2, '0', STR_PAD_LEFT); + } + + $decodedValue .= implode('', array_slice($hexTable[$rowIndex], 1)); + + continue; + } + } + + if (($decodedValue = hex2bin($decodedValue)) === false) { + throw new ParseFailureException('Unable to hex2bin value "' . substr(trim($value), 0, 30) . '..."'); + } + + return $decodedValue; + } +} diff --git a/includes/pdfparser/Document/Filter/Decode/LZWFlatePredictorValue.php b/includes/pdfparser/Document/Filter/Decode/LZWFlatePredictorValue.php new file mode 100644 index 0000000..b39da93 --- /dev/null +++ b/includes/pdfparser/Document/Filter/Decode/LZWFlatePredictorValue.php @@ -0,0 +1,25 @@ + $widths */ + public function __construct( + public readonly int $firstChar, + public readonly array $widths, + ) { + } + + public function getWidthForCharacter(int $characterCode): ?float { + return $this->widths[$characterCode - $this->firstChar] ?? 
null; + } +} diff --git a/includes/pdfparser/Document/Generic/Character/DelimiterCharacter.php b/includes/pdfparser/Document/Generic/Character/DelimiterCharacter.php new file mode 100644 index 0000000..76f4d67 --- /dev/null +++ b/includes/pdfparser/Document/Generic/Character/DelimiterCharacter.php @@ -0,0 +1,39 @@ +'; + case LEFT_SQUARE_BRACKET = '['; + case RIGHT_SQUARE_BRACKET = ']'; + case LEFT_CURLY_BRACKET = '{'; + case RIGHT_CURLY_BRACKET = '}'; + case SOLIDUS = '/'; + + /** + * Any occurrence of the PERCENT SIGN outside a string or stream introduces a comment. The comment + * consists of all characters after the PERCENT SIGN and up to but not including the end of the line, including + * regular, delimiter, SPACE (20h), and HORIZONTAL TAB characters (09h). A conforming reader shall ignore + * comments, and treat them as single white-space characters. That is, a comment separates the token preceding + * it from the one following it. + * + * Comments (other than the %PDF–n.m and %%EOF comments described in 7.5, "File Structure") have no + * semantics. 
They are not necessarily preserved by applications that edit PDF files + */ + case PERCENT_SIGN = '%'; +} diff --git a/includes/pdfparser/Document/Generic/Character/LiteralStringEscapeCharacter.php b/includes/pdfparser/Document/Generic/Character/LiteralStringEscapeCharacter.php new file mode 100644 index 0000000..ee0fe6f --- /dev/null +++ b/includes/pdfparser/Document/Generic/Character/LiteralStringEscapeCharacter.php @@ -0,0 +1,65 @@ + "\n", + self::CARRIAGE_RETURN => "\r", + self::HORIZONTAL_TAB => "\t", + self::BACKSPACE => "\x08", + self::FORM_FEED => "\x0C", + self::LEFT_PARENTHESIS => "(", + self::RIGHT_PARENTHESIS => ")", + self::REVERSE_SOLIDUS => "\\", + }; + } + + /** @return array{0: list, 1: list} */ + private static function getReplacementSet(): array { + $find = $replace = []; + foreach (self::cases() as $case) { + $find[] = $case->value; + $replace[] = $case->getActualCharacter(); + } + + return [$find, $replace]; + } + + public static function unescapeCharacters(string $string): string { + $string = str_replace("\\\n", '', $string); // Example 2, 7.3.4.2 newlines preceded by reverse solidus should be handled like single lines + + [$find, $replace] = LiteralStringEscapeCharacter::getReplacementSet(); + + return preg_replace_callback( + '/\\\\([0-7]{1,3})/', + static function (array $matches) { + $decimal = octdec($matches[1]); + if (!is_int($decimal) || $decimal < 0 || $decimal > 255) { + throw new ParseFailureException(sprintf('Invalid octal value "%s"', $matches[1])); + } + + return mb_chr($decimal); + }, + str_replace($find, $replace, $string) + ) ?? 
throw new ParseFailureException(); + } +} diff --git a/includes/pdfparser/Document/Generic/Character/WhitespaceCharacter.php b/includes/pdfparser/Document/Generic/Character/WhitespaceCharacter.php new file mode 100644 index 0000000..74b209c --- /dev/null +++ b/includes/pdfparser/Document/Generic/Character/WhitespaceCharacter.php @@ -0,0 +1,28 @@ +value); + } +} diff --git a/includes/pdfparser/Document/Generic/Parsing/InfiniteBuffer.php b/includes/pdfparser/Document/Generic/Parsing/InfiniteBuffer.php new file mode 100644 index 0000000..3220203 --- /dev/null +++ b/includes/pdfparser/Document/Generic/Parsing/InfiniteBuffer.php @@ -0,0 +1,48 @@ +buffer .= $char; + + return $this; + } + + public function flush(): self { + return $this->setValue(''); + } + + #[Override] + public function __toString(): string { + return $this->buffer; + } + + public function getLength(): int { + return strlen($this->buffer); + } + + public function isEmpty(): bool { + return $this->getLength() === 0; + } + + public function setValue(string $buffer): self { + $this->buffer = $buffer; + + return $this; + } + + public function removeChar(int $nChars): self { + if ($this->buffer !== '') { + $this->buffer = substr($this->buffer, 0, -$nChars); + } + + return $this; + } +} diff --git a/includes/pdfparser/Document/Generic/Parsing/RollingCharBuffer.php b/includes/pdfparser/Document/Generic/Parsing/RollingCharBuffer.php new file mode 100644 index 0000000..501434d --- /dev/null +++ b/includes/pdfparser/Document/Generic/Parsing/RollingCharBuffer.php @@ -0,0 +1,94 @@ + $length */ + private int $length; + + /** @var int<0, max> */ + private int $currentIndex = 0; + + /** + * Rolling buffer, where the modulo of the index is used. 
Fe: when writing 'a', 'b', 'c', 'd', 'e', 'f' to a buffer of length 3: + * ['a'] + * ['a', 'b'] + * ['a', 'b', 'c'] + * ['d', 'b', 'c'] + * ['d', 'e', 'c'] + * ['d', 'e', 'f'] + * + * @var array, string> + */ + private array $buffer = []; + + /** @phpstan-assert int<1, max> $length */ + public function __construct(int $length) { + if ($length < 1) { + throw new InvalidArgumentException(sprintf('A negative or zero buffer length doesn\'t make sense, %d provided', $length)); + } + + $this->length = $length; + } + + public function next(string $char): self { + $this->currentIndex++; + $this->buffer[$this->currentIndex % $this->length] = $char; + + return $this; + } + + /** @throws InvalidArgumentException */ + public function getPreviousCharacter(int $nAgo = 1): ?string { + if ($nAgo >= $this->length) { + throw new InvalidArgumentException('Buffer length of "' . $this->length . '" configured, but character "-' . $nAgo . '" requested'); + } + + return $this->buffer[($this->currentIndex - $nAgo) % $this->length] ?? 
null; + } + + /** + * @phpstan-assert non-empty-string $string + * + * @throws InvalidArgumentException + */ + public function seenString(string $string): bool { + $strlen = strlen($string); + if ($strlen === 0) { + throw new InvalidArgumentException('Cannot assert if non empty string has been encountered'); + } + + if ($strlen > $this->length) { + throw new InvalidArgumentException(sprintf('Buffer length of %d configured, but value with length %d requested', $this->length, strlen($string))); + } + + foreach (str_split($string) as $index => $char) { + $previousChar = $this->getPreviousCharacter($strlen - $index - 1); + if ($previousChar !== $char) { + return false; + } + } + + return true; + } + + /** @throws InvalidArgumentException */ + public function seenReverseString(string $string): bool { + if (strlen($string) > $this->length) { + throw new InvalidArgumentException(sprintf('Buffer length of %d configured, but enum with length %d requested', $this->length, strlen($string))); + } + + foreach (str_split($string) as $index => $char) { + $previousChar = $this->getPreviousCharacter($index); + if ($previousChar !== $char) { + return false; + } + } + + return true; + } +} diff --git a/includes/pdfparser/Document/Image/ColorSpace/ColorSpace.php b/includes/pdfparser/Document/Image/ColorSpace/ColorSpace.php new file mode 100644 index 0000000..93253c4 --- /dev/null +++ b/includes/pdfparser/Document/Image/ColorSpace/ColorSpace.php @@ -0,0 +1,46 @@ +components)) { + return $this->components; + } + + if ($this->nameValue instanceof DeviceColorSpaceNameValue) { + return $this->components = $this->nameValue->getComponents(); + } + + if ($this->LUTObj?->getDictionary()->getTypeForKey(DictionaryKey::N) !== null) { + return $this->components = Components::tryFrom( + $this->LUTObj + ->getDictionary() + ->getValueForKey(DictionaryKey::N, IntegerValue::class) + ->value ?? throw new RuntimeException('Unable to determine number of components for color space') + ) ?? 
throw new ParseFailureException('Unable to determine number of components for color space'); + } + + return $this->components = Components::Gray; + } +} diff --git a/includes/pdfparser/Document/Image/ColorSpace/ColorSpaceFactory.php b/includes/pdfparser/Document/Image/ColorSpace/ColorSpaceFactory.php new file mode 100644 index 0000000..4b38eb8 --- /dev/null +++ b/includes/pdfparser/Document/Image/ColorSpace/ColorSpaceFactory.php @@ -0,0 +1,43 @@ +\/Indexed)?\s*(?\/[A-Za-z]+|([0-9]+\s+[0-9]+\s+R))(?(\s+(?[0-9]+))?\s+(?<[A-Fa-f0-9]*>|((?[0-9]+)\s+[0-9]+\s+R)))?\s*]\s*$/', $string, $matches) !== 1) { + throw new ParseFailureException(sprintf('Invalid color space string "%s"', $string)); + } + + if (preg_match('/^(?[0-9]+)\s+[0-9]+\s+R$/', $matches['name'], $nameObjectMatches) === 1) { + $colorSpaceObject = $document->getObject((int) $nameObjectMatches['objectNr']) + ?? throw new ParseFailureException(sprintf('Unable to locate object with number %d', (int) $nameObjectMatches['objectNr'])); + if (preg_match('/^\s*\[\s*\/(?[A-Za-z]+)\s+(?[0-9]+)\s+[0-9]+\s+R\s*]\s*$/', $colorSpaceObject->getStream()->toString(), $colorSpaceObjectMatches) !== 1) { + throw new ParseFailureException(sprintf('Invalid color space string "%s" in colorSpaceObject', $colorSpaceObject->getStream()->toString())); + } + + $colorSpaceName = DeviceColorSpaceNameValue::tryFrom($colorSpaceObjectMatches['name']) + ?? SpecialColorSpaceNameValue::tryFrom($colorSpaceObjectMatches['name']) + ?? CIEColorSpaceNameValue::tryFrom($colorSpaceObjectMatches['name']) + ?? throw new ParseFailureException(sprintf('Unsupported color space name "%s"', $colorSpaceObjectMatches['name'])); + } else { + $colorSpaceName = DeviceColorSpaceNameValue::tryFrom($nameString = substr($matches['name'], 1)) + ?? SpecialColorSpaceNameValue::tryFrom($nameString) + ?? CIEColorSpaceNameValue::tryFrom($nameString) + ?? 
throw new ParseFailureException(sprintf('Unsupported color space name "%s"', $nameString)); + } + + return new ColorSpace( + $matches['indexed'] !== '' && SpecialColorSpaceNameValue::tryFrom(substr($matches['indexed'], 1)) === SpecialColorSpaceNameValue::Indexed, + $colorSpaceName, + array_key_exists('lut_obj_nr', $matches) ? $document->getObject((int) $matches['lut_obj_nr']) : null, + $matches['lut_value'] !== '' && preg_match('/^(?[0-9]+)\s+[0-9]+\s+R$/', $matches['lut_value']) === 0 ? $matches['lut_value'] : null, + $matches['lut_count'] !== '' ? (int) $matches['lut_count'] : null, + ); + } +} diff --git a/includes/pdfparser/Document/Image/ColorSpace/Components.php b/includes/pdfparser/Document/Image/ColorSpace/Components.php new file mode 100644 index 0000000..92c421c --- /dev/null +++ b/includes/pdfparser/Document/Image/ColorSpace/Components.php @@ -0,0 +1,9 @@ + 'jpg', + self::JPEG2000 => 'jp2', + self::PNG => 'png', + self::TIFF, + self::TIFF_FAX => 'tiff', + self::CUSTOM, + self::RAW => 'raw', + self::JBIG2 => 'jbig2', + }; + } +} diff --git a/includes/pdfparser/Document/Image/RasterizedImage.php b/includes/pdfparser/Document/Image/RasterizedImage.php new file mode 100644 index 0000000..00c23fb --- /dev/null +++ b/includes/pdfparser/Document/Image/RasterizedImage.php @@ -0,0 +1,110 @@ + $width + * @param int<1, max> $height + * @throws ParseFailureException + */ + public static function toPNG(ColorSpace $colorSpace, int $width, int $height, int $bitsPerComponent, Stream $content): Stream { + $image = imagecreatetruecolor($width, $height); + if ($image === false) { + throw new ParseFailureException('Unable to create image'); + } + + if ($bitsPerComponent === 1) { + $streamLength = $content->getSizeInBytes(); + if ($streamLength < ceil($width * $height * $bitsPerComponent / 8)) { + throw new ParseFailureException('Stream content is smaller than expected'); + } + + $byteIndex = $bitsRemaining = 0; + $currentByte = null; + for ($y = 0; $y < $height; $y++) { + 
for ($x = 0; $x < $width; $x++) { + if ($bitsRemaining === 0) { + $currentByte = ord($content->read($byteIndex, 1)); + $bitsRemaining = 8; + $byteIndex++; + } + + $bitPosition = --$bitsRemaining; + $bit = ($currentByte >> $bitPosition) & 1; + if (($color = $bit === 0 ? imagecolorallocate($image, 0, 0, 0) : imagecolorallocate($image, 255, 255, 255)) === false) { + throw new ParseFailureException('Unable to allocate color'); + } + + imagesetpixel($image, $x, $y, $color); + } + + $endOfRowBits = $width % 8; + if ($endOfRowBits !== 0) { + $bitsRemaining = max(0, $bitsRemaining - (8 - $endOfRowBits)); + } + } + } elseif ($bitsPerComponent === 8) { + $pixelIndex = 0; + for ($y = 0; $y < $height; $y++) { + for ($x = 0; $x < $width; $x++) { + if ($colorSpace->isIndexed && $colorSpace->LUTObj !== null) { + $indexInLUT = ord($content->read($pixelIndex, 1)); + if ($indexInLUT > $colorSpace->maxIndexLUT) { + throw new ParseFailureException('Index in LUT is too large'); + } + + $color = match ($colorSpace->getComponents()) { + Components::RGB => imagecolorallocate($image, ord($colorSpace->LUTObj->getStream()->read($indexInLUT, 1)), ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 1, 1)), ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 2, 1))), + Components::Gray => imagecolorallocate($image, $value = ord($colorSpace->LUTObj->getStream()->read($indexInLUT, 1)), $value, $value), + Components::CMYK => imagecolorallocate( + $image, + min(255, max(0, (int)(255 * (1 - (ord($colorSpace->LUTObj->getStream()->read($indexInLUT, 1)) / 255)) * (1 - (ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 3, 1)) / 255))))), + min(255, max(0, (int)(255 * (1 - (ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 1, 1)) / 255)) * (1 - (ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 3, 1)) / 255))))), + min(255, max(0, (int)(255 * (1 - (ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 2, 1)) / 255)) * (1 - 
(ord($colorSpace->LUTObj->getStream()->read($indexInLUT + 3, 1)) / 255))))), + ), + }; + $pixelIndex++; + } else { + $color = match ($colorSpace->getComponents()) { + Components::RGB => imagecolorallocate($image, ord($content->read($pixelIndex, 1)), ord($content->read($pixelIndex + 1, 1)), ord($content->read($pixelIndex + 2, 1))), + Components::Gray => imagecolorallocate($image, $value = ord($content->read($pixelIndex, 1)), $value, $value), + Components::CMYK => imagecolorallocate( + $image, + min(255, max(0, (int)(255 * (1 - (ord($content->read($pixelIndex, 1)) / 255)) * (1 - (ord($content->read($pixelIndex + 3, 1)) / 255))))), + min(255, max(0, (int)(255 * (1 - (ord($content->read($pixelIndex + 1, 1)) / 255)) * (1 - (ord($content->read($pixelIndex + 3, 1)) / 255))))), + min(255, max(0, (int)(255 * (1 - (ord($content->read($pixelIndex + 2, 1)) / 255)) * (1 - (ord($content->read($pixelIndex + 3, 1)) / 255))))), + ), + }; + $pixelIndex += $colorSpace->getComponents()->value; + } + + if ($color === false) { + throw new ParseFailureException('Unable to allocate color'); + } + + imagesetpixel($image, $x, $y, $color); + } + } + } else { + throw new ParseFailureException(sprintf('Unsupported BitsPerComponent %d', $bitsPerComponent)); + } + + ob_start(); + imagepng($image); + $imageContent = ob_get_clean(); + if ($imageContent === false) { + throw new ParseFailureException('Unable to decode image'); + } + + return FileStream::fromString($imageContent); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/Catalog.php b/includes/pdfparser/Document/Object/Decorator/Catalog.php new file mode 100644 index 0000000..f83577c --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/Catalog.php @@ -0,0 +1,49 @@ +getDictionary()->getValueForKey(DictionaryKey::PAGES, ReferenceValue::class) + ?? 
throw new ParseFailureException('Every catalog dictionary should contain a pages reference, none found'); + + return $this->document->getObject($pagesReference->objectNumber, Pages::class) + ?? throw new ParseFailureException(sprintf('Unable to retrieve pages root object with number %d', $pagesReference->objectNumber)); + } + + /** @return list */ + public function getFileSpecifications(): array { + $afType = $this->getDictionary()->getTypeForKey(DictionaryKey::AF); + if ($afType === null) { + return []; + } + + if ($afType === ReferenceValue::class) { + $referenceArrayContent = $this->getDictionary() + ->getObjectForReference($this->document, DictionaryKey::AF, FileSpecification::class) + ?->getStream()->toString() ?? throw new ParseFailureException('Unable to retrieve AF object content'); + if (($AFReferences = ReferenceValueArray::fromValue($referenceArrayContent)) instanceof ReferenceValueArray === false) { + throw new ParseFailureException('AF object is not a reference array'); + } + + return array_map( + fn (ReferenceValue $referenceValue) => $this->document->getObject($referenceValue->objectNumber, FileSpecification::class) ?? 
throw new ParseFailureException('Unable to retrieve file specification'), + $AFReferences->referenceValues, + ); + } + + if ($afType === ReferenceValueArray::class) { + return $this->getDictionary() + ->getObjectsForReference($this->document, DictionaryKey::AF, FileSpecification::class); + } + + throw new ParseFailureException(sprintf('Unexpected type "%s" for AF key', $afType)); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/DecoratedObject.php b/includes/pdfparser/Document/Object/Decorator/DecoratedObject.php new file mode 100644 index 0000000..4e04f0a --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/DecoratedObject.php @@ -0,0 +1,40 @@ +objectItem->getDictionary($document)->getType(); + if ($typeNameValue !== null && !in_array($typeNameValue->getDecoratorFQN(), [static::class, GenericObject::class], true)) { + throw new InvalidArgumentException( + sprintf('Object should have decorator %s, got %s', $typeNameValue->getDecoratorFQN(), static::class) + ); + } + } + + /** @throws PdfParserException */ + public function getDictionary(): Dictionary { + return $this->objectItem->getDictionary($this->document); + } + + public function getStream(): Stream { + return $this->objectItem->getContent($this->document); + } + + #[Deprecated('Use self::getStream() instead')] + public function getContent(): string { + return $this->getStream()->toString(); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/DecoratedObjectFactory.php b/includes/pdfparser/Document/Object/Decorator/DecoratedObjectFactory.php new file mode 100644 index 0000000..7990efc --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/DecoratedObjectFactory.php @@ -0,0 +1,33 @@ +|null $expectedDecoratorFQN + * @throws PdfParserException + * @return ($expectedDecoratorFQN is null ? 
DecoratedObject : T) + */ + public static function forItem(?ObjectItem $objectItem, Document $document, ?string $expectedDecoratorFQN): ?DecoratedObject { + if ($objectItem === null) { + return null; + } + + $typeNameValue = $objectItem->getDictionary($document)->getType(); + if ($expectedDecoratorFQN !== null && $typeNameValue !== null && $expectedDecoratorFQN !== $typeNameValue->getDecoratorFQN()) { + throw new ParseFailureException(sprintf('Expected object of type %s, got %s', $expectedDecoratorFQN, $typeNameValue->getDecoratorFQN())); + } + + $decoratorFQN = $expectedDecoratorFQN + ?? $typeNameValue?->getDecoratorFQN() + ?? GenericObject::class; + + return new $decoratorFQN($objectItem, $document); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/EmbeddedFile.php b/includes/pdfparser/Document/Object/Decorator/EmbeddedFile.php new file mode 100644 index 0000000..8c976a7 --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/EmbeddedFile.php @@ -0,0 +1,48 @@ +getDictionary() + ->getValueForKey(DictionaryKey::LENGTH, IntegerValue::class) + ?->value; + } + + public function getFileSpecificInformation(): ?Dictionary { + return $this->getDictionary() + ->getSubDictionary($this->document, DictionaryKey::PARAMS); + } + + public function getSubType(): ?string { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::SUBTYPE, TextStringValue::class) + ?->getText(); + } + + public function getSize(): ?int { + return $this->getFileSpecificInformation() + ?->getValueForKey(DictionaryKey::SIZE, IntegerValue::class) + ?->value; + } + + public function getCreationDate(): ?DateTimeImmutable { + return $this->getFileSpecificInformation() + ?->getValueForKey(DictionaryKey::CREATION_DATE, DateValue::class) + ?->value; + } + + public function getModificationDate(): ?DateTimeImmutable { + return $this->getFileSpecificInformation() + ?->getValueForKey(DictionaryKey::MOD_DATE, DateValue::class) + ?->value; + } +} diff --git 
a/includes/pdfparser/Document/Object/Decorator/EncryptDictionary.php b/includes/pdfparser/Document/Object/Decorator/EncryptDictionary.php new file mode 100644 index 0000000..ea03cfa --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/EncryptDictionary.php @@ -0,0 +1,109 @@ +getDictionary()->getTypeForKey(DictionaryKey::FILTER); + if ($filterType === null) { + return null; + } + + if ($filterType !== SecurityHandlerNameValue::class) { + throw new RuntimeException('Unable to retrieve security handler for non-security handler dictionaries'); + } + + return $this->getDictionary()->getValueForKey(DictionaryKey::FILTER, SecurityHandlerNameValue::class); + } + + public function getLengthFileEncryptionKeyInBits(): ?int { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::LENGTH, IntegerValue::class) + ?->value; + } + + public function getOwnerPasswordEntry(): string { + $textStringValue = $this->getDictionary() + ->getValueForKey(DictionaryKey::O, TextStringValue::class) + ->textStringValue + ?? throw new ParseFailureException(); + + if (str_starts_with($textStringValue, '<') && str_ends_with($textStringValue, '>')) { + $decodedValue = hex2bin(substr($textStringValue, 1, -1)); + if ($decodedValue === false) { + throw new ParseFailureException('Unable to decode owner password entry'); + } + } elseif (str_starts_with($textStringValue, '(') && str_ends_with($textStringValue, ')')) { + $decodedValue = substr($textStringValue, 1, -1); + } else { + throw new ParseFailureException(); + } + + $decodedValue = str_pad($decodedValue, 32, "\x00"); + if ($this->getStandardSecurityHandlerRevision()->value <= 4) { + return substr($decodedValue, 0, 32); + } + + return $decodedValue; + } + + public function getUserPasswordEntry(): string { + $textStringValue = $this->getDictionary() + ->getValueForKey(DictionaryKey::U, TextStringValue::class) + ->textStringValue + ?? 
throw new ParseFailureException(); + + if (str_starts_with($textStringValue, '<') && str_ends_with($textStringValue, '>')) { + $decodedValue = hex2bin(substr($textStringValue, 1, -1)); + if ($decodedValue === false) { + throw new ParseFailureException('Unable to decode user password entry'); + } + } elseif (str_starts_with($textStringValue, '(') && str_ends_with($textStringValue, ')')) { + $decodedValue = substr($textStringValue, 1, -1); + } else { + throw new ParseFailureException(); + } + + $expectedLength = $this->getStandardSecurityHandlerRevision() === StandardSecurityHandlerRevision::v2 ? 32 : 64; + return str_pad( + substr($decodedValue, 0, $expectedLength), + $expectedLength, + "\x00" + ); + } + + public function getPValue(): int { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::P, IntegerValue::class) + ->value + ?? throw new ParseFailureException('Unable to retrieve p value'); + } + + public function getSecurityAlgorithm(): ?SecurityAlgorithm { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::V, SecurityAlgorithm::class); + } + + public function getStandardSecurityHandlerRevision(): StandardSecurityHandlerRevision { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::R, StandardSecurityHandlerRevision::class) + ?? 
throw new ParseFailureException('Unable to retrieve standard security handler revision'); + } + + public function isMetadataEncrypted(): bool { + $encryptMetadata = $this->getDictionary() + ->getValueForKey(DictionaryKey::ENCRYPT_METADATA, BooleanValue::class); + + return $encryptMetadata === null || $encryptMetadata->value; // If key is not present, assume encrypted metadata + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/FileSpecification.php b/includes/pdfparser/Document/Object/Decorator/FileSpecification.php new file mode 100644 index 0000000..670214f --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/FileSpecification.php @@ -0,0 +1,39 @@ +getDictionary()->getTypeForKey(DictionaryKey::UF); + if ($ufType === TextStringValue::class) { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::UF, $ufType) + ?->getText() ?? throw new ParseFailureException(); + } + + $fType = $this->getDictionary()->getTypeForKey(DictionaryKey::F); + if ($fType === TextStringValue::class) { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::F, $fType) + ?->getText() ?? 
throw new ParseFailureException(); + } + + return null; + } + + public function getEmbeddedFileStreamDictionary(): ?Dictionary { + return $this->getDictionary() + ->getSubDictionary($this->document, DictionaryKey::EF); + } + + public function getEmbeddedFile(): ?EmbeddedFile { + return $this->getEmbeddedFileStreamDictionary() + ?->getObjectForReference($this->document, DictionaryKey::F, EmbeddedFile::class); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/Font.php b/includes/pdfparser/Document/Object/Decorator/Font.php new file mode 100644 index 0000000..3605608 --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/Font.php @@ -0,0 +1,275 @@ +getDictionary() + ->getValueForKey(DictionaryKey::BASE_FONT, TextStringValue::class) + ?->textStringValue; + } + + public function getEncodingDictionary(): ?Dictionary { + if (in_array($this->getDictionary()->getTypeForKey(DictionaryKey::ENCODING), [null, EncodingNameValue::class], true)) { + return null; + } + + return $this->getDictionary() + ->getSubDictionary($this->document, DictionaryKey::ENCODING); + } + + /** @throws PdfParserException */ + public function getEncoding(): ?EncodingNameValue { + $encodingType = $this->getDictionary()->getTypeForKey(DictionaryKey::ENCODING); + if ($encodingType === null) { + return null; + } + + if ($encodingType === EncodingNameValue::class) { + return $this->getDictionary()->getValueForKey(DictionaryKey::ENCODING, EncodingNameValue::class); + } + + return $this->getEncodingDictionary() + ?->getValueForKey(DictionaryKey::BASE_ENCODING, EncodingNameValue::class); + } + + public function getDifferences(): ?DifferencesArrayValue { + return $this->getEncodingDictionary() + ?->getValueForKey(DictionaryKey::DIFFERENCES, DifferencesArrayValue::class); + } + + /** @throws PdfParserException */ + public function getToUnicodeCMap(): ?ToUnicodeCMap { + if (isset($this->toUnicodeCMap)) { + if ($this->toUnicodeCMap === false) { + return null; + } + + return 
$this->toUnicodeCMap; + } + + $toUnicodeObject = $this->getDictionary() + ->getObjectForReference($this->document, DictionaryKey::TO_UNICODE); + if ($toUnicodeObject === null) { + $this->toUnicodeCMap = false; + + return null; + } + + if ($toUnicodeObject->objectItem instanceof UncompressedObject === false) { + throw new ParseFailureException(); + } + + $stream = $toUnicodeObject->objectItem->getContent($this->document); + return $this->toUnicodeCMap = ToUnicodeCMapParser::parse($stream, 0, $stream->getSizeInBytes()); + } + + public function getToUnicodeCMapDescendantFont(): ?ToUnicodeCMap { + foreach ($this->getDescendantFonts() as $descendantFont) { + $fontDictionary = $descendantFont instanceof Dictionary ? $descendantFont : $descendantFont->getDictionary(); + + if (($CIDSystemInfo = $fontDictionary->getValueForKey(DictionaryKey::CIDSYSTEM_INFO, Dictionary::class)) !== null) { + $fontResource = RegistryOrchestrator::getForRegistryOrderingSupplement( + $CIDSystemInfo->getValueForKey(DictionaryKey::REGISTRY, TextStringValue::class) ?? throw new ParseFailureException(), + $CIDSystemInfo->getValueForKey(DictionaryKey::ORDERING, TextStringValue::class) ?? throw new ParseFailureException(), + $CIDSystemInfo->getValueForKey(DictionaryKey::SUPPLEMENT, IntegerValue::class) ?? 
throw new ParseFailureException(), + ); + + if ($fontResource !== null) { + return $fontResource->getToUnicodeCMap(); + } + } + } + + return null; + } + + /** @throws PdfParserException */ + public function getFirstChar(): ?int { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::FIRST_CHAR, IntegerValue::class) + ?->value; + } + + /** @throws PdfParserException */ + public function getLastChar(): ?int { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::LAST_CHAR, IntegerValue::class) + ?->value; + } + + public function getWidthForChar(int $characterCode, TextState $textState, TransformationMatrix $transformationMatrix): float { + $fontWidths = $this->getWidths(); + if ($fontWidths !== null && ($charWidth = $fontWidths->getWidthForCharacter($characterCode)) !== null) { + $characterWidth = $charWidth; + } else { + $characterWidth = $this->getDefaultWidth(); + } + + return ($characterWidth * ($textState->fontSize ?? 10) + $textState->charSpace) * $transformationMatrix->scaleX; + } + + /** @param list $chars */ + public function getWidthForChars(array $chars, TextState $textState, TransformationMatrix $transformationMatrix): float { + $totalCharacterWidth = 0; + foreach ($chars as $char) { + $totalCharacterWidth += $this->getWidthForChar($char, $textState, $transformationMatrix); + } + + return $totalCharacterWidth; + } + + /** @return list */ + public function getDescendantFonts(): array { + $valueType = $this->getDictionary()->getTypeForKey(DictionaryKey::DESCENDANT_FONTS); + if ($valueType === null) { + return []; + } + + if ($valueType === ReferenceValue::class) { + $descendantFontsReference = $this->getDictionary()->getValueForKey(DictionaryKey::DESCENDANT_FONTS, ReferenceValue::class) ?? throw new ParseFailureException(); + return [ + $this->document->getObject($descendantFontsReference->objectNumber, Font::class) + ?? 
throw new ParseFailureException(sprintf('Descendant font with number %d could not be found', $descendantFontsReference->objectNumber)), + ]; + } + + if ($valueType === DictionaryArrayValue::class) { + return $this->getDictionary()->getValueForKey(DictionaryKey::DESCENDANT_FONTS, DictionaryArrayValue::class)->dictionaries ?? throw new ParseFailureException(); + } + + $descendantFonts = []; + foreach ($this->getDictionary()->getValueForKey(DictionaryKey::DESCENDANT_FONTS, ReferenceValueArray::class)->referenceValues ?? [] as $referenceValue) { + $descendantFonts[] = $this->document->getObject($referenceValue->objectNumber, Font::class) + ?? throw new ParseFailureException(sprintf('Descendant font with number %d could not be found', $referenceValue->objectNumber)); + } + + return $descendantFonts; + } + + public function isCIDFont(): bool { + return in_array( + $this->getDictionary()->getValueForKey(DictionaryKey::SUBTYPE, SubtypeNameValue::class), + [SubtypeNameValue::CID_FONT_TYPE_0, SubtypeNameValue::CID_FONT_TYPE_2, SubtypeNameValue::CID_FONT_TYPE_0_C], + true, + ); + } + + public function getDefaultWidth(): float { + if ($this->isCIDFont()) { + return ($this->getDictionary()->getValueForKey(DictionaryKey::DW, IntegerValue::class)->value + ?? 1000) / 1000; + } + + foreach ($this->getDescendantFonts() as $descendantFont) { + if ($descendantFont instanceof Dictionary && $descendantFont->getTypeForKey(DictionaryKey::W) === ReferenceValue::class) { + $descendantFont = $this->document->getObject($descendantFont->getValueForKey(DictionaryKey::W, ReferenceValue::class)->objectNumber ?? throw new ParseFailureException(), Font::class) ?? 
throw new ParseFailureException(); + } + + if ($descendantFont instanceof Font) { + return $descendantFont->getDefaultWidth(); + } + } + + return 1000; + } + + /** @throws PdfParserException */ + public function getWidths(): CIDFontWidths|FontWidths|null { + if (isset($this->widths)) { + if ($this->widths === false) { + return null; + } + + return $this->widths; + } + + if ($this->isCIDFont()) { + if ($this->getDictionary()->getTypeForKey(DictionaryKey::W) === CrossReferenceStreamByteSizes::class) { + $byteSizes = $this->getDictionary()->getValueForKey(DictionaryKey::W, CrossReferenceStreamByteSizes::class) ?? throw new ParseFailureException(); // TODO: fix misinterpretation + + return $this->widths = new CIDFontWidths(new RangeCIDWidth($byteSizes->lengthRecord1InBytes, $byteSizes->lengthRecord2InBytes, $byteSizes->lengthRecord3InBytes)); + } + + $this->widths = $this->getDictionary()->getValueForKey(DictionaryKey::W, CIDFontWidths::class) ?? false; + return $this->widths === false ? null : $this->widths; + } + + foreach ($this->getDescendantFonts() as $descendantFont) { + if ($descendantFont instanceof Dictionary && $descendantFont->getTypeForKey(DictionaryKey::W) === ReferenceValue::class) { + $descendantFont = $this->document->getObject($descendantFont->getValueForKey(DictionaryKey::W, ReferenceValue::class)->objectNumber ?? throw new ParseFailureException(), Font::class) ?? throw new ParseFailureException(); + } + + if ($descendantFont instanceof Font && ($widthsDescendantFont = $descendantFont->getWidths()) !== null) { + return $this->widths = $widthsDescendantFont; + } + } + + if ($this->getDictionary()->getTypeForKey(DictionaryKey::WIDTHS) === ReferenceValue::class) { + $object = $this->document->getObject(($widthsReference = $this->getDictionary()->getValueForKey(DictionaryKey::WIDTHS, ReferenceValue::class))->objectNumber ?? throw new ParseFailureException(), Font::class) + ?? 
throw new ParseFailureException(sprintf('Width dictionary with number %d could not be found', $widthsReference->objectNumber)); + $arrayValue = ArrayValue::fromValue($object->getStream()->toString()); + if ($arrayValue instanceof ArrayValue === false) { + throw new ParseFailureException(sprintf('Width dictionary with number %d does not contain a valid array, "%s"', $widthsReference->objectNumber, $object->getStream()->read(0, 100) . '...')); + } + + $widthsArray = $arrayValue->value; + } elseif (($widthsArray = $this->getDictionary()->getValueForKey(DictionaryKey::WIDTHS, ArrayValue::class)?->value) === null) { + $this->widths = false; + return null; + } + + if (($firstChar = $this->getFirstChar()) === null) { + $this->widths = false; + return null; + } + + return $this->widths = new FontWidths( + $firstChar, + array_values( + array_map( + fn (mixed $width): float => is_numeric($width) ? (float) $width : throw new InvalidArgumentException(sprintf('"%s" is not a valid width', ($jsonEncoded = json_encode($width)) !== false ? 
$jsonEncoded : 'value')), + array_filter( + $widthsArray, + fn (mixed $item) => $item !== '', + ), + ), + ), + ); + } + + /** @throws PdfParserException */ + public function getFontDescriptor(): ?ReferenceValue { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::FONT_DESCRIPTOR, ReferenceValue::class); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/GenericObject.php b/includes/pdfparser/Document/Object/Decorator/GenericObject.php new file mode 100644 index 0000000..1e2e5eb --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/GenericObject.php @@ -0,0 +1,6 @@ +getDictionary() + ->getValueForKey(DictionaryKey::TITLE, TextStringValue::class) + ?->getText(); + } + + /** @throws PdfParserException */ + public function getProducer(): ?string { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::PRODUCER, TextStringValue::class) + ?->getText(); + } + + /** @throws PdfParserException */ + public function getAuthor(): ?string { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::AUTHOR, TextStringValue::class) + ?->getText(); + } + + /** @throws PdfParserException */ + public function getCreator(): ?string { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::CREATOR, TextStringValue::class) + ?->getText(); + } + + /** @throws PdfParserException */ + public function getCreationDate(): ?DateTimeImmutable { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::CREATION_DATE, DateValue::class) + ?->value; + } + + /** @throws PdfParserException */ + public function getModificationDate(): ?DateTimeImmutable { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::MOD_DATE, DateValue::class) + ?->value; + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/Page.php b/includes/pdfparser/Document/Object/Decorator/Page.php new file mode 100644 index 0000000..f23a1b9 --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/Page.php @@ -0,0 +1,102 @@ + + */ 
+ public function getPositionedTextElements(): array { + return $this->getContentStream() + ->getPositionedTextElements(); + } + + /** @throws PdfParserException */ + public function getText(): string { + return $this->getContentStream() + ->getText($this->document, $this); + } + + /** @throws PdfParserException */ + public function getContentStream(): ContentStream { + return ContentStreamParser::parse( + $this->document->getObjectsByDictionaryKey($this->getDictionary(), DictionaryKey::CONTENTS), + ); + } + + /** @throws PdfParserException */ + public function getResourceDictionary(): ?Dictionary { + return $this->getDictionary() + ->getSubDictionary($this->document, DictionaryKey::RESOURCES); + } + + /** @throws PdfParserException */ + public function getXObjectsDictionary(): ?Dictionary { + return $this->getResourceDictionary() + ?->getSubDictionary($this->document, DictionaryKey::XOBJECT); + } + + /** + * @throws PdfParserException + * @return list + */ + public function getXObjects(): array { + $xObjects = []; + foreach ($this->getXObjectsDictionary()->dictionaryEntries ?? [] as $xObjectDictionaryEntry) { + if (!$xObjectDictionaryEntry->value instanceof ReferenceValue) { + throw new InvalidArgumentException(sprintf('XObjects should be references, got %s', get_class($xObjectDictionaryEntry->value))); + } + + $xObjects[] = $this->document->getObject($xObjectDictionaryEntry->value->objectNumber, XObject::class) + ?? 
throw new ParseFailureException(sprintf('Unable to locate object with nr %d', $xObjectDictionaryEntry->value->objectNumber)); + } + + return $xObjects; + } + + /** + * @throws PdfParserException + * @return list + */ + public function getImages(): array { + return array_values(array_filter( + $this->getXObjects(), + fn (XObject $XObject) => $XObject->isImage(), + )); + } + + /** @throws PdfParserException */ + public function getFontDictionary(): ?Dictionary { + if (($pageFontDictionary = $this->getDictionary()->getSubDictionary($this->document, DictionaryKey::FONT)) !== null) { + return $pageFontDictionary; + } + + if (($pageResourceFontDictionary = $this->getResourceDictionary()?->getSubDictionary($this->document, DictionaryKey::FONT)) !== null) { + return $pageResourceFontDictionary; + } + + if (($pagesParent = $this->getDictionary()->getObjectForReference($this->document, DictionaryKey::PARENT, Pages::class)) === null) { + return null; + } + + return $pagesParent->getResourceDictionary() + ?->getSubDictionary($this->document, DictionaryKey::FONT); + } + + /** @return list */ + public function getFileSpecifications(): array { + return $this->getDictionary() + ->getObjectsForReference($this->document, DictionaryKey::AF, FileSpecification::class); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/Pages.php b/includes/pdfparser/Document/Object/Decorator/Pages.php new file mode 100644 index 0000000..e86b389 --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/Pages.php @@ -0,0 +1,39 @@ + + */ + public function getPageItems(): array { + $kids = []; + foreach ($this->getDictionary()->getValueForKey(DictionaryKey::KIDS, ReferenceValueArray::class)->referenceValues ?? [] as $referenceValue) { + $kidObject = $this->document->getObject($referenceValue->objectNumber) + ?? 
throw new ParseFailureException(sprintf('Child with number %d could not be found', $referenceValue->objectNumber)); + + if ($kidObject instanceof Pages) { + $kids = [...$kids, ...$kidObject->getPageItems()]; + } elseif ($kidObject instanceof Page) { + $kids[] = $kidObject; + } elseif ($kidObject instanceof GenericObject) { + $kids[] = new Page($kidObject->objectItem, $this->document); + } + } + + return $kids; + } + + /** @throws PdfParserException */ + public function getResourceDictionary(): ?Dictionary { + return $this->getDictionary() + ->getSubDictionary($this->document, DictionaryKey::RESOURCES); + } +} diff --git a/includes/pdfparser/Document/Object/Decorator/XObject.php b/includes/pdfparser/Document/Object/Decorator/XObject.php new file mode 100644 index 0000000..c67ebd6 --- /dev/null +++ b/includes/pdfparser/Document/Object/Decorator/XObject.php @@ -0,0 +1,150 @@ +getDictionary() + ->getSubType() === SubtypeNameValue::IMAGE; + } + + public function isForm(): bool { + return $this->getDictionary() + ->getSubType() === SubtypeNameValue::FORM; + } + + public function getWidth(): ?int { + if ($this->getDictionary()->getTypeForKey(DictionaryKey::WIDTH) === null) { + return null; + } + + return $this->getDictionary() + ->getValueForKey(DictionaryKey::WIDTH, IntegerValue::class) + ?->value; + } + + public function getHeight(): ?int { + if ($this->getDictionary()->getTypeForKey(DictionaryKey::HEIGHT) === null) { + return null; + } + + return $this->getDictionary() + ->getValueForKey(DictionaryKey::HEIGHT, IntegerValue::class) + ?->value; + } + + public function getLength(): ?int { + if ($this->getDictionary()->getTypeForKey(DictionaryKey::LENGTH) === null) { + return null; + } + + return $this->getDictionary() + ->getValueForKey(DictionaryKey::LENGTH, IntegerValue::class) + ?->value; + } + + public function getImageType(): ?ImageType { + if (!$this->isImage()) { + throw new RuntimeException('Unable to retrieve image type for XObjects that is not an image'); + } + 
+ $filterValueType = $this->getDictionary()->getTypeForKey(DictionaryKey::FILTER); + if ($filterValueType === null) { + return null; + } + + if ($filterValueType === FilterNameValue::class) { + return $this->getDictionary()->getValueForKey(DictionaryKey::FILTER, FilterNameValue::class)?->getImageType(); + } + + if ($filterValueType === ArrayValue::class) { + foreach ($this->getDictionary()->getValueForKey(DictionaryKey::FILTER, ArrayValue::class)->value ?? throw new RuntimeException() as $filterValue) { + if (!is_string($filterValue)) { + throw new ParseFailureException(sprintf('Expected a string for filter value, got "%s"', ($jsonEncoded = json_encode($filterValue)) !== false ? $jsonEncoded : 'Unknown')); + } + + $filterValue = FilterNameValue::tryFrom(ltrim($filterValue, '/')) ?? throw new ParseFailureException(sprintf('Unsupported filter value "%s"', $filterValue)); + if ($filterValue->getImageType() !== null) { + return $filterValue->getImageType(); + } + } + } + + throw new ParseFailureException(sprintf('Unsupported filter value type %s', $filterValueType)); + } + + private function getBitsPerComponent(): ?int { + return $this->getDictionary() + ->getValueForKey(DictionaryKey::BITS_PER_COMPONENT, IntegerValue::class)?->value; + } + + private function getColorSpace(): ?ColorSpace { + if (($type = $this->getDictionary()->getTypeForKey(DictionaryKey::COLOR_SPACE)) === null) { + return null; + } + + if ($type === DeviceColorSpaceNameValue::class || $type === CIEColorSpaceNameValue::class || $type === SpecialColorSpaceNameValue::class) { + return new ColorSpace(false, $this->getDictionary()->getValueForKey(DictionaryKey::COLOR_SPACE, $type) ?? throw new ParseFailureException(), null, null, null); + } + + if ($type === ArrayValue::class) { + $colorSpaceArray = $this->getDictionary()->getValueForKey(DictionaryKey::COLOR_SPACE, ArrayValue::class) + ?? 
throw new ParseFailureException(); + + return ColorSpaceFactory::fromString($colorSpaceArray->toString(), $this->document); + } + + if ($type === ReferenceValue::class) { + $colorSpaceObject = $this->getDictionary()->getObjectForReference($this->document, DictionaryKey::COLOR_SPACE) + ?? throw new ParseFailureException('Unable to retrieve colorspace object'); + + return ColorSpaceFactory::fromString($colorSpaceObject->getStream()->toString(), $this->document); + } + + throw new ParseFailureException(sprintf('Unsupported colorspace format %s', $type)); + } + + #[Override] + public function getStream(): Stream { + $content = parent::getStream(); + if (!$this->isImage() || $this->getImageType() !== ImageType::PNG) { + return $content; + } + + $height = $this->getHeight() ?? throw new RuntimeException('Unable to retrieve height'); + if ($height < 1) { + throw new RuntimeException(sprintf('Height %d cannot be less than 1', $height)); + } + + $width = $this->getWidth() ?? throw new RuntimeException('Unable to retrieve width'); + if ($width < 1) { + throw new RuntimeException(sprintf('Width %d cannot be less than 1', $width)); + } + + return RasterizedImage::toPNG( + $this->getColorSpace() ?? throw new RuntimeException('Unable to retrieve colorspace'), + $width, + $height, + $this->getBitsPerComponent() ?? 
throw new RuntimeException('Unable to retrieve bits per component'), + $content, + ); + } +} diff --git a/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObject.php b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObject.php new file mode 100644 index 0000000..1f4d3ab --- /dev/null +++ b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObject.php @@ -0,0 +1,77 @@ +endByteOffsetInDecodedStream !== null && $this->startByteOffsetInDecodedStream > $this->endByteOffsetInDecodedStream) { + throw new InvalidArgumentException(sprintf('Start offset %d should be before end offset %d', $this->startByteOffsetInDecodedStream, $this->endByteOffsetInDecodedStream)); + } + } + + #[Override] + public function getDictionary(Document $document): Dictionary { + if (isset($this->dictionary)) { + return $this->dictionary; + } + + $objectContent = trim($this->getContent($document)->toString()); + if ($objectContent === '' || !str_starts_with($objectContent, '<<') || !str_ends_with($objectContent, '>>')) { + return $this->dictionary = new Dictionary(); + } + + $inMemoryStream = new InMemoryStream($objectContent); + return $this->dictionary = DictionaryParser::parse($inMemoryStream, 0, $inMemoryStream->getSizeInBytes()); + } + + #[Override] + public function getContent(Document $document): Stream { + $first = $this->storedInObject->getDictionary($document)->getValueForKey(DictionaryKey::FIRST, IntegerValue::class) + ?? throw new RuntimeException('Expected a dictionary entry for "First", none found'); + + $content = substr( + $this->storedInObject->getContent($document)->toString(), + $first->value + $this->startByteOffsetInDecodedStream, + $this->endByteOffsetInDecodedStream !== null ? 
$this->endByteOffsetInDecodedStream - $this->startByteOffsetInDecodedStream : null + ); + + if (str_starts_with($content, '[') && str_ends_with($content, ']') && ($referenceValueArray = ReferenceValueArray::fromValue($content)) !== null) { + $content = implode( + '', + array_map( + fn (ReferenceValue $referenceValue) => ($document->getObject($referenceValue->objectNumber) ?? throw new ParseFailureException()) + ->getStream() + ->toString(), + $referenceValueArray->referenceValues, + ) + ); + } + + return FileStream::fromString($content); + } +} diff --git a/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectByteOffsetParser.php b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectByteOffsetParser.php new file mode 100644 index 0000000..cb6a6cd --- /dev/null +++ b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectByteOffsetParser.php @@ -0,0 +1,69 @@ +getStartNextLineAfter(Marker::STREAM, $startOffsetObject, $endOffsetObject) + ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::STREAM->value)); + if ($dictionary->getTypeForKey(DictionaryKey::LENGTH) === IntegerValue::class && ($lengthInteger = $dictionary->getValueForKey(DictionaryKey::LENGTH, IntegerValue::class)) !== null) { + $length = $lengthInteger->value; + } else { + $endStreamPos = $stream->lastPos(Marker::END_STREAM, $stream->getSizeInBytes() - $endOffsetObject) + ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::END_STREAM->value)); + $eolPos = $stream->getEndOfCurrentLine($endStreamPos - 1, $endOffsetObject) + ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', WhitespaceCharacter::LINE_FEED->value)); + $length = $eolPos - $startStreamPos; + } + + $content = bin2hex(CompressedObjectContentParser::parseBinary($stream, $startStreamPos, $length, $dictionary)->toString()); + $first = $dictionary->getValueForKey(DictionaryKey::FIRST, IntegerValue::class) + ?? 
throw new RuntimeException('Expected a dictionary entry for "First", none found'); + $buffer = new InfiniteBuffer(); + $previousObjectNumber = null; + $byteOffsets = []; + foreach (str_split(substr($content, 0, $first->value * 2), 2) as $char) { + $decodedChar = mb_chr((int) hexdec($char)); + if (WhitespaceCharacter::tryFrom($decodedChar) !== null) { + $numberInBuffer = $buffer->__toString(); + if (trim($numberInBuffer) === '') { + $buffer->flush(); + continue; + } + + if ($numberInBuffer !== (string)(int) $numberInBuffer) { + throw new ParseFailureException(sprintf('Number "%s" in buffer is not a valid number', $numberInBuffer)); + } + + $numberInBuffer = (int) $numberInBuffer; + if ($previousObjectNumber !== null) { + $byteOffsets[$previousObjectNumber] = $numberInBuffer; + $previousObjectNumber = null; + } else { + $previousObjectNumber = $numberInBuffer; + } + + $buffer->flush(); + continue; + } + + $buffer->addChar($decodedChar); + } + + return new CompressedObjectByteOffsets($byteOffsets); + } +} diff --git a/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectByteOffsets.php b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectByteOffsets.php new file mode 100644 index 0000000..3ce5e12 --- /dev/null +++ b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectByteOffsets.php @@ -0,0 +1,28 @@ + $objectNumberByteOffsets */ + public function __construct( + private readonly array $objectNumberByteOffsets, + ) { + } + + public function getRelativeByteOffsetForObject(int $objNumber): ?int { + return $this->objectNumberByteOffsets[$objNumber] ?? 
null; + } + + public function getNextRelativeByteOffset(int $currentByteOffset): ?int { + $byteOffsets = array_values($this->objectNumberByteOffsets); + sort($byteOffsets); + foreach ($byteOffsets as $byteOffset) { + if ($byteOffset > $currentByteOffset) { + return $byteOffset; + } + } + + return null; + } +} diff --git a/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectContent/CompressedObjectContentParser.php b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectContent/CompressedObjectContentParser.php new file mode 100644 index 0000000..c94c0f0 --- /dev/null +++ b/includes/pdfparser/Document/Object/Item/CompressedObject/CompressedObjectContent/CompressedObjectContentParser.php @@ -0,0 +1,71 @@ +stream : $context)->read($startPos, $nrOfBytes); + if ($context instanceof Document && $context->security !== null && ($encryptDictionary = $context->getEncryptDictionary()) !== null) { + $binaryStreamContent = RC4::crypt( + $context->security->getUserFileEncryptionKey($encryptDictionary, $context->crossReferenceSource->getFirstId()), + $binaryStreamContent + ); + } + + if (($filterType = $dictionary->getTypeForKey(DictionaryKey::FILTER)) === FilterNameValue::class) { + $binaryStreamContent = ($dictionary->getValueForKey(DictionaryKey::FILTER, FilterNameValue::class) ?? throw new ParseFailureException()) + ->decodeBinary($binaryStreamContent, $dictionary, ($context instanceof Document ? $context : null)); + } elseif ($filterType === ArrayValue::class) { + foreach ($dictionary->getValueForKey(DictionaryKey::FILTER, ArrayValue::class)->value ?? throw new ParseFailureException() as $filterValue) { + if (is_string($filterValue) === false || ($filter = FilterNameValue::tryFrom(ltrim($filterValue, '/'))) === null) { + throw new ParseFailureException(); + } + + $binaryStreamContent = $filter + ->decodeBinary($binaryStreamContent, $dictionary, ($context instanceof Document ? 
$context : null)); + } + } elseif ($filterType === ReferenceValue::class) { + if (!$context instanceof Document) { + throw new ParseFailureException('Filter reference is only supported in a Document'); + } + + $filter = $dictionary->getObjectForReference($context, DictionaryKey::FILTER) ?? throw new ParseFailureException('Unable to retrieve filter object'); + if (($filterArray = ArrayValue::fromValue($filter->getStream()->toString())) instanceof ArrayValue === false) { + throw new ParseFailureException('Filter object is not an array'); + } + + foreach ($filterArray->value as $filterValue) { + if (is_string($filterValue) === false || ($filter = FilterNameValue::tryFrom(ltrim($filterValue, '/'))) === null) { + throw new ParseFailureException(); + } + + $binaryStreamContent = $filter + ->decodeBinary($binaryStreamContent, $dictionary, $context); + } + } elseif ($filterType !== null) { + throw new RuntimeException(sprintf('Expected filter to be a FilterNameValue or ArrayValue, got %s', $filterType)); + } + + return FileStream::fromString($binaryStreamContent); + } +} diff --git a/includes/pdfparser/Document/Object/Item/ObjectItem.php b/includes/pdfparser/Document/Object/Item/ObjectItem.php new file mode 100644 index 0000000..32ce927 --- /dev/null +++ b/includes/pdfparser/Document/Object/Item/ObjectItem.php @@ -0,0 +1,15 @@ +dictionary)) { + return $this->dictionary; + } + + $startDictionaryPos = $document->stream->firstPos(DelimiterCharacter::LESS_THAN_SIGN, $this->startOffset, $this->endOffset); + if ($startDictionaryPos === null) { + return $this->dictionary = new Dictionary(); + } + + $endDictionaryPos = $document->stream->firstPos(Marker::STREAM, $startDictionaryPos, $this->endOffset) + ?? $document->stream->lastPos(Marker::END_OBJ, $document->stream->getSizeInBytes() - $this->endOffset) + ?? 
throw new ParseFailureException('Unable to locate start of stream or end of current object'); + + return $this->dictionary = DictionaryParser::parse($document->stream, $startDictionaryPos, $endDictionaryPos - $startDictionaryPos); + } + + public function getCompressedObject(int $objectNumber, Document $document): CompressedObject { + $byteOffsets = $this->getByteOffsets($document); + $startByteOffset = $byteOffsets->getRelativeByteOffsetForObject($objectNumber) + ?? throw new InvalidArgumentException('Compressed object does not exist in this uncompressed object'); + + return new CompressedObject( + $objectNumber, + $this, + $startByteOffset, + $byteOffsets->getNextRelativeByteOffset($startByteOffset), + ); + } + + public function getByteOffsets(Document $document): CompressedObjectByteOffsets { + if (isset($this->byteOffsets)) { + return $this->byteOffsets; + } + + $dictionary = $this->getDictionary($document); + if ($dictionary->getType() !== TypeNameValue::OBJ_STM) { + throw new ParseFailureException('Unable to get stream data from item that is not a stream'); + } + + return $this->byteOffsets = CompressedObjectByteOffsetParser::parse( + $document->stream, + $this->startOffset, + $this->endOffset, + $dictionary + ); + } + + #[Override] + public function getContent(Document $document): Stream { + if (($startStreamPos = $document->stream->getStartNextLineAfter(Marker::STREAM, $this->startOffset, $this->endOffset)) !== null + && ($endStreamPos = $document->stream->lastPos(Marker::END_STREAM, $document->stream->getSizeInBytes() - $this->endOffset)) !== null) { + return CompressedObjectContentParser::parseBinary( + $document, + $startStreamPos, + ($document->stream->getEndOfCurrentLine($endStreamPos - 1, $this->endOffset) + ?? 
throw new ParseFailureException(sprintf('Unable to locate marker %s', WhitespaceCharacter::LINE_FEED->value))) - $startStreamPos, + $this->getDictionary($document), + ); + } + + $nextLineAfterStartObj = $document->stream->getStartNextLineAfter(Marker::OBJ, $this->startOffset, $this->endOffset) + ?? throw new ParseFailureException(sprintf('Unable to locate newline after marker %s', Marker::OBJ->value)); + $endObjPos = $document->stream->lastPos(Marker::END_OBJ, $document->stream->getSizeInBytes() - $this->endOffset) + ?? throw new ParseFailureException(sprintf('Unable to locate marker %s', Marker::END_OBJ->value)); + $eolObjContent = $document->stream->getEndOfCurrentLine($endObjPos - 2, $this->endOffset) + ?? throw new ParseFailureException(sprintf('Unable to locate newline after marker %s', Marker::END_OBJ->value)); + + return FileStream::fromString( + $document->stream->read( + $nextLineAfterStartObj, + $eolObjContent - $nextLineAfterStartObj, + ) + ); + } +} diff --git a/includes/pdfparser/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php b/includes/pdfparser/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php new file mode 100644 index 0000000..159bdf1 --- /dev/null +++ b/includes/pdfparser/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php @@ -0,0 +1,29 @@ +firstPos(Marker::END_OBJ, $crossReferenceEntry->byteOffsetInDecodedStream, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate end of object'); + $startObj = $stream->firstPos(Marker::OBJ, $crossReferenceEntry->byteOffsetInDecodedStream, $endObj) ?? 
throw new ParseFailureException('Unable to locate start of object'); + $objHeader = $stream->read($crossReferenceEntry->byteOffsetInDecodedStream, $startObj + Marker::OBJ->length() - $crossReferenceEntry->byteOffsetInDecodedStream); + $objHeaderParts = explode(WhitespaceCharacter::SPACE->value, str_replace([WhitespaceCharacter::LINE_FEED->value], ' ', trim($objHeader))); + if (count($objHeaderParts) !== 3 || (int) $objHeaderParts[0] !== $objectNumber || (int) $objHeaderParts[1] !== $crossReferenceEntry->generationNumber || $objHeaderParts[2] !== Marker::OBJ->value) { + throw new ParseFailureException(sprintf('Expected "%d %d %s" on first line, got "%s"', $objectNumber, $crossReferenceEntry->generationNumber, Marker::OBJ->value, $objHeader)); + } + + return new UncompressedObject( + $objectNumber, + $crossReferenceEntry->generationNumber, + $crossReferenceEntry->byteOffsetInDecodedStream, + $endObj + Marker::END_OBJ->length(), + ); + } +} diff --git a/includes/pdfparser/Document/Security/SecurityAlgorithm.php b/includes/pdfparser/Document/Security/SecurityAlgorithm.php new file mode 100644 index 0000000..e101054 --- /dev/null +++ b/includes/pdfparser/Document/Security/SecurityAlgorithm.php @@ -0,0 +1,21 @@ +getUserPasswordEntry(); + $securityHandlerRevision = $encryptDictionary->getStandardSecurityHandlerRevision(); + + $fileEncryptionKey = $this->getUserFileEncryptionKey($encryptDictionary, $firstID); + if ($securityHandlerRevision === StandardSecurityHandlerRevision::v2) { // @see 7.6.4.4.3, step b + return hash_equals($userPasswordEntry, RC4::crypt($fileEncryptionKey, self::PADDING_STRING)); + } + + if (in_array($securityHandlerRevision, [StandardSecurityHandlerRevision::v3, StandardSecurityHandlerRevision::v4], true)) { // @see 7.6.4.4.4, step b through e + $hash = md5(self::PADDING_STRING . 
$firstID, true); + $encryptedHash = RC4::crypt($fileEncryptionKey, $hash); + for ($i = 1; $i <= 19; $i++) { + $encryptedHash = RC4::crypt( + implode('', array_map( + fn ($c) => chr(ord($c) ^ $i), + str_split($fileEncryptionKey) + )), + $encryptedHash, + ); + } + + return hash_equals(substr($userPasswordEntry, 0, 16), $encryptedHash); + } + + throw new NotImplementedException('Unsupported security handler revision: ' . $securityHandlerRevision->value); + } + + /** @see 7.6.4.4.6 */ + public function isOwnerPasswordValid(EncryptDictionary $encryptDictionary, string $firstID): bool { + $fileEncryptionKey = $this->getOwnerFileEncryptionKey($encryptDictionary); + + $ownerPasswordEntry = $encryptDictionary->getOwnerPasswordEntry(); + if ($encryptDictionary->getStandardSecurityHandlerRevision() === StandardSecurityHandlerRevision::v2) { + $userPassword = RC4::crypt($fileEncryptionKey, $ownerPasswordEntry); + } else { + $userPassword = $ownerPasswordEntry; + for ($i = 19; $i >= 0; $i--) { + $userPassword = RC4::crypt( + implode('', array_map( + fn ($c) => chr(ord($c) ^ $i), + str_split($fileEncryptionKey) + )), + $userPassword, + ); + } + } + + if ($this->userPassword !== null && $userPassword !== $this->userPassword) { + return false; + } + + $this->userPassword = $userPassword; + return $this->isUserPasswordValid($encryptDictionary, $firstID); + } + + /** @see 7.6.4.4.2 */ + public function getUserFileEncryptionKey(EncryptDictionary $encryptDictionary, string $firstIDValue): string { + if (in_array($encryptDictionary->getStandardSecurityHandlerRevision(), [StandardSecurityHandlerRevision::v2, StandardSecurityHandlerRevision::v3, StandardSecurityHandlerRevision::v4], true) === false) { + throw new NotImplementedException('Unsupported security handler revision: ' . $encryptDictionary->getStandardSecurityHandlerRevision()->value); + } + + $fileEncryptionKeyLengthInBits = $encryptDictionary->getLengthFileEncryptionKeyInBits() ?? 
throw new ParseFailureException(); + if ($encryptDictionary->getSecurityAlgorithm() === SecurityAlgorithm::AES_Key_length_256) { // V = 4 + throw new NotImplementedException('AES-based stream decryption is not yet supported.'); + } + + if ($fileEncryptionKeyLengthInBits % 8 !== 0 || !is_int($fileEncryptionKeyLengthInBytes = $fileEncryptionKeyLengthInBits / 8)) { + throw new ParseFailureException('Unsupported file encryption key length in bits: ' . $fileEncryptionKeyLengthInBits); + } + + $hashedString = + $this->getPaddedUserPassword() // step a+b + . $encryptDictionary->getOwnerPasswordEntry() // step c + . pack('V', $encryptDictionary->getPValue()) // step d + . $firstIDValue; // step e + if ($encryptDictionary->getStandardSecurityHandlerRevision()->value >= 4 && $encryptDictionary->isMetadataEncrypted() === false) { + $hashedString .= "\xFF\xFF\xFF\xFF"; + } + + $md5Hash = md5($hashedString, true); + if ($encryptDictionary->getStandardSecurityHandlerRevision() === StandardSecurityHandlerRevision::v2) { + return substr($md5Hash, 0, 5); + } + + for ($i = 1; $i <= 50; $i++) { // step h + $md5Hash = md5(substr($md5Hash, 0, $fileEncryptionKeyLengthInBytes), true); + } + + return substr($md5Hash, 0, $fileEncryptionKeyLengthInBytes); + } + + private function getOwnerFileEncryptionKey(EncryptDictionary $encryptDictionary): string { + if (in_array($encryptDictionary->getStandardSecurityHandlerRevision(), [StandardSecurityHandlerRevision::v2, StandardSecurityHandlerRevision::v3, StandardSecurityHandlerRevision::v4], true) === false) { + throw new NotImplementedException('Unsupported security handler revision: ' . $encryptDictionary->getStandardSecurityHandlerRevision()->value); + } + + $fileEncryptionKeyLengthInBits = $encryptDictionary->getLengthFileEncryptionKeyInBits() ?? 
throw new ParseFailureException(); + if ($encryptDictionary->getSecurityAlgorithm() === SecurityAlgorithm::AES_Key_length_256) { // V = 4 + throw new NotImplementedException('AES-based stream decryption is not yet supported.'); + } + + if ($fileEncryptionKeyLengthInBits % 8 !== 0 || !is_int($fileEncryptionKeyLengthInBytes = $fileEncryptionKeyLengthInBits / 8)) { + throw new ParseFailureException('Unsupported file encryption key length in bits: ' . $fileEncryptionKeyLengthInBits); + } + + $md5Hash = md5($this->getPaddedOwnerPassword(), true); + if ($encryptDictionary->getStandardSecurityHandlerRevision() !== StandardSecurityHandlerRevision::v2) { + for ($i = 1; $i <= 50; $i++) { // step c + $md5Hash = md5($md5Hash, true); + } + } + + if ($encryptDictionary->getStandardSecurityHandlerRevision() === StandardSecurityHandlerRevision::v2) { + return substr($md5Hash, 0, 5); + } + + return substr($md5Hash, 0, $fileEncryptionKeyLengthInBytes); + } + + /** @see 7.6.4.3.2 step a */ + public function getPaddedUserPassword(): string { + return substr($this->userPassword ?? '', 0, self::PASSWORD_LENGTH) + . substr(self::PADDING_STRING, 0, max(0, self::PASSWORD_LENGTH - strlen($this->userPassword ?? ''))); + } + + /** @see 7.6.4.3.2 step a */ + public function getPaddedOwnerPassword(): string { + return substr($this->ownerPassword ?? $this->userPassword ?? '', 0, self::PASSWORD_LENGTH) + . substr(self::PADDING_STRING, 0, max(0, self::PASSWORD_LENGTH - strlen($this->ownerPassword ?? $this->userPassword ?? ''))); + } +} diff --git a/includes/pdfparser/Document/Security/StandardSecurityHandlerRevision.php b/includes/pdfparser/Document/Security/StandardSecurityHandlerRevision.php new file mode 100644 index 0000000..f557320 --- /dev/null +++ b/includes/pdfparser/Document/Security/StandardSecurityHandlerRevision.php @@ -0,0 +1,13 @@ +read(0, Marker::VERSION->length()) !== Marker::VERSION->value) { + throw new ParseFailureException('Unexpected start of file format. 
is this a pdf?'); + } + + $versionString = $stream->read(strlen(Marker::VERSION->value), Version::length()); + $version = Version::tryFrom($versionString); + if ($version === null) { + throw new ParseFailureException(sprintf('Unsupported PDF version "%s"', $versionString)); + } + + return $version; + } +} diff --git a/includes/pdfparser/Exception/AuthenticationFailedException.php b/includes/pdfparser/Exception/AuthenticationFailedException.php new file mode 100644 index 0000000..fd4e1a1 --- /dev/null +++ b/includes/pdfparser/Exception/AuthenticationFailedException.php @@ -0,0 +1,6 @@ +parse($stream, $security); + } + + /** + * @param bool $useFileCache if set to true, the file will be cached to a temporary file. This will use less memory, but will be significantly slower + * @throws PdfParserException + */ + public function parseString(string $content, bool $useFileCache = false, ?StandardSecurity $security = null): Document { + if ($useFileCache) { + $stream = FileStream::fromString($content); + } else { + $stream = new InMemoryStream($content); + } + + return $this->parse($stream, $security); + } +} diff --git a/includes/pdfparser/Stream/AbstractStream.php b/includes/pdfparser/Stream/AbstractStream.php new file mode 100644 index 0000000..0267ff0 --- /dev/null +++ b/includes/pdfparser/Stream/AbstractStream.php @@ -0,0 +1,65 @@ +firstPos($needle, $offsetFromStart, $before); + if ($markerPos === null) { + return null; + } + + return $this->getStartOfNextLine($markerPos, $before); + } + + #[Override] + public function getStartOfNextLine(int $byteOffset, int $before): ?int { + $firstLineFeedPos = $this->firstPos(WhitespaceCharacter::LINE_FEED, $byteOffset, $before); + $firstCarriageReturnPos = $this->firstPos(WhitespaceCharacter::CARRIAGE_RETURN, $byteOffset, $before); + if ($firstLineFeedPos === null && $firstCarriageReturnPos === null) { + return null; + } + + if ($firstCarriageReturnPos === null) { + return $firstLineFeedPos + 1; + } + + if ($firstLineFeedPos === 
null) { + return $firstCarriageReturnPos + 1; + } + + return min($firstLineFeedPos, $firstCarriageReturnPos) + + (abs($firstCarriageReturnPos - $firstLineFeedPos) === 1 ? 2 : 1); // If the CR and LF are next to each other, we need to add 2 bytes, otherwise 1 + } + + #[Override] + public function getEndOfCurrentLine(int $byteOffset, int $before): ?int { + $firstLineFeedPos = $this->firstPos(WhitespaceCharacter::LINE_FEED, $byteOffset, $before); + $firstCarriageReturnPos = $this->firstPos(WhitespaceCharacter::CARRIAGE_RETURN, $byteOffset, $before); + if ($firstLineFeedPos === null && $firstCarriageReturnPos === null) { + return null; + } + + if ($firstCarriageReturnPos === null) { + return $firstLineFeedPos; + } + + if ($firstLineFeedPos === null) { + return $firstCarriageReturnPos; + } + + return min($firstLineFeedPos, $firstCarriageReturnPos); + } + + #[Override] + public function toString(): string { + return $this->read(0, $this->getSizeInBytes()); + } +} diff --git a/includes/pdfparser/Stream/FileStream.php b/includes/pdfparser/Stream/FileStream.php new file mode 100644 index 0000000..498c54f --- /dev/null +++ b/includes/pdfparser/Stream/FileStream.php @@ -0,0 +1,157 @@ +handle = $handle; + } + + public static function openFile(string $path): self { + $handle = fopen($path, 'rb'); + if ($handle === false) { + throw new InvalidArgumentException(sprintf('Failed to open file at path "%s"', $path)); + } + + return new self($handle); + } + + public static function fromString(string $content): self { + $handle = fopen('php://temp', 'rb+'); + if ($handle === false) { + throw new RuntimeException('Unable to create file handle to temp'); + } + + fwrite($handle, $content); + rewind($handle); + + return new self($handle); + } + + #[Override] + public function getSizeInBytes(): int { + $stats = fstat($this->handle); + if ($stats === false) { + throw new RuntimeException('Unable to retrieve file information'); + } + + return $stats['size']; + } + + #[Override] + public 
function read(int $from, int $nrOfBytes): string { + if ($nrOfBytes <= 0) { + throw new InvalidArgumentException(sprintf('$nrOfBytes must be greater than 0, %d given', $nrOfBytes)); + } + + fseek($this->handle, $from); + + $bytes = fread($this->handle, $nrOfBytes); + if ($bytes === false) { + throw new RuntimeException('Unable to read from handle'); + } + + return $bytes; + } + + #[Override] + public function slice(int $startByteOffset, int $endByteOffset): string { + if ($startByteOffset <= 0) { + throw new InvalidArgumentException(sprintf('$startByteOffset must be greater than 0, %d given', $startByteOffset)); + } + + if ($endByteOffset - $startByteOffset < 1) { + throw new InvalidArgumentException(sprintf('End byte offset %d should be bigger than start byte offset %d', $endByteOffset, $startByteOffset)); + } + + fseek($this->handle, $startByteOffset); + + $bytes = fread($this->handle, $endByteOffset - $startByteOffset); + if ($bytes === false) { + throw new RuntimeException('Unable to read bytes from handle'); + } + + return $bytes; + } + + #[Override] + public function chars(int $from, int $nrOfBytes): iterable { + if ($from < 0) { + throw new InvalidArgumentException(sprintf('StartOffset should be greater than zero, %d given', $from)); + } + + if ($nrOfBytes <= 0) { + throw new InvalidArgumentException(sprintf('$nrOfBytes to read must be greater than 0, %d given', $nrOfBytes)); + } + + $bytesRead = 0; + while ($bytesRead < $nrOfBytes) { + fseek($this->handle, $from + $bytesRead); + $bytes = fread($this->handle, 1); + if ($bytes === false) { + throw new RuntimeException('Unable to read bytes from stream'); + } + yield $bytes; + $bytesRead++; + } + } + + #[Override] + public function firstPos(WhitespaceCharacter|Marker|DelimiterCharacter|ToUnicodeCMapOperator $needle, int $offsetFromStart, int $before): ?int { + $rollingCharBuffer = new RollingCharBuffer($needleLength = strlen($needle->value)); + while ($offsetFromStart < $before) { + fseek($this->handle, 
$offsetFromStart); + $character = fgetc($this->handle); + if ($character === false) { + throw new RuntimeException('Unable to get char from stream'); + } + $rollingCharBuffer->next($character); + $offsetFromStart++; + if ($rollingCharBuffer->seenString($needle->value)) { + return $offsetFromStart - $needleLength; + } + } + + return null; + } + + #[Override] + public function lastPos(WhitespaceCharacter|Marker|DelimiterCharacter|ToUnicodeCMapOperator $needle, int $offsetFromEnd): ?int { + $rollingCharBuffer = new RollingCharBuffer(strlen($needle->value)); + $offsetFromEnd++; + while (fseek($this->handle, - $offsetFromEnd, SEEK_END) !== -1) { + $character = fgetc($this->handle); + if ($character === false) { + throw new RuntimeException('Unable to get character from stream'); + } + $rollingCharBuffer->next($character); + $offsetFromEnd++; + if ($rollingCharBuffer->seenReverseString($needle->value)) { + return $this->getSizeInBytes() - $offsetFromEnd + 1; + } + } + + return null; + } + + public function __destruct() { + fclose($this->handle); + } +} diff --git a/includes/pdfparser/Stream/InMemoryStream.php b/includes/pdfparser/Stream/InMemoryStream.php new file mode 100644 index 0000000..6f3d0fe --- /dev/null +++ b/includes/pdfparser/Stream/InMemoryStream.php @@ -0,0 +1,79 @@ +content); + } + + #[Override] + public function read(int $from, int $nrOfBytes): string { + if ($nrOfBytes <= 0) { + throw new InvalidArgumentException(sprintf('$nrOfBytes must be greater than 0, %d given', $nrOfBytes)); + } + + return substr($this->content, $from, $nrOfBytes); + } + + #[Override] + public function slice(int $startByteOffset, int $endByteOffset): string { + if ($startByteOffset <= 0) { + throw new InvalidArgumentException(sprintf('$startByteOffset must be greater than 0, %d given', $startByteOffset)); + } + + if ($endByteOffset - $startByteOffset < 1) { + throw new InvalidArgumentException(sprintf('End byte offset %d should be bigger than start byte offset %d', $endByteOffset, 
$startByteOffset)); + } + + return substr($this->content, $startByteOffset, $endByteOffset - $startByteOffset); + } + + #[Override] + public function chars(int $from, int $nrOfBytes): iterable { + if ($from < 0) { + throw new InvalidArgumentException(sprintf('$from must be greater than zero, %d given', $from)); + } + + if ($nrOfBytes <= 0) { + throw new InvalidArgumentException(sprintf('$nrOfBytes to read must be greater than zero, %d given', $nrOfBytes)); + } + + foreach (str_split(substr($this->content, $from, $nrOfBytes)) as $char) { + yield $char; + } + } + + #[Override] + public function firstPos(WhitespaceCharacter|DelimiterCharacter|ToUnicodeCMapOperator|Marker $needle, int $offsetFromStart, int $before): ?int { + $firstPos = strpos($this->content, $needle->value, $offsetFromStart); + if ($firstPos === false || $firstPos > $before) { + return null; + } + + return $firstPos; + } + + #[Override] + public function lastPos(WhitespaceCharacter|DelimiterCharacter|ToUnicodeCMapOperator|Marker $needle, int $offsetFromEnd): ?int { + $pos = strrpos($this->content, $needle->value, -$offsetFromEnd); + if ($pos === false) { + return null; + } + + return $pos; + } +} diff --git a/includes/pdfparser/Stream/Stream.php b/includes/pdfparser/Stream/Stream.php new file mode 100644 index 0000000..b40f438 --- /dev/null +++ b/includes/pdfparser/Stream/Stream.php @@ -0,0 +1,41 @@ + $nrOfBytes */ + public function read(int $from, int $nrOfBytes): string; + + public function toString(): string; + + /** + * @phpstan-assert int<0, max> $startByteOffset + * @phpstan-assert int<0, max> $endByteOffset + */ + public function slice(int $startByteOffset, int $endByteOffset): string; + + /** + * @phpstan-assert int<0, max> $from + * @phpstan-assert int<1, max> $nrOfBytes + * + * @return iterable + */ + public function chars(int $from, int $nrOfBytes): iterable; + + public function firstPos(WhitespaceCharacter|Marker|DelimiterCharacter|ToUnicodeCMapOperator $needle, int $offsetFromStart, int 
$before): ?int; + + public function lastPos(WhitespaceCharacter|Marker|DelimiterCharacter|ToUnicodeCMapOperator $needle, int $offsetFromEnd): ?int; + + public function getStartNextLineAfter(WhitespaceCharacter|Marker|DelimiterCharacter|ToUnicodeCMapOperator $needle, int $offsetFromStart, int $before): ?int; + + public function getStartOfNextLine(int $byteOffset, int $before): ?int; + + public function getEndOfCurrentLine(int $byteOffset, int $before): ?int; +} diff --git a/includes/pdfparser_autoloader.php b/includes/pdfparser_autoloader.php new file mode 100644 index 0000000..d388162 --- /dev/null +++ b/includes/pdfparser_autoloader.php @@ -0,0 +1,19 @@ + get_env_var('MAIL_TRANSPORT', 'smtp'), - 'smtp_host' => get_env_var('SMTP_HOST'), - 'smtp_port' => get_env_var('SMTP_PORT', 587), - 'smtp_secure' => get_env_var('SMTP_SECURE', 'tls'), // tls or ssl - 'smtp_user' => get_env_var('SMTP_USER'), - 'smtp_pass' => get_env_var('SMTP_PASS'), - 'from_email' => get_env_var('MAIL_FROM'), - 'from_name' => get_env_var('MAIL_FROM_NAME'), - 'reply_to' => get_env_var('MAIL_REPLY_TO'), + + 'transport' => mail_get_env_var('MAIL_TRANSPORT', 'smtp'), + + 'smtp_host' => mail_get_env_var('SMTP_HOST'), + + 'smtp_port' => mail_get_env_var('SMTP_PORT', 587), + + 'smtp_secure' => mail_get_env_var('SMTP_SECURE', 'tls'), // tls or ssl + + 'smtp_user' => mail_get_env_var('SMTP_USER'), + + 'smtp_pass' => mail_get_env_var('SMTP_PASS'), + + 'from_email' => mail_get_env_var('MAIL_FROM'), + + 'from_name' => mail_get_env_var('MAIL_FROM_NAME'), + + 'reply_to' => mail_get_env_var('MAIL_REPLY_TO'), + + // Optional DKIM signing - 'dkim_domain' => get_env_var('DKIM_DOMAIN', ''), - 'dkim_selector' => get_env_var('DKIM_SELECTOR', 'default'), - 'dkim_private_key_path' => get_env_var('DKIM_PRIVATE_KEY_PATH', ''), // Path to the private key file + + 'dkim_domain' => mail_get_env_var('DKIM_DOMAIN', ''), + + 'dkim_selector' => mail_get_env_var('DKIM_SELECTOR', 'default'), + + 'dkim_private_key_path' => 
mail_get_env_var('DKIM_PRIVATE_KEY_PATH', ''), // Path to the private key file + ]; diff --git a/uploads/kb_documents/695f78e68b6ef_Opis_dzia__ania_algorytmu_losuj__cego.pdf b/uploads/kb_documents/695f78e68b6ef_Opis_dzia__ania_algorytmu_losuj__cego.pdf new file mode 100644 index 0000000..df1d156 Binary files /dev/null and b/uploads/kb_documents/695f78e68b6ef_Opis_dzia__ania_algorytmu_losuj__cego.pdf differ diff --git a/uploads/kb_documents/695f7992c2b65_test.pdf b/uploads/kb_documents/695f7992c2b65_test.pdf new file mode 100644 index 0000000..1d2c320 --- /dev/null +++ b/uploads/kb_documents/695f7992c2b65_test.pdf @@ -0,0 +1 @@ +dummy pdf content \ No newline at end of file diff --git a/uploads/kb_documents/695f799cf3eb0_test.pdf b/uploads/kb_documents/695f799cf3eb0_test.pdf new file mode 100644 index 0000000..1d2c320 --- /dev/null +++ b/uploads/kb_documents/695f799cf3eb0_test.pdf @@ -0,0 +1 @@ +dummy pdf content \ No newline at end of file diff --git a/uploads/kb_documents/695f79a6b7bc5_test.pdf b/uploads/kb_documents/695f79a6b7bc5_test.pdf new file mode 100644 index 0000000..774c2ea Binary files /dev/null and b/uploads/kb_documents/695f79a6b7bc5_test.pdf differ diff --git a/uploads/kb_documents/695f79d3473af_test.pdf b/uploads/kb_documents/695f79d3473af_test.pdf new file mode 100644 index 0000000..774c2ea Binary files /dev/null and b/uploads/kb_documents/695f79d3473af_test.pdf differ diff --git a/uploads/kb_documents/695f79de3ea94_test.pdf b/uploads/kb_documents/695f79de3ea94_test.pdf new file mode 100644 index 0000000..774c2ea Binary files /dev/null and b/uploads/kb_documents/695f79de3ea94_test.pdf differ diff --git a/uploads/kb_documents/695f79e4205ab_test.pdf b/uploads/kb_documents/695f79e4205ab_test.pdf new file mode 100644 index 0000000..774c2ea Binary files /dev/null and b/uploads/kb_documents/695f79e4205ab_test.pdf differ diff --git a/uploads/kb_documents/695f9142a7bc4_695f79ef7d3a7_test.pdf b/uploads/kb_documents/695f9142a7bc4_695f79ef7d3a7_test.pdf new 
file mode 100644 index 0000000..774c2ea Binary files /dev/null and b/uploads/kb_documents/695f9142a7bc4_695f79ef7d3a7_test.pdf differ