Get Color-Spaces
This script extracts all color related information from an existing PDF file.
It makes use of the \SetaPDF_Core_Parser_Content
class, which encapsulates a content stream,
processes it and calls registered callback methods on specific operators.
The helper classes are used to handle recursive parsing of PDF structures (Pages/Form XObjects).
PHP
<?php

// load and register the autoload function
use com\setasign\SetaPDF\Demos\Inspector\ColorInspector;

require_once '../../../../../bootstrap.php';

// prepare some files
// glob() returns false on error; fall back to an empty list so the appends below always work on an array
$files = glob($assetsDirectory . '/pdfs/misc/*.pdf') ?: [];
$files[] = $assetsDirectory . '/pdfs/Brand-Guide.pdf';
$files[] = $assetsDirectory . '/pdfs/Brand-Guide - with-comments.pdf';

$path = displayFiles($files);

// require helper classes
require_once $classesDirectory . '/ContentStreamProcessor/ColorProcessor.php';
require_once $classesDirectory . '/Inspector/ColorInspector.php';

$document = \SetaPDF_Core_Document::loadByFilename($path);
$inspector = new ColorInspector($document);
$colors = $inspector->getColors();

if (count($colors) === 0) {
    echo 'No color definitions found.';
    exit();
}

/*
 * Everything printed below originates from the inspected PDF (color space names,
 * ICC profile descriptions, annotation subtypes, ...) and is written into an HTML
 * page, so it is escaped before output.
 */
$escape = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

// collect each color space name once (the name is used as the key to de-duplicate)
$allColorSpaces = [];
foreach ($colors as $color) {
    $allColorSpaces[$color['colorSpace']] = $color['colorSpace'];
}

echo '<pre>';
echo 'Color space(s) found: ' . $escape(implode(', ', $allColorSpaces)) . "\n\n";

foreach ($colors as $color) {
    $data = $color['data'];
    // "data" is optional in ColorInspector::addFoundColor(), so do not assume an object here
    $className = is_object($data) ? get_class($data) : gettype($data);

    echo $escape($color['colorSpace']) . ': ' . $escape($className) . "\n";

    // print the details that are specific to the kind of color space
    if ($data instanceof \SetaPDF_Core_ColorSpace_Separation) {
        echo ' Name: ' . $escape($data->getName()) . "\n";
        echo ' Alt: ' . $escape($data->getAlternateColorSpace()->getFamily()) . "\n";
    } elseif ($data instanceof \SetaPDF_Core_ColorSpace_IccBased) {
        $parser = $data->getIccProfileStream()->getParser();
        echo ' Description: ' . $escape($parser->getDescription()) . "\n";
        echo ' Number of components: ' . $escape($parser->getNumberOfComponents()) . "\n";
    } elseif ($data instanceof \SetaPDF_Core_ColorSpace_DeviceN) {
        echo ' Names: ' . $escape(implode(', ', $data->getNames())) . "\n";
        echo ' Alt: ' . $escape($data->getAlternateColorSpace()->getFamily()) . "\n";
    }

    echo ' Location: ' . $escape($color['location']) . "\n";
    echo ' Info: ' . $escape($color['info']) . "\n";
    echo "\n";
}

echo '</pre>';
PHP
<?php

namespace com\setasign\SetaPDF\Demos\Inspector;

use com\setasign\SetaPDF\Demos\ContentStreamProcessor\ColorProcessor;

/**
 * Class ColorInspector
 *
 * Walks through the pages of a document (and optionally the appearance streams of
 * their annotations), runs a ColorProcessor on every canvas and collects the color
 * definitions the processor reports back through addFoundColor().
 */
class ColorInspector
{
    /**
     * @var \SetaPDF_Core_Document
     */
    protected $_document;

    /**
     * All found color definitions
     *
     * Each entry is an array with the keys "colorSpace", "data", "info" and "location".
     *
     * @var array
     */
    protected $_colors = [];

    /**
     * Information about the currently processed "location"
     *
     * @var string
     */
    protected $_currentLocation;

    /**
     * The constructor
     *
     * @param \SetaPDF_Core_Document $document
     */
    public function __construct(\SetaPDF_Core_Document $document)
    {
        $this->_document = $document;
    }

    /**
     * Get all used colors
     *
     * The result is rebuilt on every call, so calling this method repeatedly does not
     * accumulate duplicate entries.
     *
     * @param bool $processAnnotations Set to false to ignore color definitions in annotation appearance streams
     * @param null|integer $maxPages The maximum of pages to process
     * @return array
     */
    public function getColors($processAnnotations = true, $maxPages = null)
    {
        // start from a clean state; otherwise a second call would return every entry twice
        $this->_colors = [];

        $pages = $this->_document->getCatalog()->getPages();
        $pageCount = $pages->count();
        $maxPages = $maxPages === null ? $pageCount : min($maxPages, $pageCount);

        for ($pageNo = 1; $pageNo <= $maxPages; $pageNo++) {
            $this->_currentLocation = 'Page ' . $pageNo;

            $page = $pages->getPage($pageNo);
            $this->_processCanvas($page->getCanvas());

            if ($processAnnotations === false) {
                continue;
            }

            foreach ($page->getAnnotations()->getAll() as $annotation) {
                $this->_processAnnotation($annotation, $pageNo);
            }
        }

        return $this->_colors;
    }

    /**
     * Runs a color processor on a single canvas.
     *
     * @param \SetaPDF_Core_Canvas $canvas
     */
    protected function _processCanvas(\SetaPDF_Core_Canvas $canvas)
    {
        $streamProcessor = new ColorProcessor($canvas, $this);
        $streamProcessor->process();
    }

    /**
     * Processes all appearance streams (AP entry) of an annotation.
     *
     * @param object $annotation An annotation as returned by the page's annotations helper
     * @param int $pageNo The page number, used for the location information
     */
    protected function _processAnnotation($annotation, $pageNo)
    {
        $dict = $annotation->getDictionary();
        $ap = $dict->getValue('AP');
        if ($ap === null) {
            return;
        }

        // a broken annotation may lack the Subtype entry; do not fail on it
        $subtype = $dict->getValue('Subtype');
        $subtypeName = $subtype === null ? 'unknown' : $subtype->getValue();
        $this->_currentLocation = 'Annotation (' . $subtypeName . ') on Page ' . $pageNo;

        foreach ($ap as $type => $value) {
            $object = $value->ensure();

            if ($object instanceof \SetaPDF_Core_Type_Stream) {
                // the appearance is given directly as a stream
                $this->_processCanvas($annotation->getAppearance($type)->getCanvas());
                continue;
            }

            if (!($object instanceof \SetaPDF_Core_Type_Dictionary)) {
                continue;
            }

            // the appearance is a sub-dictionary holding one stream per entry
            foreach ($object as $subType => $subValue) {
                $subObject = $subValue->ensure();
                if ($subObject instanceof \SetaPDF_Core_Type_Stream) {
                    $this->_processCanvas($annotation->getAppearance($type, $subType)->getCanvas());
                }
            }
        }
    }

    /**
     * A method which will register found color definitions.
     *
     * The entry is tagged with the location that is currently being processed.
     *
     * @param mixed $colorSpace The color space name/family
     * @param mixed $data The object describing the color or color space
     * @param null|string $info A description of where/how the color was found
     */
    public function addFoundColor($colorSpace, $data = null, $info = null)
    {
        $this->_colors[] = [
            'colorSpace' => $colorSpace,
            'data' => $data,
            'info' => $info,
            'location' => $this->_currentLocation,
        ];
    }
}
PHP
<?php

namespace com\setasign\SetaPDF\Demos\ContentStreamProcessor;

use com\setasign\SetaPDF\Demos\Inspector\ColorInspector;

/**
 * Class ColorProcessor
 *
 * This class offers the callback methods for the content stream parser. Every
 * callback reports the colors / color spaces it finds to the ColorInspector
 * instance passed to the constructor.
 */
class ColorProcessor
{
    /**
     * @var ColorInspector
     */
    protected $_colorInspector;

    /**
     * @var \SetaPDF_Core_Canvas
     */
    protected $_canvas;

    /**
     * @var \SetaPDF_Core_Parser_Content
     */
    protected $_parser;

    /**
     * The constructor
     *
     * @param \SetaPDF_Core_Canvas $canvas
     * @param ColorInspector $colorInspector
     */
    public function __construct(\SetaPDF_Core_Canvas $canvas, ColorInspector $colorInspector)
    {
        $this->_canvas = $canvas;
        $this->_colorInspector = $colorInspector;
    }

    /**
     * Callback for standard color operators
     *
     * @param array $args
     * @param string $operator
     */
    public function _color(array $args, $operator)
    {
        $color = \SetaPDF_Core_DataStructure_Color::createByComponents($args);
        $info = 'Standard color operator (' . $operator . ') in content stream.';

        if ($color instanceof \SetaPDF_Core_DataStructure_Color_Rgb) {
            $colorSpace = 'DeviceRGB';
        } elseif ($color instanceof \SetaPDF_Core_DataStructure_Color_Cmyk) {
            $colorSpace = 'DeviceCMYK';
        } else {
            // everything that is neither RGB nor CMYK is reported as gray
            $colorSpace = 'DeviceGray';
        }

        $this->_colorInspector->addFoundColor($colorSpace, $color, $info);
    }

    /**
     * Callback for color space operators
     *
     * @param array $args
     * @param string $operator
     * @throws \SetaPDF_Core_Exception
     */
    public function _colorSpace(array $args, $operator)
    {
        // malformed content streams may use the operator without an operand
        if (!isset($args[0])) {
            return;
        }

        $colorSpace = $args[0];

        // if the name refers to an entry in the resources, use that definition instead of the name
        $colorSpaces = $this->_canvas->getResources(true, false, \SetaPDF_Core_Resource::TYPE_COLOR_SPACE);
        if ($colorSpaces && $colorSpaces->offsetExists($colorSpace->getValue())) {
            $colorSpace = $colorSpaces->getValue($colorSpace->getValue());
        }

        $colorSpace = \SetaPDF_Core_ColorSpace::createByDefinition($colorSpace);

        $info = 'Color space operator (' . $operator . ') in content stream.';
        $this->_resolveColorSpace($colorSpace, $info);
    }

    /**
     * Helper method to recursively resolve a color space and its alternate/base color spaces
     *
     * @param \SetaPDF_Core_ColorSpace $colorSpace
     * @param string $info
     * @throws \SetaPDF_Core_Exception
     */
    protected function _resolveColorSpace(\SetaPDF_Core_ColorSpace $colorSpace, $info)
    {
        $this->_colorInspector->addFoundColor($colorSpace->getFamily(), $colorSpace, $info);

        if ($colorSpace instanceof \SetaPDF_Core_ColorSpace_Separation) {
            $alternate = $colorSpace->getAlternateColorSpace();
            $info = 'Alternate color space for Separation color space.';
            $this->_resolveColorSpace($alternate, $info);
        } elseif ($colorSpace instanceof \SetaPDF_Core_ColorSpace_DeviceN) {
            $alternate = $colorSpace->getAlternateColorSpace();
            $info = 'Alternate color space for DeviceN color space.';
            $this->_resolveColorSpace($alternate, $info);
        } elseif ($colorSpace instanceof \SetaPDF_Core_ColorSpace_Indexed) {
            $base = $colorSpace->getBase();
            $info = 'Base color space for Indexed color space.';
            $this->_resolveColorSpace($base, $info);
        } elseif ($colorSpace instanceof \SetaPDF_Core_ColorSpace_IccBased) {
            $stream = $colorSpace->getIccProfileStream();

            // the alternate color space of an ICC profile is optional
            $alternate = $stream->getAlternate();
            if ($alternate) {
                $info = 'Alternate color space for ICC profile color space.';
                $this->_resolveColorSpace($alternate, $info);
            }

            /* See ICC.1:2010 - Table 19 (ICC1v43_2010-12.pdf) */
            $info = 'Color space signature extracted from ICC profile.';
            $signature = $stream->getParser()->getColorSpace();
            $this->_colorInspector->addFoundColor(trim($signature), $stream, $info);
        }
    }

    /**
     * Callback for painting a XObject
     *
     * @param array $args
     * @throws \SetaPDF_Core_Exception
     * @throws \SetaPDF_Exception_NotImplemented
     */
    public function _paintXObject(array $args)
    {
        // malformed content streams may use the operator without an operand
        if (!isset($args[0])) {
            return;
        }

        $name = $args[0]->getValue();

        $xObjects = $this->_canvas->getResources(true, false, \SetaPDF_Core_Resource::TYPE_X_OBJECT);
        if ($xObjects === false) {
            return;
        }

        $xObjectIndirectObject = $xObjects->getValue($name);
        if (!($xObjectIndirectObject instanceof \SetaPDF_Core_Type_IndirectReference)) {
            return;
        }

        $xObject = \SetaPDF_Core_XObject::get($xObjectIndirectObject);

        if ($xObject instanceof \SetaPDF_Core_XObject_Image) {
            $dict = $xObject->getIndirectObject()->ensure()->getValue();

            // images flagged as image mask are skipped
            if ($dict->offsetExists('ImageMask') && $dict->getValue('ImageMask')->ensure()->getValue() === true) {
                return;
            }

            $colorSpace = $xObject->getColorSpace();
            $info = 'Color space of an image used in a content stream.';
            $this->_resolveColorSpace($colorSpace, $info);
        } elseif ($xObject instanceof \SetaPDF_Core_XObject_Form) {
            /* Get the colorspace from the transparency group */
            $group = $xObject->getGroup();
            if ($group instanceof \SetaPDF_Core_TransparencyGroup) {
                $colorSpace = $group->getColorSpace();
                if ($colorSpace !== null) {
                    $info = 'Color space from Transparency Group of XObject.';
                    $this->_resolveColorSpace($colorSpace, $info);
                }
            }

            /* We got a Form XObject - start recursive processing */
            // NOTE(review): there is no guard here against a Form XObject that paints itself
            // (directly or indirectly) - confirm whether such cycles need to be detected.
            $streamProcessor = new self($xObject->getCanvas(), $this->_colorInspector);
            $streamProcessor->process();
        }
    }

    /**
     * Callback for inline image operator
     *
     * @param array $args The key/value pairs of the inline image dictionary as a flat list
     */
    public function _startInlineImageData($args)
    {
        // rebuild the inline image dictionary; a dangling key without a value is ignored
        $dict = new \SetaPDF_Core_Type_Dictionary();
        for ($i = 0, $c = count($args); $i + 1 < $c; $i += 2) {
            $dict[$args[$i]] = $args[$i + 1];
        }

        // the color space can be given by its abbreviated (CS) or its full key (ColorSpace)
        $colorSpace = $dict->offsetExists('CS') ? $dict->getValue('CS') : $dict->getValue('ColorSpace');
        if ($colorSpace === null) {
            return;
        }

        // NOTE(review): this presumably expects a name object here - confirm how array
        // definitions (e.g. inline Indexed color spaces) should be handled.
        $colorSpace = $colorSpace->getValue();

        // expand the abbreviated names that are used in inline images
        switch ($colorSpace) {
            case 'G':
                $colorSpace = 'DeviceGray';
                break;
            case 'RGB':
                $colorSpace = 'DeviceRGB';
                break;
            case 'CMYK':
                $colorSpace = 'DeviceCMYK';
                break;
            case 'I':
                $colorSpace = 'Indexed';
                break;
        }

        $info = 'Color space of an inline image in content stream.';
        $this->_colorInspector->addFoundColor(
            $colorSpace,
            \SetaPDF_Core_ColorSpace::createByDefinition($colorSpace),
            $info
        );
    }

    /**
     * Callback for shading operator
     *
     * @param array $args
     * @throws \SetaPDF_Core_Exception
     */
    public function _paintShapeAndColourShading($args)
    {
        // malformed content streams may use the operator without an operand
        if (!isset($args[0])) {
            return;
        }

        $name = $args[0]->getValue();

        $shadings = $this->_canvas->getResources(true, false, \SetaPDF_Core_Resource::TYPE_SHADING);
        if ($shadings === false) {
            return;
        }

        $shadingIndirectObject = $shadings->getValue($name);
        if (!($shadingIndirectObject instanceof \SetaPDF_Core_Type_IndirectReference)) {
            return;
        }

        try {
            /** @var \SetaPDF_Core_Type_Dictionary $shading */
            $shading = $shadingIndirectObject->ensure();
        } catch (\SetaPDF_Core_Type_IndirectReference_Exception $e) {
            // the referenced object cannot be resolved - nothing to inspect
            return;
        }

        // a shading can be a stream; in that case work on its dictionary
        if ($shading instanceof \SetaPDF_Core_Type_Stream) {
            $shading = $shading->getValue();
        }

        $colorSpaceValue = $shading->getValue('ColorSpace');
        if ($colorSpaceValue === null) {
            return;
        }

        $colorSpace = \SetaPDF_Core_ColorSpace::createByDefinition($colorSpaceValue);

        $info = 'Paint shading operator in content stream.';
        $this->_resolveColorSpace($colorSpace, $info);
    }

    /**
     * Process the content stream
     *
     * Registers the callbacks of this class for the color related operators and
     * lets the parser run over the stream of the canvas.
     */
    public function process()
    {
        try {
            $stream = $this->_canvas->getStream();
        } catch (\SetaPDF_Core_Filter_Exception $e) {
            // if a stream cannot be unfiltered, we ignore it
            return;
        }

        $this->_parser = new \SetaPDF_Core_Parser_Content($stream);

        /* Register colorspace operators
         * e.g. -> /DeviceRGB CS % Set DeviceRGB colour space
         */
        $this->_parser->registerOperator(
            ['CS', 'cs'],
            [$this, '_colorSpace']
        );

        /* Register default color space operators */
        $this->_parser->registerOperator(
            ['G', 'g', 'RG', 'rg', 'K', 'k'],
            [$this, '_color']
        );

        /* Register draw operator for XObjects */
        $this->_parser->registerOperator('Do', [$this, '_paintXObject']);

        /* Inline image */
        $this->_parser->registerOperator('ID', [$this, '_startInlineImageData']);

        /* Shading Operator */
        $this->_parser->registerOperator('sh', [$this, '_paintShapeAndColourShading']);

        $this->_parser->process();
    }
}