Get Color-Spaces
This script extracts all color related information from an existing PDF file.
It makes use of the \setasign\SetaPDF2\Core\Parser\Content class, which encapsulates a content stream,
processes it and calls the registered callback methods for specific operators.
The helper classes are used to handle recursive parsing of PDF structures (Pages/Form XObjects).
PHP
<?php
use setasign\SetaPDF2\Demos\Inspector\ColorInspector;
use setasign\SetaPDF2\Core\ColorSpace\DeviceN;
use setasign\SetaPDF2\Core\ColorSpace\IccBased;
use setasign\SetaPDF2\Core\ColorSpace\Separation;
use setasign\SetaPDF2\Core\Document;
// load and register the autoload function
require_once '../../../../../bootstrap.php';

// prepare some files (glob() returns false on failure, so fall back to an empty list)
$files = glob($assetsDirectory . '/pdfs/misc/*.pdf') ?: [];
$files[] = $assetsDirectory . '/pdfs/Brand-Guide.pdf';
$files[] = $assetsDirectory . '/pdfs/Brand-Guide - with-comments.pdf';

$path = displayFiles($files);

// require helper classes
require_once $classesDirectory . '/ContentStreamProcessor/ColorProcessor.php';
require_once $classesDirectory . '/Inspector/ColorInspector.php';

$document = Document::loadByFilename($path);
$inspector = new ColorInspector($document);
$colors = $inspector->getColors();

if (count($colors) === 0) {
    echo 'No color definitions found.';
    exit();
}

// Names, descriptions and annotation subtypes are read from the PDF file and are written into
// an HTML page, so every value is escaped before it is echoed.
$escape = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

// collect every color space name once, in order of first appearance
$allColorSpaces = array_unique(array_column($colors, 'colorSpace'));

echo '<pre>';
echo 'Color space(s) found: ' . $escape(implode(', ', $allColorSpaces)) . "\n\n";

foreach ($colors as $color) {
    $data = $color['data'];
    // "data" is optional in ColorInspector::addFoundColor(), so it is not necessarily an object
    $className = is_object($data) ? get_class($data) : gettype($data);

    echo $escape($color['colorSpace']) . ': ' . $escape($className) . "\n";

    if ($data instanceof Separation) {
        echo ' Name: ' . $escape($data->getName()) . "\n";
        echo ' Alt: ' . $escape($data->getAlternateColorSpace()->getFamily()) . "\n";
    } elseif ($data instanceof IccBased) {
        $parser = $data->getIccProfileStream()->getParser();
        echo ' Description: ' . $escape($parser->getDescription()) . "\n";
        echo ' Number of components: ' . $escape($parser->getNumberOfComponents()) . "\n";
    } elseif ($data instanceof DeviceN) {
        echo ' Names: ' . $escape(implode(', ', $data->getNames())) . "\n";
        echo ' Alt: ' . $escape($data->getAlternateColorSpace()->getFamily()) . "\n";
    }

    echo ' Location: ' . $escape($color['location']) . "\n";
    echo ' Info: ' . $escape($color['info']) . "\n";
    echo "\n";
}

echo '</pre>';
PHP
<?php
namespace setasign\SetaPDF2\Demos\Inspector;
use setasign\SetaPDF2\Demos\ContentStreamProcessor\ColorProcessor;
use setasign\SetaPDF2\Core\Document;
use setasign\SetaPDF2\Core\Type\PdfDictionary;
use setasign\SetaPDF2\Core\Type\PdfStream;
/**
 * Class ColorInspector
 *
 * Walks through the pages of a document (and optionally through the appearance streams of
 * their annotations), runs a ColorProcessor on each canvas and collects every color
 * definition the processor reports through addFoundColor().
 */
class ColorInspector
{
    /**
     * @var Document
     */
    protected $_document;

    /**
     * All found color definitions
     *
     * Each entry is an array with the keys "colorSpace", "data", "info" and "location".
     *
     * @var array
     */
    protected $_colors = [];

    /**
     * Information about the currently processed "location"
     *
     * @var string
     */
    protected $_currentLocation;

    /**
     * The constructor
     *
     * @param Document $document
     */
    public function __construct(Document $document)
    {
        $this->_document = $document;
    }

    /**
     * Get all used colors
     *
     * The result is rebuilt on every call, so calling this method repeatedly does not
     * accumulate duplicate entries.
     *
     * @param bool $processAnnotations Set to `false` to ignore color definitions in annotation appearance streams
     * @param null|int $maxPages The maximum of pages to process
     * @return array
     */
    public function getColors($processAnnotations = true, $maxPages = null)
    {
        // start from a clean state; otherwise a second call would return every entry twice
        $this->_colors = [];

        $pages = $this->_document->getCatalog()->getPages();
        $pageCount = $pages->count();
        $maxPages = $maxPages === null ? $pageCount : min($maxPages, $pageCount);

        for ($pageNo = 1; $pageNo <= $maxPages; $pageNo++) {
            $this->_currentLocation = 'Page ' . $pageNo;

            $page = $pages->getPage($pageNo);
            $streamProcessor = new ColorProcessor($page->getCanvas(), $this);
            $streamProcessor->process();

            if ($processAnnotations === false) {
                continue;
            }

            foreach ($page->getAnnotations()->getAll() as $annotation) {
                $this->_processAnnotation($annotation, $pageNo);
            }
        }

        return $this->_colors;
    }

    /**
     * Processes all appearance streams of a single annotation.
     *
     * @param object $annotation The annotation object as returned by the page's annotations helper
     * @param int $pageNo The number of the page the annotation is located on
     */
    protected function _processAnnotation($annotation, $pageNo)
    {
        $dict = $annotation->getDictionary();
        $ap = $dict->getValue('AP');
        if ($ap === null) {
            return;
        }

        // The AP entry may be stored as an indirect reference: resolve it before iterating,
        // otherwise the appearance streams would be skipped silently.
        $ap = $ap->ensure();
        if (!($ap instanceof PdfDictionary)) {
            return;
        }

        $subtype = $dict->getValue('Subtype');
        $subtypeName = $subtype === null ? 'unknown' : $subtype->ensure()->getValue();
        $this->_currentLocation = 'Annotation (' . $subtypeName . ') on Page ' . $pageNo;

        foreach ($ap as $type => $value) {
            $object = $value->ensure();

            if ($object instanceof PdfStream) {
                // a single appearance stream for this type (N, R or D)
                $streamProcessor = new ColorProcessor($annotation->getAppearance($type)->getCanvas(), $this);
                $streamProcessor->process();
                continue;
            }

            if (!($object instanceof PdfDictionary)) {
                continue;
            }

            // a sub-dictionary holding one appearance stream per appearance state
            foreach ($object as $subType => $subValue) {
                if (!($subValue->ensure() instanceof PdfStream)) {
                    continue;
                }

                $streamProcessor = new ColorProcessor(
                    $annotation->getAppearance($type, $subType)->getCanvas(),
                    $this
                );
                $streamProcessor->process();
            }
        }
    }

    /**
     * A method which will register found color definitions.
     *
     * The entry is tagged with the location that is processed at the time of the call.
     *
     * @param string $colorSpace The name of the color space
     * @param mixed $data The object describing the color or color space (optional)
     * @param string|null $info A description of where/how the definition was found (optional)
     */
    public function addFoundColor($colorSpace, $data = null, $info = null)
    {
        $this->_colors[] = [
            'colorSpace' => $colorSpace,
            'data' => $data,
            'info' => $info,
            'location' => $this->_currentLocation,
        ];
    }
}
PHP
<?php
namespace setasign\SetaPDF2\Demos\ContentStreamProcessor;
use setasign\SetaPDF2\Demos\Inspector\ColorInspector;
use setasign\SetaPDF2\Core\Canvas\Canvas;
use setasign\SetaPDF2\Core\ColorSpace\ColorSpace;
use setasign\SetaPDF2\Core\ColorSpace\DeviceN;
use setasign\SetaPDF2\Core\ColorSpace\IccBased;
use setasign\SetaPDF2\Core\ColorSpace\Indexed;
use setasign\SetaPDF2\Core\ColorSpace\Separation;
use setasign\SetaPDF2\Core\DataStructure\Color\AbstractColor;
use setasign\SetaPDF2\Core\DataStructure\Color\Cmyk;
use setasign\SetaPDF2\Core\DataStructure\Color\Rgb;
use setasign\SetaPDF2\Core\Filter\Exception as FilterException;
use setasign\SetaPDF2\Core\Parser\Content;
use setasign\SetaPDF2\Core\Resource\ResourceInterface;
use setasign\SetaPDF2\Core\TransparencyGroup;
use setasign\SetaPDF2\Core\Type\PdfDictionary;
use setasign\SetaPDF2\Core\Type\PdfIndirectReference;
use setasign\SetaPDF2\Core\Type\PdfStream;
use setasign\SetaPDF2\Core\XObject\XObject;
use setasign\SetaPDF2\Core\XObject\Form;
use setasign\SetaPDF2\Core\XObject\Image;
use setasign\SetaPDF2\Exception;
use setasign\SetaPDF2\NotImplementedException;
/**
 * Class ColorProcessor
 *
 * This class offers the callback methods for the content stream parser. Each callback
 * reports the color definitions it encounters to the ColorInspector instance.
 */
class ColorProcessor
{
    /**
     * @var ColorInspector
     */
    protected $_colorInspector;

    /**
     * @var Canvas
     */
    protected $_canvas;

    /**
     * @var Content
     */
    protected $_parser;

    /**
     * @var array All object ids of visited XObjects to prevent circular references
     */
    protected $_xObjectObjectIds = [];

    /**
     * The constructor
     *
     * @param Canvas $canvas
     * @param ColorInspector $colorInspector
     */
    public function __construct(Canvas $canvas, ColorInspector $colorInspector)
    {
        $this->_canvas = $canvas;
        $this->_colorInspector = $colorInspector;
    }

    /**
     * Callback for standard color operators
     *
     * @param array $args
     * @param string $operator
     */
    public function _color(array $args, string $operator)
    {
        $color = AbstractColor::createByComponents($args);
        $info = 'Standard color operator (' . $operator . ') in content stream.';

        if ($color instanceof Rgb) {
            $colorSpace = 'DeviceRGB';
        } elseif ($color instanceof Cmyk) {
            $colorSpace = 'DeviceCMYK';
        } else {
            $colorSpace = 'DeviceGray';
        }

        $this->_colorInspector->addFoundColor($colorSpace, $color, $info);
    }

    /**
     * Callback for color space operators
     *
     * @param array $args
     * @param string $operator
     * @throws Exception
     */
    public function _colorSpace(array $args, $operator)
    {
        if (!isset($args[0])) {
            // operator without operand - nothing to resolve
            return;
        }

        // a name may refer to an entry in the ColorSpace resources; otherwise it is used directly
        $definition = $this->_findColorSpaceResource($args[0]->getValue());
        if ($definition === null) {
            $definition = $args[0];
        }

        $colorSpace = ColorSpace::createByDefinition($definition);
        $info = 'Color space operator (' . $operator . ') in content stream.';
        $this->_resolveColorSpace($colorSpace, $info);
    }

    /**
     * Helper method to look up a color space definition in the resources of the canvas
     *
     * @param string $name The resource name
     * @return mixed|null The definition found in the ColorSpace resources or null if there is none
     */
    protected function _findColorSpaceResource($name)
    {
        $colorSpaces = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_COLOR_SPACE);
        if ($colorSpaces && $colorSpaces->offsetExists($name)) {
            return $colorSpaces->getValue($name);
        }

        return null;
    }

    /**
     * Helper method to recursively resolve color space and their alternate color spaces
     *
     * @param ColorSpace $colorSpace
     * @param string $info
     * @throws Exception
     */
    protected function _resolveColorSpace(ColorSpace $colorSpace, string $info)
    {
        $this->_colorInspector->addFoundColor($colorSpace->getFamily(), $colorSpace, $info);

        if ($colorSpace instanceof Separation) {
            $alternate = $colorSpace->getAlternateColorSpace();
            $info = 'Alternate color space for Separation color space.';
            $this->_resolveColorSpace($alternate, $info);
        } elseif ($colorSpace instanceof DeviceN) {
            $alternate = $colorSpace->getAlternateColorSpace();
            $info = 'Alternate color space for DeviceN color space.';
            $this->_resolveColorSpace($alternate, $info);
        } elseif ($colorSpace instanceof Indexed) {
            $base = $colorSpace->getBase();
            $info = 'Base color space for Indexed color space.';
            $this->_resolveColorSpace($base, $info);
        } elseif ($colorSpace instanceof IccBased) {
            $stream = $colorSpace->getIccProfileStream();
            $alternate = $stream->getAlternate();
            if ($alternate) {
                $info = 'Alternate color space for ICC profile color space.';
                $this->_resolveColorSpace($alternate, $info);
            }

            /* See ICC.1:2010 - Table 19 (ICC1v43_2010-12.pdf)
             */
            $info = 'Color space signature extracted from ICC profile.';
            $colorSpace = $stream->getParser()->getColorSpace();
            $this->_colorInspector->addFoundColor(trim($colorSpace), $stream, $info);
        }
    }

    /**
     * Callback for painting a XObject
     *
     * @param array $args
     * @throws Exception
     * @throws NotImplementedException
     */
    public function _paintXObject(array $args)
    {
        if (!isset($args[0])) {
            return;
        }

        $name = $args[0]->getValue();
        $xObjects = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_X_OBJECT);
        if (!$xObjects) {
            return;
        }

        $xObjectIndirectObject = $xObjects->getValue($name);
        if (!($xObjectIndirectObject instanceof PdfIndirectReference)) {
            return;
        }

        $xObject = XObject::get($xObjectIndirectObject);
        if ($xObject instanceof Image) {
            $dict = $xObject->getIndirectObject()->ensure()->getValue();
            // image masks have no color space of their own
            if ($dict->offsetExists('ImageMask') && $dict->getValue('ImageMask')->ensure()->getValue() === true) {
                return;
            }

            $colorSpace = $xObject->getColorSpace();
            $info = 'Color space of an image used in a content stream.';
            $this->_resolveColorSpace($colorSpace, $info);
        } elseif ($xObject instanceof Form) {
            /* Get the colorspace from the transparency group */
            $group = $xObject->getGroup();
            if ($group instanceof TransparencyGroup) {
                $colorSpace = $group->getColorSpace();
                if ($colorSpace !== null) {
                    $info = 'Color space from Transparency Group of XObject.';
                    $this->_resolveColorSpace($colorSpace, $info);
                }
            }

            // use one and the same id for checking, registering and releasing the guard
            $objectId = $xObjectIndirectObject->getObjectId();
            if (isset($this->_xObjectObjectIds[$objectId])) {
                // recursion
                return;
            }

            $this->_xObjectObjectIds[$objectId] = true;

            try {
                /* We got a Form XObject - start recursive processing
                 */
                $streamProcessor = new self($xObject->getCanvas(), $this->_colorInspector);
                // share the list of visited ids with the nested processor
                $streamProcessor->_xObjectObjectIds =& $this->_xObjectObjectIds;
                $streamProcessor->process();
            } finally {
                // release the guard even if the nested processing fails
                unset($this->_xObjectObjectIds[$objectId]);
            }
        }
    }

    /**
     * Callback for inline image operator
     *
     * The color space of an inline image is either one of the abbreviated/standard names,
     * a name of an entry in the ColorSpace resources or an array definition.
     *
     * @param array $args
     * @throws Exception
     */
    public function _startInlineImageData(array $args)
    {
        $dict = new PdfDictionary();
        // the arguments are key/value pairs; an incomplete trailing pair is ignored
        for ($i = 0, $c = count($args); $i + 1 < $c; $i += 2) {
            $dict[$args[$i]] = $args[$i + 1];
        }

        $colorSpaceValue = $dict->offsetExists('CS') ? $dict->getValue('CS') : $dict->getValue('ColorSpace');
        if ($colorSpaceValue === null) {
            return;
        }

        $definition = $colorSpaceValue;
        $name = $colorSpaceValue->getValue();

        if (is_string($name)) {
            // expand the abbreviations that are allowed in inline images
            switch ($name) {
                case 'G':
                    $name = 'DeviceGray';
                    break;
                case 'RGB':
                    $name = 'DeviceRGB';
                    break;
                case 'CMYK':
                    $name = 'DeviceCMYK';
                    break;
                case 'I':
                    $name = 'Indexed';
                    break;
            }

            // any other name may refer to an entry in the ColorSpace resources
            $definition = $this->_findColorSpaceResource($name);
            if ($definition === null) {
                $definition = $name;
            }
        }
        // NOTE(review): a non-name value (e.g. an array definition) is passed on unchanged.
        // Abbreviated names inside such an array are not expanded here - confirm whether
        // ColorSpace::createByDefinition() accepts them.

        $info = 'Color space of an inline image in content stream.';
        $this->_resolveColorSpace(ColorSpace::createByDefinition($definition), $info);
    }

    /**
     * Callback for shading operator
     *
     * @param array $args
     * @throws Exception
     */
    public function _paintShapeAndColourShading($args)
    {
        if (!isset($args[0])) {
            return;
        }

        $name = $args[0]->getValue();
        $shadings = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_SHADING);
        if (!$shadings) {
            return;
        }

        $shadingIndirectObject = $shadings->getValue($name);
        if (!($shadingIndirectObject instanceof PdfIndirectReference)) {
            return;
        }

        try {
            $shading = $shadingIndirectObject->ensure();
        } catch (Exception $e) {
            // a shading that cannot be resolved is ignored
            return;
        }

        // a shading is either a dictionary or a stream (whose value is its dictionary)
        if ($shading instanceof PdfStream) {
            $shading = $shading->getValue();
        }

        if (!($shading instanceof PdfDictionary)) {
            return;
        }

        $colorSpaceValue = $shading->getValue('ColorSpace');
        if ($colorSpaceValue === null) {
            return;
        }

        $colorSpace = ColorSpace::createByDefinition($colorSpaceValue);
        $info = 'Paint shading operator in content stream.';
        $this->_resolveColorSpace($colorSpace, $info);
    }

    /**
     * Process the content stream
     *
     * Registers the callbacks of this class for the color related operators and runs the
     * content stream parser on the stream of the canvas.
     */
    public function process()
    {
        try {
            $stream = $this->_canvas->getStream();
        } catch (FilterException $e) {
            // if a stream cannot be unfiltered, we ignore it
            return;
        }

        $this->_parser = new Content($stream);

        /* Register colorspace operators
         * e.g. -> /DeviceRGB CS % Set DeviceRGB colour space
         */
        $this->_parser->registerOperator(
            ['CS', 'cs'],
            [$this, '_colorSpace']
        );

        /* Register default color space operators */
        $this->_parser->registerOperator(
            ['G', 'g', 'RG', 'rg', 'K', 'k'],
            [$this, '_color']
        );

        /* Register draw operator for XObjects */
        $this->_parser->registerOperator('Do', [$this, '_paintXObject']);

        /* Inline image */
        $this->_parser->registerOperator('ID', [$this, '_startInlineImageData']);

        /* Shading Operator */
        $this->_parser->registerOperator('sh', [$this, '_paintShapeAndColourShading']);

        $this->_parser->process();
    }
}
