SetaPDF Demos PHP libraries to handle, modify or create PDF files

Highlight Words with HTML

This demo searches for words and highlights them via HTML on top of an image of the PDF page.

PHP
<?php

use setasign\SetaPDF2\Core\Canvas\GraphicState;
use setasign\SetaPDF2\Core\Document;
use setasign\SetaPDF2\Core\Geometry\Point;
use setasign\SetaPDF2\Core\Geometry\Rectangle;
use setasign\SetaPDF2\Core\Geometry\Vector;
use setasign\SetaPDF2\Extractor\Extractor;
use setasign\SetaPDF2\Extractor\Result\Words;
use setasign\SetaPDF2\Extractor\Strategy\WordStrategy;

// load and register the autoload function
require_once __DIR__ . '/../../../../../bootstrap.php';

// PDF documents that can be chosen for this demo.
// NOTE(review): $assetsDirectory and displayFiles() are not defined in this script;
// presumably they are provided by the bootstrap file required above.
$files = [
    $assetsDirectory . '/pdfs/lenstown/Laboratory-Report.pdf',
    $assetsDirectory . '/pdfs/misc/boxes/[1000 500 -1000 -500].pdf',
    $assetsDirectory . '/pdfs/misc/boxes/[1000 500 -1000 -500]-R90.pdf',
    $assetsDirectory . '/pdfs/misc/boxes/[1000 500 -1000 -500]-R-90.pdf',
    $assetsDirectory . '/pdfs/misc/rotated/90.pdf',
    $assetsDirectory . '/pdfs/misc/rotated/180.pdf',
    $assetsDirectory . '/pdfs/misc/rotated/270.pdf',
];

$file = displayFiles($files);
$dpi = 72;
$pageNo = 1;

// The rendered page is cached as images/<pdf name>-<dpi>.png.
// Note that the cache name does not contain the page number.
$imageFile = sprintf('images/%s-%s.png', basename($file, '.pdf'), $dpi);

// Render the page with mutool only if no cached image exists yet.
if (!file_exists($imageFile)) {
    // every dynamic part of the command line is passed through escapeshellarg()
    $command = sprintf(
        'mutool draw -F png -r %s -o %s %s %s',
        escapeshellarg($dpi),
        escapeshellarg($imageFile),
        escapeshellarg($file),
        escapeshellarg($pageNo)
    );

    exec($command, $output, $exitCode);

    // a non-zero exit code means that no image was created: show a hint and stop
    if ($exitCode !== 0) {
        $message = 'Thumbnail could not be generated. Please make sure that '
            . '<a href="https://www.mupdf.com/docs/manual-mutool-draw.html" target="_blank">mutool</a> is installed '
            . 'and that the images/ folder is writable.';

        echo $message;
        exit;
    }
}

// load the document and get the page that was rendered to the image
$document = Document::loadByFilename($file);
$page = $document->getCatalog()->getPages()->getPage($pageNo);

// create an extractor instance that returns the text word by word
$extractor = new Extractor($document);
$extractor->setStrategy(new WordStrategy());

// Extract the words of the very same page that is rendered and measured ($pageNo),
// so that the highlights always match the displayed image.
/** @var Words $words */
$words = $extractor->getResultByPageNumber($pageNo);

// This is the factor between PDF points (1/72 inch) and pixels of the image rendered with $dpi.
$dpiFactor = 1/72 * $dpi;

// Get some general information about the page: its size converted to pixels and its rotation.
// NOTE(review): presumably getWidth()/getHeight() already respect the page rotation - confirm
// against the SetaPDF Page API.
$pageWidth = $page->getWidth() * $dpiFactor;
$pageHeight = $page->getHeight() * $dpiFactor;
$rotation = $page->getRotation();

// Now we create a graphic state instance. Its current transformation matrix is built up step
// by step below and is later used to map points from the PDF page into the pixel space of the
// image. The order of the following calls matters.
$gs = new GraphicState();
// scale it by the DPI factor
$gs->scale($dpiFactor, $dpiFactor);

// the crop box is used as the reference box of the view
$box = $page->getCropBox();
// translate it in view to the crop box origin
$gs->translate(-$box->llx, -$box->lly);
// rotate it by the negated page rotation, using the crop box origin as the rotation point
$gs->rotate($box->llx, $box->lly, -$rotation);

// Depending on the rotation value, translate back by the width and/or height of the crop box.
// There is intentionally no case for other values (e.g. 0): no additional translation is applied then.
switch ($rotation) {
    case 90:
        $gs->translate(-$box->getWidth(), 0);
        break;
    case 180:
        $gs->translate(-$box->getWidth(), -$box->getHeight());
        break;
    case 270:
        $gs->translate(0, -$box->getHeight());
        break;
}

// This little helper applies the graphic state to a given point: The point is converted into a
// vector, multiplied with the current transformation matrix and converted back into a point.
$f = static function(Point $p) use ($gs) {
    $v = new Vector($p->getX(), $p->getY(), 0);
    return $v->multiply($gs->getCurrentTransformationMatrix())->toPoint();
};

// The image path ends up in an HTML attribute, so escape it for that context
// (the file name is derived from the selected PDF and may contain arbitrary characters).
$imageSrc = htmlspecialchars($imageFile, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

// Output a relatively positioned wrapper in the size of the page with the rendered image in it.
// The highlight boxes are positioned absolutely inside this wrapper.
echo <<<HTML
<div style="position: relative; width: {$pageWidth}px; height: {$pageHeight}px;border: 1px solid lightgrey;">
    <img src="{$imageSrc}" style="position: absolute; width: {$pageWidth}px; height: {$pageHeight}px;"/>
HTML;

foreach ($words as $word) {
    // the word itself is shown as a tooltip; escape it explicitly for the attribute context
    $title = htmlspecialchars($word->getString(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // a word can have several bounds; every one gets its own highlight box
    foreach ($word->getBounds() as $bounds) {
        // create a rectangle with ordered vertices (because e.g. Ll will not be lower left after a rotation)
        $rect = new Rectangle(
            $f($bounds->getLl()),
            $f($bounds->getUr())
        );

        // "bottom" is used (instead of "top") because the y-axis of the transformed points
        // starts at the lower edge of the page
        $ll = $rect->getLl();

        echo '<div style="position:absolute;border:1px solid #ff00ff;background-color:#ff00ff3C;' .
            'left:' . $ll->getX() . 'px;' .
            'bottom:' . $ll->getY() . 'px;' .
            'width:' . $rect->getWidth() . 'px;height: ' . $rect->getHeight() . 'px;' .
            '" title="' . $title . '"></div>';
    }
}

// close the wrapper
echo '</div>';