SetaPDF Demos

There seems to be a problem loading the components. Please check your PHP error logs for details!

Common causes are that the trial license has not been installed or that you are using a trial version on an unsupported PHP version.

Highlight Words

This demo adds highlight annotations above found words.

It caches the intermediate result in a session to speed things up.

PHP
<?php

// load and register the autoload function
require_once __DIR__ . '/../../../../../bootstrap.php';

// the word to highlight; the value returned by displaySelect() is compared
// against every extracted word further below
$word = displaySelect('Highlight:', [
    'Terms' => 'Terms',
    'tektown' => 'tektown',
    'Information' =>  'Information',
    'con' => 'con',
    'eos' => 'eos',
    'sed' => 'sed',
    'volum' => 'volum'
]);

// load the document
// NOTE(review): $assetsDirectory is not defined in this file - presumably it is
// provided by bootstrap.php; confirm
$document = \SetaPDF_Core_Document::loadByFilename(
    $assetsDirectory . '/pdfs/tektown/Terms-and-Conditions.pdf',
    new \SetaPDF_Core_Writer_Http('document.pdf', true)
);

// initiate an extractor instance
$extractor = new \SetaPDF_Extractor($document);

// create the word extraction strategy and pass it to the extractor instance
$strategy = new \SetaPDF_Extractor_Strategy_Word();
$extractor->setStrategy($strategy);

// get access to the document's pages instance
$pages = $document->getCatalog()->getPages();

// no pages are added or removed by this script, so resolve the count only once
$pageCount = $pages->count();

// use the words from the session cache if a previous request already extracted them
// NOTE(review): this relies on the session being started elsewhere (presumably
// in bootstrap.php); confirm
if (isset($_SESSION['wordsPerPage']) && is_array($_SESSION['wordsPerPage'])) {
    $wordsPerPage = $_SESSION['wordsPerPage'];
} else {
    // otherwise extract them now; the session is written only after the
    // extraction has finished, so an aborted run cannot leave an empty cache behind
    $wordsPerPage = [];

    // walk through the pages and extract the words
    for ($pageNo = 1; $pageNo <= $pageCount; $pageNo++) {
        // every page gets an entry, even if no word is found on it
        $wordsPerPage[$pageNo] = [];

        // restructure the data to be less memory intensive in the "cache"
        foreach ($extractor->getResultByPageNumber($pageNo) as $_word) {
            $wordsPerPage[$pageNo][] = [
                'string' => $_word->getString(),
                'bounds' => $_word->getBounds()
            ];
        }
    }

    // cache the words per page
    $_SESSION['wordsPerPage'] = $wordsPerPage;
}

// a simple counter
$found = 0;

// walk through the pages...
for ($pageNo = 1; $pageNo <= $pageCount; $pageNo++) {
    // pages without any words (or missing from an older cache) have nothing to highlight
    if (empty($wordsPerPage[$pageNo])) {
        continue;
    }

    // get access to the page's annotations instance
    $annotations = $pages->getPage($pageNo)->getAnnotations();

    // iterate over the words
    foreach ($wordsPerPage[$pageNo] as $_word) {
        // only exact, case-sensitive matches are highlighted
        if ($_word['string'] !== $word) {
            continue;
        }

        // a word may come with several bounds; each of them gets its own
        // highlight annotation, which is added to the page's annotations instance
        foreach ($_word['bounds'] as $bound) {
            $rect = new \SetaPDF_Core_Geometry_Rectangle($bound->getLl(), $bound->getUr());
            $rect = \SetaPDF_Core_DataStructure_Rectangle::byRectangle($rect);

            $annotation = new \SetaPDF_Core_Document_Page_Annotation_Highlight($rect);
            $annotation->setColor([1, 1, 0]);
            $annotation->setContents('Match #' . (++$found));
            $annotation->setPrintFlag();
            $annotations->add($annotation);
        }
    }
}

// save the resulting document
$document->save()->finish();