Edit: I noticed there is a deletePages() function. If I call this function prior to re-indexing, would that work? For example, with this code in the cron script:
Code: Select all
// Crawl configuration; the available settings are defined in php/crawlindex.php.
$objci = new crawlIndex($obsql);

// NOTE(review): deletePages() runs before any setting is changed and before run().
// Presumably it clears the previously indexed pages -- confirm in php/crawlindex.php.
$objci->deletePages();

// 1 = re-index pages that are already registered, 0 = do not re-index them.
$objci->reindex = 1;

// Depth to index.
$objci->max_depth = 2;

// Paths to exclude from the crawl.
$objci->url_exclude = [
    '/index.php',
    '/search',
    'webapps',
    '/branch/',
    '/account/',
    '/account_',
    '/departments/pc',
    'changeStyle.php',
    '/news/memo',
];

// Tags to delete completely, in the form [ [tag => [attr => [values]]], ... ].
$objci->deltags = [
    ['a' => []],
    ['form' => []],
    ['select' => []],
    ['script' => []],
    ['link' => []],
    ['style' => []],
];

// Fetch the domain id from the database and store it in $_SESSION['ssep_dom_id'].
$_SESSION['ssep_dom_id'] = getDomainId($obsql, $objci->domain);

// Start indexing at the HTTPS root of the configured domain.
$start_url = "https://{$objci->domain}";
$objci->run($start_url);