If you're using
LogMiner to analyse your Apache/IIS logs, here's an easy recipe to be notified of sites that send visitors your way.
First, let's define a Pg/PLSQL function to extract the new referrers:
CREATE TYPE new_referrer AS (
site_id int8,
site_name text,
referrer text,
hits int
);
CREATE OR REPLACE FUNCTION get_new_referrers()
RETURNS SETOF new_referrer AS $body$
DECLARE
rec new_referrer;
lastRef int8;
BEGIN
SELECT INTO lastRef CAST( value AS int8 )
FROM catalog
WHERE name = 'LastReferrer';
IF lastRef IS NULL THEN
INSERT INTO catalog ( name, value ) VALUES ( 'LastReferrer', 0 );
lastRef := 0;
END IF;
FOR rec IN
EXECUTE 'SELECT s.id AS site_id, s.name AS site_name, r.referrer, tmp.hits ' ||
'FROM ( ' ||
' SELECT site, referrer, COUNT(*) AS hits ' ||
' FROM accesses ' ||
' WHERE referrer > ' || lastRef ||
' GROUP BY site, referrer ' ||
') tmp ' ||
'JOIN sites s ON s.id = tmp.site ' ||
'JOIN referrers r ON tmp.referrer = r.id ' ||
'WHERE r.extern = true ' ||
'ORDER BY s.name, tmp.hits DESC, r.referrer'
LOOP
RETURN NEXT rec;
END LOOP;
UPDATE catalog
SET value = ( SELECT MAX( id ) FROM referrers )
WHERE name = 'LastReferrer';
RETURN;
END;
$body$ LANGUAGE plpgsql;
Then, a little PHP script to run the query and format the results:
#!/usr/local/bin/php<?php $lines =
array();
$lastSite =
'';
// quickest and dirtiest way to execute a query in PHP ;-) exec ( "echo 'SELECT r.* " .
"FROM get_new_referrers() r " .
"LEFT JOIN search_engines se ON ( r.referrer ~* se.pattern AND r.referrer ~* se.query_pattern )" .
"WHERE se.id IS NULL' | /usr/local/pgsql/bin/psql -U logminer -At logminer",
$lines );
foreach( $lines as $line ) { $parts =
explode( '|',
$line,
4 );
if( $lastSite !=
$parts[ 0 ] ) { printf( "\n\n\n\n" .
"-------------------------------------------------------\n" .
"Referrers for %s\n" .
"-------------------------------------------------------\n\n",
$parts[ 1 ] );
$lastSite =
$parts[ 0 ];
} printf( "[%d] %s\n\n",
$parts[ 3 ],
$parts[ 2 ] );
}?>
The query above grabs the referrers for every site available in the DB, filtering requests coming from known search engines to reduce the level of "noise".
This script is finally invoked by the cron job that processes the logs:
referrers_report.php | mail -s 'New referrers report' email@example.com