#!/usr/local/bin/perl
#
# March 21, 2011: Large re-write by Matt Kuba
#
use lib '/var/www/vweb/dynamite/bin';
use strict;
my $r = shift;
use Auth::APIKey;
use CGI ':standard';
use Geo::Currency;
use Geo::Region;
use GearmanInclude;
use HTML::Entities;
use MemcachedInclude;
use Miner::Newlink;
use Storable qw(nfreeze);
use SQL::MySQL;
use Util::Affil;
use Util::Extractor;
use Util::Extractor::Request;
use Util::ID;
use Util::MD5;
use Util::Select;
use Util::String;
use Util::Transform;
use Util::Transform::LID;
use URI::Escape;
use XMLRequestInclude;
my $base_path = "/var/www/vweb/dynamite";
setpriority(0,0,-10);
Sys::Proctitle::setproctitle("miner ( REDIRECT )");
#
# December 22, 2009:
# This is the shortcut for all API queries to determine which resellers are valid
#
if(param("resellers") || param("reseller")) {
print_resellers();
setpriority(0,0,+10);
exit(1);
}
print header(
-type => (param("trace") > 0) ? 'text/plain' : 'text/html',
-expires => 'Sat, 18 Jul 1983 12:00:00 GMT',
-pragma => 'no-cache',
-cache_control => 'private,no-cache,no-store,must-revalidate,max-age=0,pre-check=0,post-check=0'
);
#
# December 19, 2009:
# Setup a default user if we do not have one
#
my $user = param("user");
if(!Util::ID::is_id($user,"u")) {
$user = param("id");
}
if(!Util::ID::is_id($user,"u")) {
$user = Auth::APIKey::get_user();
}
if(!Util::ID::is_id($user,"u")) {
$user = "u00000601";
}
Sys::Proctitle::setproctitle("miner ( REDIRECT: $user )");
# Target could be a lid or a URL. If blank, display an error.
my $dest = redirect_destination($user);
# setup defaults
my $url = '';
my ($lid,$aid,$pid,$rid,$eid,$price,$priority);
# if dest is a lid
if(Util::ID::is_id($dest,"l")) {
Sys::Proctitle::setproctitle("miner ( REDIRECT: $dest )");
# hopefully this should be cached and fast
my $linkhash_ref = Util::Select::id_to_hash($dest);
if ($linkhash_ref->{"link-$dest"} && $linkhash_ref->{"link-$dest"}->{"-name"} =~ /^https?:/i) {
$lid = $dest;
$aid = -1;
$pid = $linkhash_ref->{"link-$lid"}->{"pid"};
$rid = $linkhash_ref->{"link-$lid"}->{"rid"};
if (!param("clean")) {
my $affil_ref = lid_to_best_hash($lid,$user);
$aid = $affil_ref->{"link-$lid"}->{"best"};
if($affil_ref->{"link-$lid"}->{"affil-$aid"} && $affil_ref->{"link-$lid"}->{"affil-$aid"}->{"url"} =~ /^https?:/i) {
$url = $affil_ref->{"link-$lid"}->{"affil-$aid"}->{"url"};
$eid = $affil_ref->{"link-$lid"}->{"affil-$aid"}->{"eid"};
$priority = $affil_ref->{"link-$lid"}->{"affil-$aid"}->{"priority"};
}
}
if($url !~ /^https?:/i) {
# either clean=1 or something bad happened with lid_to_best_hash. recover by setting $url to -name
$url = $linkhash_ref->{"link-$lid"}->{"-name"};
$aid = $lid;
}
# We need to convert everything to USD
my $reseller_hash_ref = Util::Select::id_to_hash($rid);
$price = Geo::Currency::convertcurrency("USD",$reseller_hash_ref->{"reseller-$rid"}->{"currency"}) * $linkhash_ref->{"link-$lid"}->{"price"};
}
}
# if dest is a url
if($dest =~ /^https?:/i) {
my $rid = url_to_reseller($dest);
if(Util::ID::is_id($rid,"r")) {
Sys::Proctitle::setproctitle("miner ( REDIRECT: $rid )");
if ( !param("clean") ) {
($url,$eid,$priority) = blind_url_to_affil($dest,$user);
}
}
if ($url !~ /^https?:/i) {
$url = $dest;
$eid = -1;
$priority = 0;
}
}
if ($url =~ /^https?:/i) {
# yay! let's output this bad boy
print_redirect($url);
} else {
print "<html><body>An error occured and has been logged. Please try again later.<br/>$url</body></html>\n";
setpriority(0,0,+10);
exit(0);
}
# EVERYTHING PAST HERE NEEDS TO BE FORKED
use POSIX 'setsid';
use Apache2::SubProcess;
my $childpid;
my $forked = 1;
# don't fork if debug > 1
if (param('debug') > 1) {
$childpid = 0;
} else {
# if the fork fails, still need to process log
if(!defined($childpid = fork)) {
$childpid = 0;
$forked = 0;
} else {
# we forked a child (we are parent)
# have to make sure this process never blocks waiting for children
$SIG{'CHLD'} = 'IGNORE';
}
}
unless ($childpid) { # child does:
unless (param('debug') > 1) {
open STDIN, "</dev/null";
open STDOUT, ">/dev/null";
}
setsid if $forked;
chdir '/' if $forked;
#sleep 5; # simulate search time
my $log_ref;
$log_ref->{"lid"} = $lid || "-1";
$log_ref->{"rid"} = $rid || "-1";
$log_ref->{"aid"} = $aid || "-1";
$log_ref->{"pid"} = $pid || "0";
$log_ref->{"engine"} = $eid || "-1";
$log_ref->{"price"} = $price || "0";
$log_ref->{"priority"} = $priority || "0";
log_now($user,$log_ref);
# mod_perl hijacks our exit call so we have to call CORE::exit to terminate our child
CORE::exit(0) if ($forked);
}
setpriority(0,0,+10);
exit(1);
#
# redirect_destination
# This sub attempts to figure out where the user wants to end up.
# The order for choices is lid, url, pid/rid, rid, pid-only, freetext search
#
sub redirect_destination($) {
my $user = shift;
#
# December 7, 2011:
# Add support for a hackily passed rid-pid combo
#
my $lid = param("lid");
my $pid = param("pid");
my $rid = param("rid");
if($lid =~ /^(r\d{8})-(p\d{8})$/) {
$rid = $1;
$pid = $2;
$lid = "";
}
#
# Process a passed LID, the basic redirect
#
if($lid =~ /^[tl](?:\d{12}|\d{8})$/) {
$lid =~ s/^t/l/g;
# make sure it is a valid lid
# this could block, but it pretty much has to finish
my $hash = Util::Select::id_to_hash($lid);
if ($hash->{"link-$lid"} && $hash->{"link-$lid"}->{"-name"} =~ /^https?:/) {
return $lid;
}
}
#
# url logic
# It may have come under q= or url=
#
my @names = ("url","q");
my $url;
foreach my $name (@names) {
$url = param($name);
# Sometimes we are passed encoded html characters. Decode those.
$url = decode_entities($url);
# June 10, 2010:
# Remove leading garbage that might get passed to the redirector for some reason
$url =~ s/^[^a-z]+?(https?:)/$1/i;
if($url =~ /^https?:/i) {
last;
}
}
if($url =~ /^https?:/i) {
# this has the potential to block, but for now risk it to have the clean url.
$lid = Miner::Newlink::newlink($url);
if(Util::ID::is_id($lid,"l")) {
return $lid;
}
# either not a buy page or unknown merchant, return url only.
return $url;
}
#
# pid/rid logic
# most of this traffic comes from the dashboard
#
$rid =~ s/r1/r0/g;
if(Util::ID::is_id($pid,"p") && Util::ID::is_id($rid,"r")) {
# double check that we don't already index it
my @lids = Util::Transform::LID::pid_to_lids( $pid,$rid );
my $lid_hash = Util::Select::id_to_hash(@lids);
my @lids;
#
# December 14, 2011:
# Add support for removing bad LIDs from the redirector
#
foreach my $lid_key (keys %{$lid_hash}) {
if($lid_hash->{$lid_key}->{"ean"} > 5) {
delete($lid_hash->{$lid_key});
next;
}
if(param("rid") =~ /^r1/ && $lid_hash->{$lid_key}->{"3rd_price"} == 0) {
delete($lid_hash->{$lid_key});
next;
}
if(param("rid") =~ /^r0/ && $lid_hash->{$lid_key}->{"3rd_time"} > $lid_hash->{$lid_key}->{"etime"}) {
delete($lid_hash->{$lid_key});
next;
}
push(@lids,$lid_hash->{$lid_key}->{"-id"});
}
if ($#lids >= 0) {
# return the first lid. this could be expanded to utilize pid_to_best_hash (maybe)
return $lids[0];
}
Sys::Proctitle::setproctitle("miner ( REDIRECT: $pid )");
my $reseller_hash = Util::Select::id_to_hash($rid);
$reseller_hash->{"reseller-r01000001"}->{"-name"} = "Google Products";
$reseller_hash->{"reseller-r01000001"}->{"-id"} = "r01000001";
$reseller_hash->{"reseller-r01000001"}->{"currency"} = "USD";
$reseller_hash->{"reseller-r01000001"}->{"country"} = "US";
$reseller_hash->{"reseller-r01000001"}->{"featured"} = "US";
$reseller_hash->{"reseller-r01000001"}->{"home"} = "http://www.google.com/prdhp";
$reseller_hash->{"reseller-r01000001"}->{"rsearch"} = "http://www.google.com/search?tbm=shop&q=";
# the following creates a search url
#
# July 11, 2011:
# Added the ability to search any domain
#
my $pid_hash = Util::Select::id_to_hash($pid);
my $sku = trim($pid_hash->{"product-$pid"}->{"sku"});
if($sku) {
if(!$sku) {
$sku = trim($pid_hash->{"product-$pid"}->{"keysku"});
}
my $memd = $MemcachedInclude::memd;
if(!$memd) {
$memd = init_memcached();
}
my $manu_sku;
if($sku !~ /\d[a-z]/i && $sku !~ /[a-z]\d/) {
$manu_sku = Util::Transform::pid_to_fullstring($pid_hash->{"product-$pid"}->{"manufacturer"});
$manu_sku = trim("$manu_sku $sku");
}
my $url = $reseller_hash->{"reseller-$rid"}->{"rsearch"} . uri_escape($manu_sku);
my $md5_search = md5_hash($url);
my $get = $memd->get("CACHE_GET-$md5_search");
my @lids = grep(/^l/,split(/,/,$get));
if(!@lids) {
my $request = Util::Extractor::Request->new(
url => $url,
flags => FETCH_TYPE_API,
fetchmode => FETCH_MODE_SINGLE
);
my $uresp = Util::Extractor::fuse_redirect($request);
@lids = Miner::Newlink::super_newlink($uresp->cache,$uresp->redirect);
}
my $lid_hash = Util::Select::id_to_hash(@lids);
my @lids;
foreach my $lid_key (keys %{$lid_hash}) {
if($lid_hash->{$lid_key}->{"ean"} > 5) {
delete($lid_hash->{$lid_key});
next;
}
if(param("rid") =~ /^r1/ && $lid_hash->{$lid_key}->{"3rd_price"} == 0) {
delete($lid_hash->{$lid_key});
next;
}
if(param("rid") =~ /^r0/ && $lid_hash->{$lid_key}->{"3rd_time"} > $lid_hash->{$lid_key}->{"etime"}) {
delete($lid_hash->{$lid_key});
next;
}
push(@lids,$lid_hash->{$lid_key}->{"-id"});
}
if($#lids == 0) {
return $lids[0];
}
if($reseller_hash->{"reseller-$rid"} && $reseller_hash->{"reseller-$rid"}->{"rsearch"} =~ /^https?:/i) {
return $reseller_hash->{"reseller-$rid"}->{"rsearch"} . uri_escape($sku);
}
my $domain = match_domain($reseller_hash->{"reseller-$rid"}->{"home"});
return "http://www.google.com/#q=site:$domain ". uri_escape($sku);
}
}
#
# September 1, 2011:
# Add support fro searchinf for fake skus
#
if($pid =~ /^f/ && Util::ID::is_id($rid,"r")) {
my $lid = $pid;
$lid =~ s/^f/l/g;
my $lid_hash = id_to_hash($lid);
my $sku = trim($lid_hash->{"link-$lid"}->{"scratch"});
my $reseller_hash = Util::Select::id_to_hash($rid);
if($reseller_hash->{"reseller-$rid"} && $reseller_hash->{"reseller-$rid"}->{"rsearch"} =~ /^https?:/i) {
my $rsearch = $reseller_hash->{"reseller-$rid"}->{"rsearch"};
return $rsearch . uri_escape($sku);
}
my $domain = match_domain($reseller_hash->{"reseller-$rid"}->{"home"});
return "http://www.google.com/#q=site:$domain ". uri_escape($sku);
}
#
# rid logic
#
if(Util::ID::is_id($rid,"r")) {
my $reseller_hash = Util::Select::id_to_hash($rid);
if ($reseller_hash->{"reseller-$rid"} && $reseller_hash->{"reseller-$rid"}->{"home"} =~ /^https?:/i) {
return $reseller_hash->{"reseller-$rid"}->{"home"};
}
}
#
# pid logic
#
if(Util::ID::is_id($pid,"p")) {
my $ip = $ENV{"REMOTE_ADDR"};
my $country = Geo::Region::ip_to_country($ip);
my $hash_ref = pid_to_best_hash($pid,$user,$country);
$lid = $hash_ref->{"bestlid"};
if(Util::ID::is_id($lid,"l")) {
return $lid;
}
}
#
# I got nothing! Return blank.
#
return '';
}
#
# December 22, 2009:
# It makes sense for us to occasionally take a URL and quickly convert it if possible.
#
sub blind_url_to_affil($$) {
my ($url,$user) = @_;
if($url !~ /^https?:/i) {
return $url;
}
my $rid = url_to_reseller($url);
if(!Util::ID::is_id($rid,"r") || !Util::ID::is_id($user,"u")) {
return ($url,"","");
}
#
# Order by priority descending. That way, whatever id id has the lowest priority wins, so long as its enabled
# February 24, 2010:
# Missed priority flag from the SQL query. It's in now
#
my ($pre,$post,$engine,$priority);
eval {
my $dbh = connect_to_db("read");
my $query = "SELECT priority,engineid,pre_linkin_id,linkin_id from enginetrans_ndb where status='1'
AND dd_uid=? AND engineid=? order by priority DESC";
my $sth = $dbh->prepare($query);
$sth->execute($user,$rid);
while(my $ref = $sth->fetchrow_hashref()) {
$post = $ref->{'linkin_id'};
$pre = $ref->{'pre_linkin_id'};
$engine = $ref->{'engineid'};
$priority = $ref->{'priority'};
}
$sth->finish();
$dbh->disconnect();
};
$url = affil_to_paid_affil($url,$pre,$post);
return ($url,$engine,$priority);
}
#
# Given a URL, print the actual redirect that will be given to the user
#
sub print_redirect($) {
my $link = shift;
#
# If the link isn't formed well, don't even try to redirect the user anywhere
#
if($link !~ /^https?:/i) {
return;
}
$link = Util::Extractor::custom_url($link);
$_ = $link;
$link =~ s/&/&/g;
#
# Don't let the HTML redirect in the header.
# Some browsers do not process this correctly.
#
# December 25, 2008:
# We thought this was some kind of webkit derivatives. It turns out, it's pretty mcuh everyone.
# We need to find a method so that if a user clicks and clicks again it gets filtered out. Prefereably at the click
# level, but also at the log level. The log level is more important for accounting
#
# Decmeber 29, 2008: there is no such thing as a bad useragent anymore. Redirect everyone the same way.
#
# if(!bad_useragent($useragent)) {
#
# Everyone else is OK
#
# December 22, 2009:
# Include all debug information right here
#
if(param("debug") > 0) {
print "<html><body><textarea cols=150 rows=30 wrap=physical>";
}
print("<html>\n");
print("<head>\n");
print("<META NAME=\"robots\" CONTENT=\"noarchive\"><META HTTP-EQUIV=\"CACHE-CONTROL\" CONTENT=\"NO-CACHE\">");
printf("<META HTTP-EQUIV=\"Refresh\" content=\"0;url=%s\">\n",$link);
printf("<script type='text/javascript'> top.location.replace='%s'; </script>\n",$link);
print("</head>\n");
print("<body>\n");
printf("<noscript>Please click <a href='%s'>here</a> if you are not redirected immediately.\n",$link);
printf("<img src='http://detonator.dynamitedata.com/i/1x1.gif'>\n");
printf("</noscript></body>\n");
print("</html>\n");
if(param("debug") > 0) {
print "</textarea></body></html>\n";
}
return 1;
}
#
# March 20, 2009: discovered double log bug in here.
# April 9, 2009: This should definitely be forked to the background eventually. We dont' want to hold up a user for this
#
sub log_now($$) {
my ($user,$log_ref) = @_;
my $pid = $log_ref->{"pid"} || "0";
my $lid = $log_ref->{"lid"} || "-1";
my $rid = $log_ref->{"rid"} || "-1";
my $aid = $log_ref->{"aid"} || "-1";
my $engine = $log_ref->{"engine"} || "-1";
my $price = $log_ref->{"price"} || "0";
my $priority = $log_ref->{"priority"} || "0";
if(!Util::ID::is_id($user,"u")) {
return 0;
}
#
# March 25, 2009: ignore stuff that isn't correct
#
if($engine !~ /^[re]\d{8}$/) {
$engine = -1;
}
if($aid !~ /^[la](?:\d{12}|\d{8})$/) {
$aid = -1;
}
if($pid !~ /^p\d{8}$/) {
$pid = 0;
}
if($lid !~ /^l(?:\d{12}|\d{8})$/) {
$lid = -1;
}
if($rid !~ /^r\d{8}$/) {
$rid = -1;
}
if($priority <= 0) {
$priority = 0;
}
my $status = "1";
my $ip = $ENV{"REMOTE_ADDR"};
#
# March 22, 2010:
# Check to see what country this user is from. If he is from
# a different country than the reseller, then do not log this traffic
# We may have to doublecheck which ISO code for the UK is being returned by
# the ip_to_country function
#
# April 6, 2010:
# Refactorered so that we can include merchants that ship to more than one country
#
my $country = Geo::Region::ip_to_country($ip);
if($country =~ /^[A-Z]{2}$/ && Util::ID::is_id($rid,"r")) {
my $rid_hash = Util::Select::id_to_hash($rid);
my $rid_country = $rid_hash->{"reseller-$rid"}->{"country"};
if($rid_country && !grep(/$country/,$rid_country)) {
$status = 'L';
}
}
my $source = $ENV{"HTTP_REFERER"};
if($source !~ /^https?:/i) {
$source = "-";
}
my $time = time();
#
# January 25, 2010:
#
# Log this user for the remainder of the day. Do not count any duplicates
# as they will certainly not be counted by the engines
#
my $memd = init_memcached();
if(Util::ID::is_id($aid,"la") || Util::ID::is_id($engine,"re")) {
my $key = "REDIRECT-$ip-$aid";
if (!Util::ID::is_id($aid,"la")) {
$key = "REDIRECT-$ip-$engine";
}
if($memd->get($key)) {
print STDERR "[$user] Warning: duplicate log error\n";
$status = 'A';
} else {
my ($sec,$min,$hour,undef,undef,undef,undef,undef,undef) = localtime(time());
$memd->set($key,$user,60*60*24-($hour*60*60)-($min*60)-$sec);
}
}
# March 22, 2011
# status 0 if no rid just to make sure we don't break any compatibility.
if(!Util::ID::is_id($rid,"r")) {
$status= '0';
}
#
# March 25, 2010:
# Start to filter out by user agent. This will significantly cut down
# on bot traffic
#
my $useragent = $ENV{"HTTP_USER_AGENT"};
if($useragent =~ /(?:bot|crawler|spider|yandex|slurp)/i || $useragent eq "") {
$status = 'b';
}
# Oct 28, 2010
# this whole eval section needs to be moved to gearman_logger in MinerInclude
# this means no db wait durring a redirect, making it faster (in theory)
# basically we need to pass a frozen hashref to gearman
# March 22, 2011
# Moved back to direct method since we fork now
my %loghash = (
logtype => 'redirect',
ip => $ip,
uid => $user,
page => $source,
pid => $pid,
lid => $lid,
rid => $rid,
aid => $aid,
engine => $engine,
price => $price,
useragent => $useragent,
priority => $priority,
status => $status,
);
my $frozen = nfreeze(\%loghash);
my $gearc = $GearmanInclude::gearc;
$gearc = init_gearman();
return $gearc->dispatch_background(logger => $frozen, { uniq => '-' } );
}