#!/var/tmp/local/perl/bin/perl -w
use strict;
use warnings;

use WWW::Mechanize;
use Data::Dumper;
use Getopt::Long;
use Pod::Usage;
use DBI;
use File::Temp qw(tempfile);

Getopt::Long::Configure("no_ignore_case");

=head1 SYNOPSIS

  local_watcher.pl [-h | --help, show this help page]
                   [-a | --add <url>, add Amazon URL to check]
                   [-d | --dbfile <file>, default is /tmp/price.db which must have been initialized]
                   [-m | --mailto <address>, default is specified in the code]
                   [-u | --update, usually called from cron job]

Examples:

  - Add a URL to track
    local_watcher.pl -a http://www.amazon.com/exec/obidos/ASIN/0596515170/yuonwesidewil-20 -d /tmp/price.db

  - Update price info and send email if price has dropped
    local_watcher.pl -d /tmp/price.db -u -m yuonlamp@yahoo.com

=cut

my ($help, $url, $dbfile, $mailto, $update);

# Show the usage text when option parsing fails or when --help is given.
pod2usage()
    unless (
        GetOptions(
            "help"       => \$help,
            "add|a=s"    => \$url,
            "update|u"   => \$update,
            "dbfile|d=s" => \$dbfile,
            "mailto|m=s" => \$mailto,
        )
        && (!defined $help)
    );

my ($dbh, $mail_to);

$dbfile = "/tmp/price.db"      if !$dbfile;
$mailto = 'yuonlamp@yahoo.com' if !$mailto;

# RaiseError turns every DBI failure into an exception.  The database subs
# below all report errors through eval { ... } / $@, which only works when
# DBI actually dies instead of returning undef.
$dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", "", "",
                    { RaiseError => 1, PrintError => 0 })
    or die "Cannot open database $dbfile: $DBI::errstr\n";

if ($url) {
    #$url = "http://www.amazon.com/exec/obidos/ASIN/0596515170/yuonwesidewil-20";
    init_product_from_url($url);
}

exit(0) unless $update;

# Re-check every active product and mail a notice when its price went down.
my $products = get_active_product_info();

foreach my $id (keys %$products) {
    my $product_url = $products->{$id}->{url};

    # Read the previous positive price BEFORE recording the new one.
    my $last_price = get_last_price($id);

    # Fetch the product page and scrape the current price from it.
    my $info = get_product_info_from_url($product_url);

    # Only a positive price is stored or compared.
    next unless $info->{price} and $info->{price} > 0;

    update_price_info($id, $info->{price});

    if ($last_price > $info->{price}) {
        $info->{old_price} = $last_price;
        send_email($info);
    }
}

exit(0);

# Create an empty, uniquely named file under /tmp and return its path.
# File::Temp creates the file atomically, so the name cannot be guessed or
# pre-created by another user.  The caller is responsible for unlinking it.
sub get_rand_file_name {
    my ($fh, $tmpfile) = tempfile("amazon.$$.XXXXXX",
                                  DIR    => '/tmp',
                                  SUFFIX => '.html');
    close($fh);
    return $tmpfile;
}

# Mail a price-drop notice to the file-level $mailto address.
# $info is a hash reference; the keys name, old_price, price and url are read.
sub send_email {
    my ($info) = @_;

    # No trailing newline: the subject becomes a single mail header line.
    my $subject = "Product price dropped";

    my $msg = "Product (" . $info->{name} . ") price has dropped from \$";
    $msg .= $info->{old_price} . " to \$" . $info->{price} . ".\n";
    $msg .= "Please visit " . $info->{url} . " for details.\n";

    # List-form pipe open runs mail without a shell, so neither the subject
    # nor the recipient can inject shell syntax, and no temp file is needed.
    my $pid = open(my $mail, '|-', 'mail', '-s', $subject, $mailto);
    if (!$pid) {
        print STDERR "Error: cannot run mail: $!\n";
        return;
    }

    print {$mail} $msg;
    close($mail)
        or print STDERR "Error: mail failed (exit status $?)\n";

    return;
}

# Return a hash reference, keyed by the 'id' column, of all rows in the
# products table whose status is 1.  Returns an empty hash reference when
# the query fails.
sub get_active_product_info {
    my $results = {};
    my $sql     = "select * from products where status = 1";

    eval {
        my $sth = $dbh->prepare($sql);
        $sth->execute();
        $results = $sth->fetchall_hashref('id');
    };
    if ($@) {
        print STDERR "Error: $@\n";
    }

    return $results;
}

# Start tracking the product at $url: fetch its page, insert a products row
# and, when a positive price was found, record that first price.
sub init_product_from_url {
    my ($url) = @_;

    $url = normalize_url($url);

    my $info = get_product_info_from_url($url);
    my $id   = insert_product_info($info);

    if ($id > 0 and $info->{price} and $info->{price} > 0) {
        update_price_info($id, $info->{price});
    }
}

# Return $url with its query string removed.
sub normalize_url {
    my ($url) = @_;

    # Remove everything after the '?'
    $url =~ s/^([^?]+)\??(.*)$/$1/;

    return $url;
}

# Return the most recently recorded positive price for product $id,
# 0 when there is none, or -1 when the query fails.
sub get_last_price {
    my ($id) = @_;

    my $sql = "select price, max(ts) from prices where id = ? \n"
            . "and price > 0";
    my $price = 0;

    eval {
        my $sth = $dbh->prepare($sql);
        $sth->execute($id);
        my $results = $sth->fetchall_arrayref();
        if (defined $results->[0]->[0]) {
            $price = $results->[0]->[0];
        }
    };
    if ($@) {
        print STDERR "Error: $@\n";
        $price = -1;
    }

    return $price;
}

# Append a (product id, current time, price) row to the prices table.
sub update_price_info {
    my ($id, $price) = @_;

    my $sql = "insert into prices values(?, ?, ?)";

    eval {
        my $sth = $dbh->prepare($sql);
        $sth->execute($id, time(), $price);
    };
    if ($@) {
        print STDERR "Error: $@\n";
    }
}

# Given $product which is reference to a hash that contains product
# info, insert it into the products table.
# Returns the new row id, 0 when the id, name or url is missing, or -1 when
# the insert fails.
sub insert_product_info {
    my ($product) = @_;

    # First we need to update the product table
    # locking should be done, but for simplicity, we don't lock it for
    # the time being
    if (!$product->{id} or !$product->{name} or !$product->{url}) {
        return 0;
    }

    # Store the normalized URL (it used to be computed and then ignored).
    my $url = normalize_url($product->{url});
    my $sql = "insert into products values(NULL, ?, ?, ?, ?);";
    my $id  = -1;

    eval {
        my $sth = $dbh->prepare($sql);
        $sth->execute($product->{id}, $product->{name}, $url,
                      $product->{active});
        $id = $dbh->last_insert_id(undef, undef, "products", "id");
    };
    if ($@) {
        print STDERR "Error: $@\n";
    }

    return $id;
}

# Return a fresh product hash reference with empty/zero defaults.
sub init_product_info {
    my $product = {
        price  => 0,
        id     => 0,
        name   => "",
        active => 0,
    };

    return $product;
}

# Fetch $url and return a product hash reference scraped from the page.
# When the request fails, the default (empty) product is returned with only
# url and active filled in.
sub get_product_info_from_url {
    my ($url) = @_;

    my $product = init_product_info();
    my $agent   = init_agent();
    my $tmpfile;

    # The agent is created with autocheck on, so a failed request dies.
    # Trap it here so that one unreachable page does not abort the whole run.
    eval {
        $agent->get($url);
        if ($agent->status() == 200) {
            $tmpfile = get_rand_file_name();
            $agent->save_content($tmpfile);
            $product = get_product_info_from_file($tmpfile);
        }
    };
    if ($@) {
        print STDERR "Error: $@\n";
    }

    # Always remove the temp file, even when saving or parsing failed.
    unlink($tmpfile) if defined $tmpfile;

    $product->{url}    = $url;
    $product->{active} = 1;

    return $product;
}

# Build the WWW::Mechanize agent used for all page fetches.
sub init_agent {
    my $agentstr = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12';
    my $agent    = WWW::Mechanize->new(autocheck => 1, agent => $agentstr);

    return $agent;
}

# Scan a saved product page line by line for the price and ASIN fields.
sub get_product_info_from_file { my ($file) = (@_); my $fh; my $price; my $id; my $name; my %product; $product{price} = 0; $product{id} = 0; $product{name} = ""; open $fh, $file; while (<$fh>) { if (/class="priceLarge">\$([.0-9]+)/) { $product{price} = $1; } if (/name="ASIN"\s+value="([^"]+)"/) { $product{id} = $1; } if (/