#!/usr/local/bin/perl -w
#
# grammarbot -- searches the web for pages containing the typo "get's", shows
# each hit to the operator and, once the operator confirms it, mails the form
# letter kept in gets.txt to the first mailto: address found on that page.
#
# Flow: the first batch of candidate pages is harvested from AltaVista; every
# further batch (requested interactively) is harvested from Google.  When the
# operator stops, a report is appended to report.txt and the mailed pages are
# appended to GetsalreadyBothered.txt.
#
# NOTE(review): the copy of this script that was reviewed had lost its line
# breaks and every "<...>" span.  Filehandle reads (<STDIN> and friends) were
# restored from the adjacent open() calls.  Anything that had to be guessed is
# marked NOTE(review) at the point of use -- please confirm those items.

use strict;
use warnings;

use LWP::UserAgent;
use HTTP::Request;
use HTTP::Request::Common qw(POST);
use HTTP::Response;
use URI::URL;
use URI::Escape;

# Several prompts end without a newline; unbuffer STDOUT so they are visible
# before the script blocks on STDIN.
$| = 1;

# ---------------------------------------------------------------- settings --

my $LETTER_FILE   = 'gets.txt';       # form letter; ":::" marks the quote slot
my $REPORT_FILE   = 'report.txt';
my $BOTHERED_FILE = 'GetsalreadyBothered.txt';

my $MAIL_GATEWAY = 'http://www.yawmail.com/cgi-bin/yawmail.cgi';
my $MAIL_SESSION = 'grammarbot:0:1:0:6:180720203:16:4:1';

# FIXME: credentials are hard-coded in the source; move them to the
# environment or a private config file.
my $MAIL_USER = 'grammarbot';
my $MAIL_PASS = 'sowell';

# Patterns whose single capture is one result link on a search-result page.
#
# NOTE(review): for Google only the head of the pattern (through "</A>\s*")
# survived; the anchor capture after it is a reconstruction.
my $GOOGLE_LINK =
    qr{/white\.gif\s*height=\d+\s*width=\d+\s*alt=""></A>\s*<A\s+HREF="?([^"\s>]+)}i;

# NOTE(review): for AltaVista only the head (through "</b>") survived; the
# anchor capture after it is a reconstruction.
my $ALTAVISTA_LINK = qr{\d\.\s*</b>\s*<a\s+href="?([^"\s>]+)}i;

my $LYCOS_LINK = qr/target=(http%3A[^\s\"\=]+(?:\.htm\w*|\.asp))/i;

# ------------------------------------------------------------ shared state --

my @variousOffenders;   # links harvested by the most recent grab*Offenders
my @newOffenders;       # the subset of those not yet in $BOTHERED_FILE
my @mailed;             # pages whose owner was mailed during this run
my @noMailto;           # pages with the typo but without a mailto: link

my $ua = LWP::UserAgent->new;

# -------------------------------------------------------------------- main --

print "Didja remember to get a fresh timestamp?\n\n";
print "Grabbing URLs!\n";
grabAltaVistaOffenders();
delDupes();

while (1) {
    print "Found " . scalar(@newOffenders) . " on this page!\n";
    for my $offender (@newOffenders) {
        _check_offender($offender);
    }

    print "you have scoured 2 pages of Google responses. Would you like to do two more?";
    print chr(7), chr(7);
    last unless _ask_yes();

    grabGoogleOffenders();
    delDupes();
}

printReport();

# ------------------------------------------------------------- subroutines --

# Read one line from STDIN; true when it contains a "y" (any case).
# End-of-input counts as "no".
sub _ask_yes {
    my $answer = <STDIN>;
    return 0 unless defined $answer;
    return $answer =~ /y/i ? 1 : 0;
}

# Examine one candidate page: fetch it, look for the typo, look for a mailto:
# address, and ask the operator whether to send the letter.
# Pages with the typo but no address are recorded in @noMailto.
sub _check_offender {
    my ($url) = @_;

    print "Checking $url\n";
    my $response = $ua->request(HTTP::Request->new('GET', $url));
    return unless $response->is_success;
    my $page = $response->content;

    print "Checking for Gets!\n";
    return unless $page =~ /(.{1,57}get's.{1,57})/im;
    my $badInstance = $1;
    $badInstance =~ s/<[^>]*>//g;    # show the operator text, not markup

    print "Checking for mailto!\n";
    unless ($page =~ /mailto\s*:\s*(\w+\@[^\"\'\s\>\?]*)/i) {
        push @noMailto, $url;
        return;
    }
    my $theirEmail = $1;

    print "Mailto: $theirEmail\n";
    print "Found at $url\n";
    print "Is this bad? \n- $badInstance\n";
    print chr(7);
    return unless _ask_yes();

    _send_letter($url, $theirEmail, $badInstance);
    return;
}

# Fill the form letter with the offending quote and post it to the mail
# gateway.  The page is recorded in @mailed unless the gateway answers with
# an HTTP error.  Dies if the letter file cannot be read.
sub _send_letter {
    my ($url, $theirEmail, $badInstance) = @_;

    open my $letter, '<', $LETTER_FILE
        or die "Could not open Loosers! ($LETTER_FILE: $!)";
    my @looses = <$letter>;
    close $letter;

    # The first ":::" on each line is replaced by the quoted snippet.
    s/:::/"$badInstance"/ for @looses;

    my $req = POST(
        $MAIL_GATEWAY,
        [
            sessionid => $MAIL_SESSION,
            to        => $theirEmail,
            subject   => 'Typo on ' . $url,
            cc        => '',
            bcc       => 'grammarbot@yawmail.com',
            files     => '',
            msg       => join('', @looses),
            send      => 'Send',
        ],
    );
    $req->authorization_basic($MAIL_USER, $MAIL_PASS);

    my $reply = $ua->request($req);
    if ($reply->is_error) {
        warn "Mail to $theirEmail failed: " . $reply->status_line . "\n";
        return;
    }
    push @mailed, $url;
    return;
}

# Append the outcome of this run to the report file and remember the mailed
# pages so later runs skip them.
sub printReport {
    open my $report, '>>', $REPORT_FILE
        or die "Could not append to $REPORT_FILE: $!";
    open my $bothered, '>>', $BOTHERED_FILE
        or die "Could not append to $BOTHERED_FILE: $!";

    for my $user (@mailed) {
        print {$report} "Mailed to $user\n";
        print {$bothered} "$user\n";
    }
    for my $user (@noMailto) {
        print {$report} "Couldn\'t find Mailto at: $user\n";
    }

    close $report   or warn "Could not close $REPORT_FILE: $!\n";
    close $bothered or warn "Could not close $BOTHERED_FILE: $!\n";
    return;
}

# Return the result offset stored on the last line of $file.
# A missing file or a line without digits yields 0.
sub _read_position {
    my ($file) = @_;

    my $position = 0;
    if (open my $fh, '<', $file) {
        while (my $line = <$fh>) {
            $position = $1 if $line =~ /(\d+)/;
        }
        close $fh;
    }
    return $position;
}

# Store $position in $file so the next run continues where this one stopped.
sub _write_position {
    my ($file, $position) = @_;

    if (open my $fh, '>', $file) {
        print {$fh} $position;
        close $fh or warn "Could not close $file: $!\n";
    }
    else {
        warn "Could not save position to $file: $!\n";
    }
    return;
}

# Harvest two result pages (offsets N and N+10) from one search engine.
#   $position_file  file holding the offset of the next unread result
#   $url_for        code ref mapping an offset to a result-page URL
#   $link_pattern   compiled regex capturing one result link
#   $limit          optional; die when the stored offset exceeds it
# Resets @newOffenders and leaves the sorted, de-duplicated links in
# @variousOffenders.
sub _harvest {
    my ($position_file, $url_for, $link_pattern, $limit) = @_;

    @variousOffenders = ();
    @newOffenders     = ();

    my $position = _read_position($position_file);
    if (defined $limit && $position > $limit) {
        die "That's it! You did all twenty!";
    }

    my @urlList = map { $url_for->($position + $_) } (0, 10);

    # Saved before fetching, so an interrupted run does not rescan the pages.
    _write_position($position_file, $position + 20);

    my @found;
    for my $url (@urlList) {
        my $response = $ua->request(HTTP::Request->new('GET', $url));
        next unless $response->is_success;
        my $html = $response->content;
        push @found, ($html =~ /$link_pattern/g);
    }

    my %seen;
    @variousOffenders = grep { !$seen{$_}++ } sort @found;
    return;
}

# Harvest the next two Google result pages for "get's".
sub grabGoogleOffenders {
    _harvest(
        'googlePosition.txt',
        sub { 'http://www.google.com/search?q=get%27s&start=' . $_[0] . '&sa=N' },
        $GOOGLE_LINK,
    );
    return;
}

# Harvest the next two Lycos result pages for "get's".
# NOTE(review): the sub's original name was lost; this one follows the naming
# of its siblings.  Nothing in the main flow calls it.
sub grabLycosOffenders {
    _harvest(
        'Lycosposition.txt',
        sub {
            'http://www.lycos.com/cgi-bin/pursuit?query=get%27s&cat=lycos'
              . '&mtemp=pages&adv=1&first=' . $_[0]
              . '&maxhits=10&ps=0&pm=1&u0=N&u1=Y&u2=Y&u3=Y&u4=cats&u5=1';
        },
        $LYCOS_LINK,
    );

    # The captured links are percent-encoded ("http%3A..."); decode them so
    # they can be fetched.
    @variousOffenders = map { uri_unescape($_) } @variousOffenders;
    return;
}

# Harvest the next two AltaVista result pages for "get's you".
# Dies once the stored offset has passed 200.
sub grabAltaVistaOffenders {
    _harvest(
        'position.txt',
        sub {
            'http://www.altavista.com/cgi-bin/query?pg=q&kl=XX&q=get%27s+you&stq='
              . $_[0] . '&c9k';
        },
        $ALTAVISTA_LINK,
        200,
    );
    return;
}

# Append to @newOffenders every harvested link that is not already listed in
# the already-bothered file.
# NOTE(review): the name of the file read here was lost; $BOTHERED_FILE is the
# file printReport writes the mailed pages to -- confirm it is the same one.
sub delDupes {
    my @alreadyBothered;
    if (open my $fh, '<', $BOTHERED_FILE) {
        chomp(@alreadyBothered = <$fh>);
        close $fh;
    }

    my %seen;
    @seen{@alreadyBothered} = ();
    push @newOffenders, grep { !exists $seen{$_} } @variousOffenders;
    return;
}