#!/usr/bin/perl -w # Copyright © 2010-2012 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Reads the DNA Lounge twitter stream, and re-posts those twits to Facebook. # Re-posts are recorded, so that we don't duplicate them. If the twit # a URL pointing to a DNA Lounge flyer or calendar entry, then the Facebook # post has an "attachment" appended to it with the flyer image, event, # description and links to buy tickets. # # # This must only be run on cerebrum, because it uses a state file: # "/home/store/.dnalounge-twithist". That file must be globally # writable, since this script is run by both the user "store" (for # various cron-driven posts like "This week!", and for mirroring # manually-made twits) and also by user "archive" (for the # calendar-cron-driven "Tonight!" and "Starting now!" posts.) # # Also, both "store" and "archive" must have a ".dnalounge-twitter-pass" # and ".dnalounge-facebook-pass" file. # # Created: 2-Oct-2010. require 5; use diagnostics; use strict; use bytes; use POSIX qw(mktime strftime); use Fcntl; use Fcntl ':flock'; # import LOCK_* constants use Net::Twitter; use WWW::Facebook::API; use Data::Dumper; use LWP::UserAgent; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.68 $ }; $version =~ s/^[^\d]+([\d.]+).*/$1/; my $verbose = 0; my $debug_p = 0; my $web_root = (-d "/var/www/dnalounge" ? "/var/www/dnalounge" : "."); my $rss_file = "$web_root/calendar/dnalounge.rss"; my $weekly_file = "$web_root/calendar/weekly-thisweek.txt"; my $url_base = "http://www.dnalounge.com/"; my $dummy_flyer = "${url_base}flyers/noflyer-thumb.jpg"; # All posting uids must share the same hist file, or there will be dups! # This file must be readable and writable by all users who run this script. # my $history_dir = "/home/store/"; sub load_twitter_keys($) { my ($user) = @_; my $consumer = 'UNKNOWN'; my $consumer_secret = 'UNKNOWN'; my $access = 'UNKNOWN'; my $access_secret = 'UNKNOWN'; # Read our twitter tokens error ("no \$HOME") unless defined($ENV{HOME}); my $file = "$ENV{HOME}/.$user-twitter-pass"; if (open (my $in, '<', $file)) { print STDERR "$progname: read $file\n" if ($verbose); while (<$in>) { s/#.*$//s; s/\s*$//s; if (m/^\s*$/s) { } elsif (m/^CONSUMER\s*[:=]\s*(.*)$/si) { $consumer = $1; } elsif (m/^CONSUMER_SECRET\s*[:=]\s*(.*)$/si) { $consumer_secret = $1; } elsif (m/^ACCESS\s*[:=]\s*(.*)$/si) { $access = $1; } elsif (m/^ACCESS_SECRET\s*[:=]\s*(.*)$/si) { $access_secret = $1; # } else { # error ("$file: unparsable line: $_"); } } close $in; } elsif ($debug_p) { print STDERR "$progname: $file: $!\n"; } else { error ("$file: $!"); } return ($consumer, $consumer_secret, $access, $access_secret); } sub load_facebook_keys($) { my ($app_name) = @_; my $api_key = 'UNKNOWN'; my $secret = 'UNKNOWN'; my $session_key = 'UNKNOWN'; my $file = "$ENV{HOME}/.$app_name-facebook-pass"; if (open (my $in, '<', $file)) { print STDERR "$progname: read $file\n" if ($verbose); while (<$in>) { s/#.*$//s; s/\s*$//s; if (m/^\s*$/s) { } elsif (m/^API_KEY\s*[:=]\s*(.*)$/si) { $api_key = $1; } elsif (m/^SECRET\s*[:=]\s*(.*)$/si) { $secret = $1; } elsif (m/^SESSION_KEY\s*[:=]\s*(.*)$/si) { $session_key = $1; } # elsif (m/[^\s]/s) { error ("$file: unparsable: $_"); } } close $in; } elsif ($debug_p) { print STDERR "$progname: $file: $!\n"; } else { error ("$file: $!"); } return ($api_key, $secret, $session_key); } # expands the first URL relative to the second. # sub expand_url($$) { my ($url, $base) = @_; $url =~ s/^\s+//gs; # lose whitespace at front and back $url =~ s/\s+$//gs; if (! ($url =~ m/^[a-z]+:/)) { $base =~ s@(\#.*)$@@; # strip anchors $base =~ s@(\?.*)$@@; # strip arguments $base =~ s@/[^/]*$@/@; # take off trailing file component my $tail = ''; if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments my $base2 = $base; $base2 =~ s@^([a-z]+:/+[^/]+)/.*@$1@ # if url is an absolute path if ($url =~ m@^/@); my $ourl = $url; $url = $base2 . $url; $url =~ s@/\./@/@g; # expand "." 1 while ($url =~ s@/[^/]+/\.\./@/@s); # expand ".." $url .= $tail; # put anchors/args back print STDERR "$progname: relative URL: $ourl --> $url\n" if ($verbose > 2); } else { print STDERR "$progname: absolute URL: $url\n" if ($verbose > 2); } return $url; } # Given a flyer, calendar and/or blog URL, find the corresponding event in # the calendar RSS (or blog) and return the things we'd like to link to: # ($title, $description, @tickets, $facebook_event_url, $thumb_img) # sub load_desc($$$$) { my ($flyer_url, $cal_url, $blog_url, $date) = @_; return unless ($flyer_url || $cal_url || $blog_url || $date); if ($blog_url) { my ($blog_file) = ($blog_url =~ m@\b(backstage/log/.*)$@si); $blog_file = "$web_root/$blog_file"; open (my $in, '<', $blog_file) || error ("$blog_file: $!"); local $/ = undef; # read entire file my $body = <$in>; close $in; ($body) = ($body =~ m@(.*)@s); error ("$blog_file: unparsable") unless $body; $body =~ s@@@si; my $title = $1 if ($body =~ s@
(.*?)\n
(

)?\n\n@@si); error ("$blog_file: unparsable title") unless $title; my ($date) = $1 if $title =~ s@]*>(.*?)@@gsi; $title =~ s/<[^>]+>//gsi; $title =~ s/\s+/ /gsi; $title =~ s/^\s+|\s+$//gsi; $title =~ s/<//gs; $title =~ s/&/&/gs; my ($img) = ($body =~ m@]+>//gsi; $body =~ s/\s+/ /gsi; # newlines not allowed $body =~ s/^\s+|\s+$//gsi; # $body =~ s/^(.{995}).*$/$1 .../s; # 1000 character limit. Truncate. $body =~ s/^(.{200}).*$/$1 .../s; # Let's go shorter for this one. $img = expand_url ($img, $blog_url) if $img; return ($title, $date, $body, undef, undef, $img); } return unless ($flyer_url || $cal_url || $date); open (my $in, '<', $rss_file) || error ("$rss_file: $!"); local $/ = undef; # read entire file my $body = <$in>; close $in; $body =~ s/(]*>([^<>]+)<@si; my ($eid) = m@]*>([^<>]+)<@si; my ($eurl) = m@]*>([^<>]+)<@si; my $ecal; # If the item uses the flyer as its guid, also find the calendar url. { my ($yyyy, $mm, $dd) = ($eurl =~ m@(\d{4})[-/](\d\d)(?:\.html#|[-/])(\d\d[ab]?)@s); $ecal = "${url_base}calendar/$yyyy/$mm-$dd.html"; } if (($eid && $date && $eid eq $date) || ($eflyer && $flyer_url && $eflyer eq $flyer_url) || ($eurl && $cal_url && $eurl eq $cal_url) || ($ecal && $cal_url && $ecal eq $cal_url)) { my @tix; my ($title) = m@]*>([^<>]+)<@si; my ($date) = m@]*>([^<>]+)<@si; my ($time) = m@]*>([^<>]+)<@si; my ($f2) = m@]*>([^<>]+)<@si; my ($desc) = m@]*>([^<>]+)<@si; my ($fb) = m@]*> (https?://www\.facebook\.com[^<>]+)<@six; s%]*)>([^<>]+)<%{ my ($a, $t) = ($1, $2); my ($n) = ($a =~ m/text="([^\"]+)"/si); $n = 'Buy Tickets' unless $n; my @s = ($n, $t); push @tix, \@s; ''; }%gsexi; $date =~ s@^(\d\d?) ([a-z]+) (\d{4}) \(([a-z]+)\)$@$4, $2 $1@si; $date = "$date. $time"; $flyer_url = $f2 unless $flyer_url; my $thumb = $flyer_url; $thumb =~ s@\.html$@-1-thumb.jpg@s if $thumb; $title =~ s/<//gs; $title =~ s/&/&/gs; return ($title, $date, $desc, \@tix, $fb, $thumb); } } return (); } # If the URL is a tinyurl, return the real URL. # sub unpack_tinyurl($;$); sub unpack_tinyurl($;$) { my ($url, $force_p) = @_; my $re = '^https?://( tinyurl\.com | bit\.ly | j\.mp | t\.co )/'; if (!$force_p) { return $url unless ($url =~ m@$re@sx); } my $ua = LWP::UserAgent->new (max_redirect => 0); $ua->agent ("$progname/$version"); my $res = $ua->get ($url); my $ret = ($res && $res->code) || 'null'; if ($ret eq '301' || $ret eq '302') { my $loc = $res->header('Location'); print STDERR "$progname: unpacked: $url => $loc\n" if ($verbose > 2); if ($loc) { $url = $loc; return $url unless ($url =~ m@$re@sx); return unpack_tinyurl($url); } } return $url; } sub fb_make_ticket_props($$$) { my ($tix, $rsvp, $prefix) = @_; my $props = ''; my @tix = ($tix ? @$tix : ()); if ($rsvp) { my @r = ("RSVP", $rsvp); push @tix, \@r; } foreach my $t (@tix) { my ($name, $url) = @$t; my $anchor; $props .= ", " if $props; # Split the name into label/link. # "Buy Tickets: $12" => "Tickets: [$10]" # "Buy Tickets" => "Tickets: [Buy]" if ($name =~ s/: *(\$[\d.]+)$//s) { $anchor = $1; } elsif ($name eq "RSVP") { $name = "Are you going"; $anchor = "RSVP"; } else { $name =~ s/\b(Buy|Tickets?)\b//gsi; # lose words "buy" and "tickets" $name =~ s/:\s*$//s; # lose trailing colon $name =~ s/^\s+|\s+$//gs; # compress space $name = "Tickets" unless $name; # "Tickets" if nothing left $anchor = "Buy"; # "Buy" if nothing found } $name = "$prefix: $name" if $prefix; $name =~ s/: Buy Tickets$//si; $name =~ s/([\\"])/\\$1/gs; $anchor =~ s/([\\"])/\\$1/gs; # Markup here works on the "wall" page but not on in-stream display. # $anchor = "$anchor"; $props .= "\"$name\": {\"text\": \"$anchor\", \"href\": \"$url\"}"; } return $props; } # Given some text we're going to post, look at any URLs in it and determine # an image attachment and corresponding caption to put on that post. # Returns ($new_text, $attachment_json_text, $actions_json_text). # sub fb_make_attachment($) { my ($txt) = @_; print STDERR "$progname: orig twit: $txt\n" if ($verbose > 2); my ($url) = ($txt =~ m@\b(https?:[^\s]+[a-z\d/])@s); my $flyer = ($url && $url =~ m@dnalounge\.com/flyers/@) ? $url : ''; my $calendar = ($url && $url =~ m@dnalounge\.com/calendar/@) ? $url : ''; my $blog = ($url && $url =~ m@dnalounge\.com/backstage/log/@)? $url : ''; if ($verbose) { print STDERR "$progname: flyer: $flyer\n" if ($flyer); print STDERR "$progname: cal: $calendar\n" if ($calendar); print STDERR "$progname: blog: $blog\n" if ($blog); } if (($flyer || $calendar) && $txt =~ m/^THIS WEEK[:!] (Sun|Mon|Tue|Wed|Thu|Fri|Sat)\b/si) { # If this is a "coming up this week" post, do it differently. return fb_make_weekly_attachment ($txt); } # Given the flyer and/or calendar URLs, find this event in the RSS. my ($title, $time, $desc, $tix, $rsvp, $thumb) = load_desc ($flyer, $calendar, $blog, undef); my ($attach, $actions); # If this is a "starting now" post, omit the tickets. $tix = undef if ($txt =~ m@\bstarting now\b@si); # If we found an RSS description, then we're going to include an # image attachment and use that description as the caption. if ($title) { # Lose the literal URL in the description. $txt =~ s@\Q$url\E@@s; # Strip the shit out of the description text, since it has to be # pretty simple for Facebook's presentation... # $desc =~ s/^[ \t]+|[ \t]+$//gm; $desc =~ s@(?:\s*--\s*)?\bhttps?://[^\s]+[a-z\d/](\s|$)@$1@gsi; $desc =~ s/\n\n+/\n/gs; $desc =~ s/^\s+|\s+$//gs; $desc =~ s/([a-z\d()])$/$1./gmi; $desc =~ s/^(.*?\bpresent?s\s+)?\Q$title\E\.?\s+//si; $desc =~ s/^(Tickets|VIP Service):\n*//gmi; $desc =~ s/^Join the .* groups? on Facebook[.:]\n*//gmi; $desc =~ s/\s+/ /gs; # newlines not allowed $desc =~ s/^(.{995}).*$/$1 .../s; # 1000 character limit. Truncate. $title =~ s/([\\"])/\\$1/gs; $desc =~ s/([\\"])/\\$1/gs; $time =~ s/([\\"])/\\$1/gs; # Figure out what image we're going to use. If there's no flyer, # use the "dummy" flyer. There has to be an image for the caption # to show up. # $flyer = $calendar || $blog || $url_base unless $flyer; $thumb = $dummy_flyer unless $thumb; # This crap expects JSON strings! Madness. # These are the links on the same line as "8 hours ago" and "Like". # if ($rsvp) { $actions = "[{\"href\": \"$rsvp\", \"text\": \"RSVP\" }]"; $flyer = $rsvp; # Prefer to link to FB event, not dnalounge.com $rsvp = undef; } $time = "$time"; $attach = ("{ " . " \"name\": \"$title\"," . " \"href\": \"$flyer\"," . " \"caption\": \"$time\"," . " \"description\": \"$desc\", "); # Beneath the description, put a link on its own line for each ticket # that is on sale. # my $props = fb_make_ticket_props ($tix, $rsvp, 0); $attach .= "\"properties\": { $props }, " if $props; # Finally, the image itself. $attach .= (" \"media\": [{" . " \"type\": \"image\"," . " \"src\": \"$thumb\"," . " \"href\": \"$flyer\"" . " }]" . "}"); $attach =~ s/\s+/ /gs; # newlines not allowed # Since we're doing an attachment, nuke any URLs in the first line. $txt =~ s@\s*\bhttps?:[^\s]+[a-z\d/]\s*@@gsi; } return ($txt, ($attach || ''), ($actions || '')); } sub fb_make_weekly_attachment($) { my ($txt) = @_; $txt =~ s@\s*\b(https?:[^\s]+[a-z\d/])\s*@ @si; my $url = ($1 || ''); $txt =~ s/\s+$//s; open (my $in, '<', $weekly_file) || error ("$weekly_file: $!"); my @events = <$in>; close $in; my $desc = ''; my $props = ""; my $imgs = ""; my $count = 0; foreach my $key (@events) { chop ($key); my ($yyyy, $mm, $dd, $suf) = ($key =~ m/^(\d{4})-(\d\d)-(\d\d)([ab]?)$/s); error ("unparsable key: $key") unless $dd; my ($title, $time, $desc2, $tix, $rsvp, $thumb) = load_desc (undef, undef, undef, $key); my $etime = mktime (0, 0, 0, $dd, $mm-1, $yyyy-1900, 0, 0, -1); my $dotw = strftime ("%A", localtime($etime)); # If the first event in the list is DG, omit it. # This is so that when we send this twit out on Monday, # it doesn't constantly list tonight's event. # next if ($count == 0 && $title =~ m/^Death Guild$/si); my $title2 = $title; $title2 =~ s/: .*$//s; my $prop = fb_make_ticket_props ($tix, undef, $title2); if ($prop) { $props .= ", " if $props; $props .= $prop; } $desc .= "$dotw: $title; "; if ($thumb) { my $url = "${url_base}calendar/$yyyy/$mm-$dd$suf.html"; $url = $rsvp if $rsvp; # Link to Facebook event if it exists $imgs .= ", " if $imgs; $imgs .= ("{" . " \"type\": \"image\"," . " \"src\": \"$thumb\"," . " \"href\": \"$url\"" . "}"); } $count++; last if ($count > 5); } $desc =~ s/\s+$//s; $desc =~ s/;$/!/s; $txt = "This week at DNA Lounge! $desc"; $desc = 'This week at DNA Lounge!'; my $attach = ("{ " . " \"name\": \"$desc\"," . " \"href\": \"$url\"," . " \"description\": \"\" "); $attach .= ", \"properties\": { $props } " if $props; $attach .= ", \"media\": [ $imgs ] " if $imgs; $attach .= "}"; $attach =~ s/\s+/ /gs; # newlines not allowed my $actions = ''; return ($txt, $attach, $actions); } # If there's as URL in the text, and it is from one of the common # image-posting services, find the URL of the underlying image itself. # If $data_p is true, download the raw bits of that image so we can # re-upload it to Facebook's image-hosting service. # # Returns ($new_text, $image_data_or_image_url, $image_page_href) # or (). # sub fb_find_photo($$) { my ($txt, $data_p) = @_; my $t2 = $txt; my ($page_url, $thumb_url, $data, $meta_p); if ($t2 =~ s@\s*\b(https?:// ( yfrog\.com | tumblr\.com | plixi\.com | img\.ly | post\.ly | twitgoo\.com | twitpic\.com | instagr\.am | lockerz\.com | twitter\.com ) /([-_a-z\d/]+))\s*@ @six) { my ($site, $id); ($page_url, $site, $id) = ($1, $2, $3); $thumb_url = $page_url; # initially - now munge it. if ($site =~ m/yfrog/si) { # Facebook won't do thumbnails with the "xxx:iphone" URL. # I'm guessing it doesn't like the colon, even when encoded. # So force one more redirect to get the underlying image URL. $thumb_url .= ':iphone'; $thumb_url = unpack_tinyurl($thumb_url, 1); $thumb_url =~ s/xsize=480&ysize=480/xsize=640&ysize=640/s; } elsif ($site =~ m/twitpic/si) { $thumb_url = 'http://twitpic.com/show/large/' . $id; } elsif ($site =~ m/img\.ly/si) { $thumb_url = 'http://img.ly/show/large/' . $id; } elsif ($site =~ m/plixi/si || $site =~ m/lockerz/si) { $thumb_url = 'http://api.plixi.com/api/tpapi.svc/imagefromurl' . '?size=big&url=' . $thumb_url; } elsif ($site =~ m/instagr\.am/si) { $id =~ s@/+$@@s; $thumb_url = 'http://instagr.am/' . $id . '/media/?size=l'; } elsif ($site =~ m/post\.ly/si || $site =~ m/twitgoo/si || $site =~ m/tumblr/si) { $meta_p = 1; } elsif ($site =~ m/twitter\.com/si && $page_url =~ m@status/([^/]+)/photo@si) { # Give me a fucking break. We have to dig it out of the XML API. $id = $1; my $u3 = 'http://api.twitter.com/1/statuses/show/' . $id . '.xml?include_entities=true'; my $ua = LWP::UserAgent->new (); $ua->agent ("$progname/$version"); my $res = $ua->get ($u3); my $ret = ($res && $res->code) || 'null'; if ($ret ne '200') { print STDERR "$progname: error: $ret for $u3 (skipping)\n"; } else { my $xml = $res->content; ($thumb_url) = ($xml =~ m@(.*?)@si); if ($thumb_url) { $thumb_url .= ':large'; } else { print STDERR "$progname: error: no image on $u3 (skipping)\n"; } } } else { error ("unknown site: $site"); } $t2 =~ s/\s+$//s; } my $ua = LWP::UserAgent->new (); $ua->agent ("$progname/$version"); if ($meta_p) { # Load HTML and pull image URL out of meta tags. print STDERR "$progname: retrieving image page $page_url\n" if $verbose; my $res = $ua->get ($page_url); my $ret = ($res && $res->code) || 'null'; if ($ret ne '200') { print STDERR "$progname: error: $ret for img $page_url (skipping)\n"; } else { my $html = $res->content; my ($link) = ($html =~ m@(]*? REL = ["'] image_src ["'] [^<>]* >)@six); my ($url2) = ($link =~ m@\b HREF = ["'] ( [^"'<>]+ ) @six) if $link; if ($url2) { print STDERR "$progname: found image_src $url2\n" if $verbose; $thumb_url = $url2; } else { print STDERR "$progname: no image_src\n" if $verbose; $thumb_url = undef; } } # Munge the URL for a non-thumb-sized image # if ($thumb_url && $thumb_url =~ m/posterous\.com/) { $thumb_url =~ s/thumb\d+\.jpg$/scaled500.jpg/s; } elsif ($thumb_url && $thumb_url =~ m/twitgoo/) { $thumb_url =~ s/_th\.jpg$/.jpg/s; } } if (! $data_p) { $data = $thumb_url; } elsif ($thumb_url) { print STDERR "$progname: retrieving image $thumb_url\n" if $verbose; my $res = $ua->get ($thumb_url); my $ret = ($res && $res->code) || 'null'; if ($ret ne '200') { print STDERR "$progname: error: $ret for img $thumb_url (skipping)\n"; } else { my $data2 = $res->content; my $L = length($data2); if ($L < 1024) { print STDERR "$progname: error: img $thumb_url is $L bytes" . " (skipping)\n"; } else { print STDERR "$progname: $thumb_url is $L bytes\n" if $verbose; $data = $data2; } } } return () unless $data; # If the original text contained more than one URL (could be multiple # photos, only one of which is used as the thumbnail) leave all of the # URLs in place. But if it contained only one URL, omit it, as the # thumbnail links directly there. # $t2 = $txt if ($txt =~ m/https?:.*https?:/si); return ($t2, $data, $page_url); } # If there's as URL in the text, see if it has metadata indicating what # thumbnail should be used to represent it. # # Returns ($new_text, $page_url, $thumb_url, $page_title) or (). # sub fb_find_thumbnail($) { my ($txt) = @_; my $t2 = $txt; my $url; my ($thumb_title, $thumb_url); if ($t2 =~ s@\s*\b(https?://[^\s\[\]()<>\"\']+[a-z\d/])\s*@ @si) { my $url = $1; $t2 =~ s/^\s+|\s+$//gsi; my $ua = LWP::UserAgent->new (); $ua->agent ("$progname/$version"); print STDERR "$progname: retrieving $url\n" if $verbose; my $res = $ua->get ($url); my $ret = ($res && $res->code) || 'null'; if ($ret ne '200') { print STDERR "$progname: error: $ret for $url (skipping)\n"; } else { my $html = $res->content; my ($link) = ($html =~ m@(]*? REL = ["'] image_src ["'] [^<>]* >)@six); ($link) = ($html =~ m@(]*? PROPERTY = ["'] og:image ["'][^<>]*>)@six) unless $link; my ($url2) = ($link =~ m@\b (?: HREF | CONTENT ) = ["'] ( [^"'<>]+ )@six) if $link; if ($url2) { ($thumb_title) = ($html =~ m@(.*?)@si); $thumb_title = '' unless $thumb_title; $thumb_title =~ s/\s+/ /gs; $thumb_title =~ s/^\s+|\s+$//gs; $thumb_url = $url2; print STDERR "$progname: found thumb $url2 ($thumb_title)\n" if $verbose; } else { print STDERR "$progname: no thumb\n" if $verbose; } return () unless $thumb_url; return ($t2, $url, $thumb_url, $thumb_title); } } return (); } # If this is a RT, use the user's twitter avatar as the image. # # Returns ($page_url, $thumb_url, $page_title) or (). # sub fb_find_twitter_avatar($) { my ($txt) = @_; my $t2 = $txt; my ($user) = ($txt =~ m/\b RT \b [:\s]* \@? ([_a-z\d]+)\b/six); ($user) = ($txt =~ m/\@([_a-z\d]+)\b/si) unless $user; return () unless $user; # Kludge: are we ourselves? return () if ($user eq 'dnapizza' || $user eq 'dnalounge' || $user eq 'jwz'); my $ua = LWP::UserAgent->new (); $ua->agent ("$progname/$version"); my $url = 'http://api.twitter.com/1/users/show.json?id=' . $user; print STDERR "$progname: retrieving $url\n" if $verbose; my $res = $ua->get ($url); my $ret = ($res && $res->code) || 'null'; if ($ret ne '200') { print STDERR "$progname: error: $ret for $url (skipping)\n"; } else { my $json = $res->content; $json =~ s/^{|}$//gs; # Kludge: lose nested arrays in JSON. 1 while ($json =~ s/{[^{}]*}/""/gs); my ($url2) = ($json =~ m@"profile_image_url":"([^"]+)@s); if (! $url2) { print STDERR "$progname: error: $user no avatar (skipping)\n"; return (); } my ($name2) = ($json =~ m@"name":"([^"]+)@s); $name2 = $user unless $name2; $url2 =~ s/\\//gs; $name2 =~ s/\\//gs; my $url = 'http://twitter.com/' . $user; return ($url, $url2, $name2); } return (); } # Post the given text to Facebook, possibly with an image attachment. # sub fb_post($$$) { my ($user, $app, $txt) = @_; $txt =~ s/\s+$//s; # If I have at-replied to myself, disregard that. $txt =~ s/^@\Q$user\E\s+//si; if ($txt =~ m/^@/s) { print STDERR "$progname: skipping at-reply $txt\n" if $verbose; return; } my ($api_key, $secret, $session_key) = load_facebook_keys ($app); my $client = WWW::Facebook::API->new ( parse => 1, throw_errors => 1, ); $client->api_key ($api_key); $client->secret ($secret); $client->desktop (1); $client->session_key ($session_key); $client->debug(1) if ($debug_p > 1); my $userid = $user; my $ret = $client->fql->query (query => "SELECT id FROM profile WHERE username = '$userid'"); $userid = ($ret ? @{$ret}[0]->{id} : 0); # if (!$userid && $user eq 'dnapizza') { $userid = '192927147394687'; } # if (!$userid && $user eq 'dnalounge') { $userid = '12161711085'; } error ("unable to find uid of $userid") unless $userid; # Unpack all tinyURLs. $txt =~ s@(https?:[^\s]+[a-z\d/])@{ unpack_tinyurl($1) }@gsexi; my ($attach, $actions); ($txt, $attach, $actions) = fb_make_attachment ($txt); # Facebook ignores newlines. # Let's turn them into dashes, unless previous line ended in punctuation. # $txt =~ s/^\s+|\s+$//gs; $txt =~ s/([a-z\d])\s*?\n\s*/$1 -- /gsi; $txt =~ s/\s+/ /gs; # What works better? # To upload a photo to Facebook directly (upload_photo_p = 1) # or to just post a link to that photo with a thumbnail (0)? # # On the Facebook iPhone app, photo uploads aren't appearing # in the stream. Also, the uploaded photos have "jwz" as the # author instead of "dnalounge". # my $upload_photo_p = 0; my ($photo, %args, %pargs); # If we don't have an attachment, this isn't a post from the DNA Robot. # Check to see if there's a photo to upload. # if (! $attach) { my ($txt2, $data, $photo_href) = fb_find_photo ($txt, $upload_photo_p); ($photo, $txt) = ($data, $txt2) if ($data); if ($photo) { if ($upload_photo_p) { %pargs = (caption => $txt, uid => $userid, data => ($debug_p ? '' : $photo)); } $txt =~ s/([\\"])/\\$1/gs; $attach = ("{ " . " \"name\": \"$txt\"," . " \"href\": \"$photo_href\"," . # url of image html page " \"description\": \"\", " . " \"media\": [{" . " \"type\": \"image\"," . " \"src\": \"$photo\"," . # url of image jpeg " \"href\": \"$photo_href\"" . # url of image html page " }]}"); $txt = ""; } } # If we don't have a photo either, see if the posted URL has a thumbnail, # and use that. # if (!$attach && !$photo) { my ($new_text, $page_url, $thumb_url, $page_title) = fb_find_thumbnail ($txt); if ($thumb_url) { $page_title =~ s/([\\"])/\\$1/gs; $txt = $new_text; $attach = ("{ " . " \"name\": \"$page_title\"," . " \"href\": \"$page_url\"," . " \"description\": \"\", " . " \"media\": [{" . " \"type\": \"image\"," . " \"src\": \"$thumb_url\"," . " \"href\": \"$page_url\"" . " }]}"); } } # Maybe this was an RT? Use a Twitter avatar. # if (!$attach && !$photo) { my ($page_url, $thumb_url, $page_title) = fb_find_twitter_avatar ($txt); if ($thumb_url) { $page_title =~ s/([\\"])/\\$1/gs; $attach = ("{ " . " \"name\": \"$page_title\"," . " \"href\": \"$page_url\"," . " \"description\": \"\", " . " \"media\": [{" . " \"type\": \"image\"," . " \"src\": \"$thumb_url\"," . " \"href\": \"$page_url\"" . " }]}"); } } %args = (message => $txt, uid => $userid, attachment => $attach, action_links => $actions); if ($debug_p) { $Data::Dumper::Indent = 1; $Data::Dumper::Terse = 1; $Data::Dumper::Quotekeys = 0; $Data::Dumper::Pair = "\t=> "; $Data::Dumper::Pad = " "; print STDERR "\nWould have posted to $user ($userid):\n"; print STDERR Dumper (\%pargs) if %pargs; print STDERR Dumper (\%args) if %args; return; } if (%pargs) { print STDERR "$progname: FB photo: $txt\n" if $verbose; $client->photos->upload (%pargs); } else { print STDERR "$progname: FB post: $txt\n" if $verbose; $client->stream->publish (%args); } } # Read the twitter stream, and post any new entries to Facebook. # Save the current state to the history file. # sub fbmirror($$) { my ($user, $app) = @_; my ($consumer, $consumer_secret, $access, $access_secret) = load_twitter_keys ($app); my $nt = Net::Twitter->new ( traits => [qw/OAuth API::REST WrapError/], consumer_key => $consumer, consumer_secret => $consumer_secret, access_token => $access, access_token_secret => $access_secret, ); # If it's more than 4 hours old, don't repost it. No longer timely. my $since = time() - (60 * 60 * 4); $since -= (60 * 60 * 24 * 2) if ($debug_p); my $ret = undef; # Twitter fails often. Retry a few times until we get a response. # my $retries = 10; for (my $i = 0; $i < $retries; $i++) { $ret = $nt->user_timeline ({id => $user, since => $since, include_rts => 1}); last if $ret; print STDERR "$progname: twitter: timeline: " . $nt->get_error()->{error} . " (retrying...)\n" if ($verbose > 1); sleep 5 + $i; } error ("twitter: timeline: " . $nt->get_error()->{error} . " (after $retries tries)") unless $ret; my @hist = (); $history_dir = '/tmp/' if ($debug_p && ! -d $history_dir); my $file = "$history_dir/.$user-twithist"; $file =~ s@//+@/@gs; print STDERR "$progname: awaiting lock: $file\n" if ($verbose > 1); my $hist_fd; open ($hist_fd, '+>>', $file) || error ("unable to write $file: $!"); flock ($hist_fd, LOCK_EX) || error ("unable to lock $file: $!"); seek ($hist_fd, 0, 0) || error ("unable to rewind $file: $!"); @hist = <$hist_fd>; print STDERR "$progname: read $file\n" if $verbose; my %hist; foreach my $h (@hist) { my ($id, $txt) = split(/\t/, $h); $hist{$id} = $txt; print STDERR "$progname: old: $id: $txt\n" if ($verbose > 3); } my $max = 100; # truncate, omitting really old entries @hist = @hist[0 .. $max] if ($#hist >= $max); my @new = (); foreach my $t (@$ret) { my $id = $t->{id}; if (! $hist{$id}) { my $txt = $t->{text}; # If it's a retweet, 'text' might be truncated at 140. # Make a longer-than-140 un-truncated version. if ($t->{retweeted_status}) { my $u2 = $t->{retweeted_status}->{user}->{screen_name}; my $t2 = $t->{retweeted_status}->{text}; $txt = "RT \@$u2: $t2"; } $txt =~ s/\n/\\n/gs; my $line = "$id\t$txt\n"; print STDERR "$progname: new: $id: $txt\n" if ($verbose > 3); push @new, $line; } } unshift @hist, @new; foreach my $t (@new) { my ($id, $txt) = split(/\t/, $t); $txt =~ s/\\n/\n/gs; fb_post ($user, $app, $txt); } if ($#new < 0) { print STDERR "$progname: $file unchanged\n" if ($verbose > 1); } elsif ($debug_p) { print STDERR "$progname: not writing $file\n" if ($verbose); print STDERR join('', @hist) . "\n" if ($verbose > 2); } else { print STDERR "$progname: writing $file\n" if $verbose; truncate ($hist_fd, 0) || error ("unable to truncate $file: $!"); seek ($hist_fd, 0, 0) || error ("unable to rewind $file: $!"); print $hist_fd join('', @hist); } flock ($hist_fd, LOCK_UN) || error ("unable to unlock $file: $!"); close ($hist_fd); } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose] " . "[--user u] [--app u] [--debug [text]]\n"; exit 1; } sub main() { my $user = 'dnalounge'; my $app = 'dnalounge'; my $debugtxt; while ($#ARGV >= 0) { $_ = shift @ARGV; if (m/^--?verbose$/) { $verbose++; } elsif (m/^--?debug$/) { $debug_p++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^--?user$/) { $user = shift @ARGV; } elsif (m/^--?app$/) { $app = shift @ARGV; } elsif (m/^-./) { usage; } elsif ($debug_p && !$debugtxt) { $debugtxt = $_; } else { usage; } } if ($debugtxt) { fb_post ($user, $app, $debugtxt); } else { fbmirror ($user, $app); } } main(); exit 0;