#!/usr/bin/perl -w # Copyright © 2002-2009 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Generates RSS 0.91 RDF (e.g., for Livejournal) from the What's New journal. # Created: 8-Jul-2002. # Updated to RSS 2.0, 8-Dec-2008. # # DNA Lounge URL: http://www.dnalounge.com/backstage/log/latest.rss # LiveJournal URL: http://www.livejournal.com/users/dnalounge/ # Documentation: http://my.netscape.com/publish/formats/rss-spec-0.91.html # Validator: http://feedvalidator.org/ require 5; use diagnostics; use strict; use POSIX; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.29 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $verbose = 0; my $rss_title = "DNA Lounge: What's New"; my $rss_desc = "Behind the scenes at the DNA Lounge, by Jamie Zawinski"; my $rss_lang = "en"; my $rss_webmaster = "webmaster\@dnalounge.com (DNA Lounge)"; my $rss_author = "jwz\@dnalounge.com (Jamie Zawinski)"; my $rss_img_url = "http://www.dnalounge.com/logo2.gif"; my $rss_img_width = 100; my $rss_img_height = 32; my $base_url = "http://www.dnalounge.com/backstage/log/latest.html"; my $base_rss = "http://www.dnalounge.com/backstage/log/latest.rss"; my $rss_nitems = 6; sub parse_html($$) { my ($file, $base_url) = @_; open (my $in, '<', $file) || error ("$file: $!"); print STDERR "$progname: reading $file\n" if ($verbose); local $/ = undef; # read entire file my $body = <$in>; my $mtime = (stat($in))[9]; close $in; $body =~ s@^.*?()@$1@s; $body =~ s@.*?$@@s; # lose comment block $body =~ s@.*\n\n@@si; $body =~ s@(.*?)\n\n@@s; my $title = $1; error ("$file: no SUBHEADING") unless $title; $title =~ s@<[^<>]*>@@gs; $title =~ s/ / /gs; $title =~ s/^\s*\d\d?[- ][A-z][a-z]+[- ]\d{4}\s+\([A-z][a-z][a-z]\)\s*//gs; $title =~ s/\s+/ /gs; $title =~ s/^\s+|\s+$//gs; my $desc = expand_urls ($body, $base_url); my ($yyyy, $mm, $dd) = ($file =~ m@\b(\d{4})/(\d\d)/(\d\d)\.@s); my $time = mktime (0, 0, 0, $dd, $mm-1, $yyyy-1900, 0, 0, -1); my $max = $time + (60 * 60 * 24); if ($mtime >= $time && $mtime < $max) { # write-date on file matches date of entry. $time = $mtime; } else { # File was modified later; cap date at midnight of date of entry. $time = $max - 60; } my $date = strftime ("%a, %d %b %Y %H:%M:%S GMT", gmtime ($time)); print STDERR "$progname: $file: parsed \"$title\"\n" if ($verbose > 1); return make_rss_item ($title, $base_url, $date, $desc); } sub get_items($$$) { my ($how_many, $dir, $base_url) = @_; $dir =~ s@/+$@@; # take off trailing slash $base_url =~ s@/[^/]*$@/@; # take off trailing file component my @result = (); opendir (my $DIR, "$dir") || error ("$dir: $!"); my @years = sort { $b cmp $a } readdir ($DIR); closedir $DIR; foreach my $year (@years) { next unless ($year =~ m/^\d\d\d\d$/s); opendir (my $YDIR, "$dir/$year") || error ("$dir/$year: $!"); my @months = sort { $b cmp $a } readdir ($YDIR); closedir $YDIR; foreach my $month (@months) { next unless ($month =~ m/^\d\d$/s); opendir (my $MDIR, "$dir/$year/$month") || error ("$dir/$year/$month: $!"); my @days = sort { $b cmp $a } readdir ($MDIR); closedir $MDIR; foreach my $day (@days) { next unless ($day =~ m/^\d\d\.html$/s); my $file = "$dir/$year/$month/$day"; my $url = "$base_url$year/$month/$day"; push @result, parse_html ($file, $url); last if ($#result+1 >= $how_many); } last if ($#result+1 >= $how_many); } last if ($#result+1 >= $how_many); } return @result; } sub make_rss($$$$$$$$) { my ($title, $link, $desc, $pubdate, $builddate, $nitems, $dir, $base_url) = @_; my $items = join ('', get_items ($nitems, $dir, $base_url)); $items =~ s/^/ /gm; return ("\n" . "\n" . " \n" . " $title\n" . " $link\n" . " $desc\n" . " $rss_lang\n" . " $rss_author\n" . " $rss_webmaster\n" . " \n" . #### these are optional " $pubdate\n" . " $builddate\n" . " \n" . " $title\n" . " $rss_img_url\n" . " $link\n" . " $rss_img_width\n" . " $rss_img_height\n" . " $title\n" . " \n" . $items . " \n" . "\n"); } # expands the first URL relative to the second. # sub expand_url($$) { my ($url, $base) = @_; $url =~ s/^\s+//gs; # lose whitespace at front and back $url =~ s/\s+$//gs; if (! ($url =~ m/^[a-z]+:/)) { $base =~ s@(\#.*)$@@; # strip anchors $base =~ s@(\?.*)$@@; # strip arguments $base =~ s@/[^/]*$@/@; # take off trailing file component my $tail = ''; if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments my $base2 = $base; $base2 =~ s@^([a-z]+:/+[^/]+)/.*@$1@ # if url is an absolute path if ($url =~ m@^/@); my $ourl = $url; $url = $base2 . $url; $url =~ s@/\./@/@g; # expand "." 1 while ($url =~ s@/[^/]+/\.\./@/@s); # expand ".." $url .= $tail; # put anchors/args back print STDERR "$progname: relative URL: $ourl --> $url\n" if ($verbose > 2); } else { print STDERR "$progname: absolute URL: $url\n" if ($verbose > 2); } return $url; } # converts all relative URLs in SRC= or HREF= to absolute URLs, # relative to the given base. # sub expand_urls($$) { my ($html, $base) = @_; $html =~ s/ # sub make_rss_item($$$$) { my ($title, $link, $date, $desc) = @_; $desc =~ s@@@gsi; # lose leading/trailing paragraph breaks $desc =~ s@^(<(P|BR)\b[^<>]*>\s*)+@@gsi; $desc =~ s@(\s*<(P|BR)\b[^<>]*>)+$@@gsi; $desc =~ s/\s+/ /gs; $desc =~ s/^\s+|\s+$//gs; $desc = "

$desc"; # Safari screws up if CDATA doesn't begin with

my $comments = ''; if ($desc =~ s@(

\s*)?(

.*?
\s*)@@si) { $comments = $link; $comments = " $comments\n"; } my $item = ("\n" . " $title\n" . " $link\n" . " $rss_author\n" . " $date\n" . $comments . " \n" . "\n"); return $item; } sub write_rss($$$$$$$) { my ($rss_title, $base_url, $rss_desc, $rss_nitems, $dir, $base, $file) = @_; my $bdate = strftime ("%a, %d %b %Y %H:%M:%S GMT", gmtime); my $body = make_rss ($rss_title, $base_url, $rss_desc, $bdate, $bdate, $rss_nitems, $dir, $base); my $nbody = $body; my $obody = ''; if (open (my $in, '<', $file)) { while (<$in>) { $obody .= $_; } close $in; } # strip the dates out of both files, for comparison purposes # $nbody =~ s@<([a-z]+Date)>(.*?)@<$1>...@gsi; $obody =~ s@<([a-z]+Date)>(.*?)@<$1>...@gsi; if ($nbody eq $obody) { print STDERR "$progname: $file unchanged\n" if ($verbose); } else { open (my $out, '>', $file) || error ("$file: $!"); (print $out $body) || error ("$file: $!"); close $out || error ("$file: $!");; print STDERR "$progname: wrote $file\n"; } } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose]\n"; exit 1; } sub main() { my $dir = undef; my $file = undef; while ($_ = $ARGV[0]) { shift @ARGV; if ($_ eq "--verbose") { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (-d $_ && !defined($dir)) { $dir = $_; } elsif (!defined($file)) { $file = $_; } elsif (m/^-./) { usage; } else { usage; } } $dir = "." unless defined ($dir); $file = "$dir/latest.rss" unless defined($file); # When writing files, make permissions match the parent directory # by computing a umask from the directory's permissions. # umask (~((stat($dir))[2] & 0666) & 0666); write_rss ($rss_title, $base_url, $rss_desc, $rss_nitems, $dir, $base_url, $file); } main(); exit 0;