#!/usr/bin/perl -w # Copyright © 2002-2007 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Generates RSS 0.91 RDF (e.g., for Livejournal) from the What's New journal. # Created: 8-Jul-2002. # # DNA Lounge URL: http://www.dnalounge.com/backstage/log/latest.rss # LiveJournal URL: http://www.livejournal.com/users/dnalounge/ # Documentation: http://my.netscape.com/publish/formats/rss-spec-0.91.html require 5; use diagnostics; use strict; use POSIX; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.16 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $verbose = 0; my $rss_title = "DNA Lounge: What's New"; my $rss_desc = "Behind the scenes at the DNA Lounge, by Jamie Zawinski"; my $rss_lang = "en"; my $rss_webmaster = "webmaster\@dnalounge.com"; my $rss_editor = "jwz\@dnalounge.com"; my $rss_img_title = "DNA Lounge"; my $rss_img_url = "http://www.dnalounge.com/logo-thumb.gif"; my $rss_img_width = 100; my $rss_img_height = 34; my $rss_img_link = "http://www.dnalounge.com/"; my $base_url = "http://www.dnalounge.com/backstage/log/latest.html"; my $rss_nitems = 6; sub parse_monthly_html($$) { my ($file, $base_url) = @_; local *IN; open (IN, "<$file") || error ("$file: $!"); print STDERR "$progname: reading $file\n" if ($verbose); my $body = ""; while () { $body .= $_; } close IN; # only keep stuff inside the "right" and "bottom" sections # { $_ = $body; my ($b1) = m/^.* (.*).*$/sx; my ($b2) = m/^.* (.*).*$/sx; error ("$file: unable to find %%RIGHT%% section") unless defined ($b1); error ("$file: unable to find %%BOTTOM%% section") unless defined ($b2); $body = "$b1\n$b2"; # When an entry splits between RIGHT and BOTTOM, the UL is closed # and re-opened. Fix that. $body =~ s@()(\s*\n)@$2$1@gsi; $body =~ s@\s*(

\s*)?

in the last section $sections[$#sections] =~ s@^(.*)\n.*?$@$1@s || error ("last section does not end with "); my @results = (); foreach (@sections) { s/^\s+//s; # lose leading whitespace s/\s+$//; # lose trailing whitespace s/^.*? 1); } return @results; } sub get_items($$$) { my ($how_many, $dir, $base_url) = @_; local *DIR; $dir =~ s@/+$@@; # take off trailing slash $base_url =~ s@/[^/]*$@/@; # take off trailing file component opendir (DIR, "$dir") || error ("$dir: $!"); my @files = readdir (DIR); closedir DIR; my @result = (); foreach (sort { $b cmp $a } (@files)) { next if (m/^\./); next unless (m/^\d{4}$/); my $year = $_; my $dir2 = "$dir/$year"; opendir (DIR, "$dir2") || error ("$dir2: $!"); my @files2 = readdir (DIR); closedir DIR; foreach (sort { $b cmp $a } (@files2)) { next if (m/^\./); next unless (m/^(\d\d)\.html$/); my $name = $_; my $file = "$dir2/$name"; push @result, parse_monthly_html ($file, "$base_url$year/$name"); if ($#result >= $how_many) { @result = @result[0 .. $how_many-1] if ($#result > $how_many); return @result; } } } return @result; } sub make_rss($$$$$$$$) { my ($title, $link, $desc, $pubdate, $builddate, $nitems, $dir, $base_url) = @_; my $items = join ('', get_items ($nitems, $dir, $base_url)); $items =~ s/^/ /gm; return ("\n" . "\n" . "\n" . "\n" . " \n" . " $title\n" . " $link\n" . " $desc\n" . " $rss_lang\n" . " $rss_webmaster\n" . " $rss_editor\n" . " $pubdate\n" . " $builddate\n" . " \n" . " $rss_img_title\n" . " $rss_img_url\n" . " $rss_img_width\n" . " $rss_img_height\n" . " $rss_img_link\n" . " \n" . $items . " \n" . "\n"); } # expands the first URL relative to the second. # sub expand_url($$) { my ($url, $base) = @_; $url =~ s/^\s+//gs; # lose whitespace at front and back $url =~ s/\s+$//gs; if (! ($url =~ m/^[a-z]+:/)) { $base =~ s@(\#.*)$@@; # strip anchors $base =~ s@(\?.*)$@@; # strip arguments $base =~ s@/[^/]*$@/@; # take off trailing file component my $tail = ''; if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments my $base2 = $base; $base2 =~ s@^([a-z]+:/+[^/]+)/.*@$1@ # if url is an absolute path if ($url =~ m@^/@); my $ourl = $url; $url = $base2 . $url; $url =~ s@/\./@/@g; # expand "." 1 while ($url =~ s@/[^/]+/\.\./@/@g); # expand ".." $url .= $tail; # put anchors/args back print STDERR "$progname: relative URL: $ourl --> $url\n" if ($verbose > 2); } else { print STDERR "$progname: absolute URL: $url\n" if ($verbose > 2); } return $url; } # converts all relative URLs in SRC= or HREF= to absolute URLs, # relative to the given base. # sub expand_urls($$) { my ($html, $base) = @_; $html =~ s/ # sub make_rss_item($$$) { my ($title, $link, $desc) = @_; $desc =~ s/^\s+//s; # lose leading and trailing whitespace $desc =~ s/\s+$//s; $desc =~ s/[ \t\n]+/ /gs; # convert all horizontal/vertical whitespace # to a single space (no newlines at all) # lose leading/trailing paragraph breaks $desc =~ s@^(<(P|BR)\b[^<>]*>\s*)+@@gsi; $desc =~ s@(\s*<(P|BR)\b[^<>]*>)+$@@gsi; $desc =~ s/ +$//gm; # lose trailing whitespace $desc =~ s/&/&/g; # de-HTMLify $desc =~ s//>/g; $desc =~ s/^/ /gm; $desc .= "\n" unless ($desc =~ m/\n$/s); my $item = ("\n" . " $title\n" . " $link\n" . " \n" . $desc . " \n" . "\n"); return $item; } sub write_rss($$$$$$$) { my ($rss_title, $base_url, $rss_desc, $rss_nitems, $dir, $base, $file) = @_; my $bdate = strftime ("%a, %e %b %Y %H:%M:%S GMT", gmtime); my $body = make_rss ($rss_title, $base_url, $rss_desc, $bdate, $bdate, $rss_nitems, $dir, $base); my $nbody = $body; my $obody = ''; local *IN; if (open (IN, "<$file")) { while () { $obody .= $_; } close IN; } # strip the dates out of both files, for comparison purposes # $nbody =~ s@<([a-z]+Date)>(.*?)@<$1>...@gsi; $obody =~ s@<([a-z]+Date)>(.*?)@<$1>...@gsi; if ($nbody eq $obody) { print STDERR "$progname: $file unchanged\n" if ($verbose); } else { local *OUT; open (OUT, ">$file") || error ("$file: $!"); print OUT $body || error ("$file: $!"); close OUT || error ("$file: $!");; print STDERR "$progname: wrote $file\n"; } } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose]\n"; exit 1; } sub main() { my $dir = undef; my $file = undef; while ($_ = $ARGV[0]) { shift @ARGV; if ($_ eq "--verbose") { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (-d $_ && !defined($dir)) { $dir = $_; } elsif (!defined($file)) { $file = $_; } elsif (m/^-./) { usage; } else { usage; } } $dir = "." unless defined ($dir); $file = "$dir/latest.rss" unless defined($file); write_rss ($rss_title, $base_url, $rss_desc, $rss_nitems, $dir, $base_url, $file); } main(); exit 0;