#!/usr/bin/perl -w # Copyright © 2000-2013 Jamie Zawinski . # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Created: 28-Nov-00. # # This script generates/updates menu contents and various other templatey # things on all of the DNA Lounge pages. It goes like this: # # $DNA::Menuify::write_file ($filename, $body) # # - extracts the "content" portions of the $body (the sections between # "BOTTOM_START and BOTTOM_END", etc.) # - inserts those into the template file; # - generates a menu for the target file, and splices that in too; # - writes the result out to the target file, if it is different. # # In this way, we ensure that any changes to the template file always get # pushed down into every file that uses it, and we don't update mtime # unless necessary. # # Setting $DNA::Menuify::debug means run "diff" instead of writing anything. # # $DNA::Menuify::verbose specifies how loud we are: # # 0: print nothing. # 1: print files written. # 2: print files unchanged. # 8: print lots of stuff. # 9: print even more stuff. # # # Other utility functions: # # error ($str) -- print message and exit # url_quote($s), url_unquote($s) -- do "%" encoding of the string. # html_quote($s) -- quotes &, < and > # expand_url ($url, $base) -- expand one relative URL # expand_urls ($html, $base) -- expand all URLs in the HTML # image_size ($file) -- returns width and height # cgi_exec ($cmd) -- run command, wrap in PRE require 5; use diagnostics; use strict; use Encode; use HTML::Entities; #use open ":encoding(utf8)"; # costs 53% speed package DNA::Menuify; use Exporter 'import'; use vars qw($VERSION @ISA @EXPORT_OK $verbose $debug $validate $base_url $template_file); @ISA = qw(Exporter); @EXPORT_OK = qw(write_file $verbose error url_quote url_unquote html_quote html_unquote expand_url expand_urls image_size cgi_exec); my $progname = $0; $progname =~ s@.*/@@g; my $VERSION = q{ $Revision: 1.71 $ }; $VERSION =~ s/^[^0-9]+([0-9.]+).*$/$1/; $verbose = 0; $debug = 0; $validate = 0; $base_url = undef; # the --base argument passed from caller $template_file = "utils/template.html"; # This is the base used in "Like" buttons and so on. Must be http, not https # or relative, or there will be different liker counts for the two versions. my $global_url_base = "http://www.dnalounge.com/"; my @menu = ("Home ./", "Calendar calendar/latest.html", "Directions directions/", "Tickets tickets/", "Webcasts webcast/", "Contact contact/", ); my @months = ("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"); # For computing the next/prev tags. my %prev_link; my %next_link; # Returns the HTML for the left column menu table for a document living # at the given URL (really, relative path from the root.) # sub make_menu_html($$) { my ($current_url, $base) = @_; if ($base) { error ("base must be an absolute URL: $base") unless ($base =~ m@^(?:(?:https?:)?//[^/]+)?/(.*)$@); $current_url = $1; } error ("relative urls only, please: $current_url") if ($current_url =~ m@^[a-z]+:|^[/.]@si); $current_url =~ s@\.tmp$@@i; $current_url =~ s@/index.s?html?$@/@i; my $depth = 0; $_ = $current_url; while (m@^[^/]+/(.*)$@) { $depth++; $_ = $1; } my $prefix; if ($base) { $prefix = $base; } else { $prefix = "../" x $depth; } my $block = 1; my $output = ("
" . "" . "
\n" . "\n \n"; $output =~ s/^/ /gm; # To make sizing work right on horizontal menus, we can't have any # whitespace between
  • and
  • . So move the whitespace to # inside the tag. $output =~ s@(]*>)(\s+)(]*>)@$1$3$2@gs; $output =~ s@()(\s+)( 1); next; } while (<$in>) { next unless m@^\s*Redirect\s+\d+\s+([^\s]+)\s+([^\s]+)\s*$@; my ($from, $to) = ($1, $2); $from =~ s@^/@@; $to =~ s@^https?://[^/]+/@@; $to =~ s@\#.*$@@; $latest_table{$from} = $to; print STDERR "$progname: menuify: $from => $to\n" if ($verbose > 8); } close $in; } } # If the URL ends in "latest.html", expand it to the "real" URL. # my $latest_loaded_p = 0; sub expand_latest_url($$) { my ($url, $target_url) = @_; return $url unless (defined($url)); if (! $latest_loaded_p) { $latest_loaded_p = 1; load_latest(); } my $redir = $latest_table{$url}; if (defined ($redir) && $redir eq $target_url) { print STDERR "$progname: menuify: latest match: $url => $target_url\n" if ($verbose > 7); return $target_url; # this is the target of the "latest" link. } else { return $url; } } my %duplicable_keywords = ('HEADING' => 1, 'SUBHEADING' => 1); # Check to see that only the given set of keywords are in the given text. # Errors if a keyword is present more than once. If "demand" is true, # errors if any of the keywords are not present. # "filename" is only for error messages. # sub check_keywords($$$$@) { my ($filename, $text, $demand, $unknown, @keywords) = @_; my $line = 0; my %hits; foreach (@keywords) { $hits{$_} = 0; } my $nowrap_p = defined($hits{'NOWRAP'}); foreach (split(/\n/, $text)) { $line++; if (m@@) { my $k = $1; if (!defined($hits{$k})) { error ("$filename: $line: unrecognised keyword: %%$k%%") unless ($nowrap_p); } elsif ($hits{$k} > 1) { error ("$filename: $line: duplicate keyword: %%$k%%") unless $duplicable_keywords{$k}; } else { $hits{$k}++; print STDERR "$progname: menuify: $filename: $line: keyword %%$k%%\n" if ($verbose > 7); } } } if ($demand) { foreach (@keywords) { if ($hits{$_} == 0 && !$duplicable_keywords{$_}) { error ("$filename: missing keyword: %%$_%%"); } } } if ($unknown) { foreach (@keywords) { $hits{$_} = 1; } $text =~ s/\s+/ /gs; $text =~ s/(//s; # keep BOTTOM. $html =~ s/.*?$//s; $html =~ s///gs; $html =~ s/\s+/ /gs; # lose linebreaks $html =~ s@<(P|TR)\b[^<>]*>@\n\n@gs; #

    becomes blank line $html =~ s@<(BR|/DIV)\b[^<>]*>@\n@gs; $html =~ s/<[^<>]*>//gs; # lose tags $html =~ s/ +$|^ //gm; $html =~ s/\n\n\n+/\n\n/gs; # This is pretty hokey, but it's relatively fast. # Beware of exponential behavior in that regexp. my @lines0 = split(/\n/, $html); my @lines1 = (); foreach (@lines0) { if ($_ eq '') { push @lines1, ''; } else { 1 while (s/([^\n]{70})([^\n])/$1\n$2/s); # wrap every N chars push @lines1, (split (/\n/, $_)); } } return $#lines1+1; # count lines } # Given the body of a file, parse out the data in it and regenerate # and return the page. # # - anything on the page between %%MENU_START%% and %%MENU_END%% # will be replaced with a newly-generated menu; # # - anything inside %%HEAD_{START,END}%% and %%BOTTOM_{START,END}%% # will be preserved in the appropriate place; # # - everything else will be replaced with the contents of the # template file. sub patch_page_body($$$) { my ($filename, $body, $base) = @_; return $body if ($body eq ''); return $body if ($filename =~ m@calendar/weekly\.html@s); #### Kludge. return $body if ($filename =~ m@backstage/forms/sign\.html@s); #### Kludge. my $nowrap_p = 0; if ($body =~ m@^\s*(]*>\s*)?@s) { print "$progname: menuify: $filename is NOWRAP.\n" if ($verbose > 7); $nowrap_p = 1; check_keywords ($filename, $body, 0, 0, "NOWRAP", "HEADING", "SUBHEADING", ); } else { check_keywords ($filename, $body, 0, 1, "NOWRAP", "HEAD_START", "HEAD_END", "MENU_START", "MENU_END", "BOTTOM_START", "BOTTOM_END", "HEADING", "SUBHEADING", "OVERVIEW_START", "OVERVIEW_END", "SNAPSHOTS_START", "SNAPSHOTS_END", "THUMBS_START", "THUMBS_END", "BLOG_HEADER", "BLOG_FOOTER", ); } my $new_body = $template; # splice in parts of the old file... # if ($nowrap_p) { # Lose template's body and other section markers. $new_body =~ s@(]*>).*?( *)@$1\n$2@si; $new_body =~ s@\n@@gs; # Splice in file's body. my ($obody) = ($body =~ m@]*>\n?(.*?)\s*@si); error ("$filename: no body") unless $obody; $new_body =~ s@(]*>)@$1\n$obody@si; # If the old nowrap file had a stylesheet, put it back (for 404.html). my ($ostyle) = ($body =~ m@(]*>.*?)@si); $new_body =~ s@([ \t]*)(]*>\s*)?)@$1\n@si; } else { my $got_some = 0; foreach my $key ("HEAD", "BOTTOM") { $_ = $body; my $val; if (m/(.*)/xs) { $val = $1; # $val =~ s/^\s+//s; $val =~ s/^([ \t]*\n)+//s; # lose leading blank lines, not spaces $val =~ s/\s+$//s; $val .= "\n" unless ($val eq ""); $got_some++ if ($key =~ m/^BOTTOM/); print STDERR "$progname: menuify: $filename: patched %%$key%%\n" if ($verbose > 7); } else { $val = ""; print STDERR "$progname: menuify: $filename: no %%$key%% keyword\n" if ($verbose > 7); } $val = "\n" . $val . ""; $new_body =~ s//$val/; } error ("no BOTTOM tag in $filename?") unless $got_some; # Construct a new menu, and splice it in. # my $menu = make_menu_html ($filename, $base); $new_body =~ s/() /\n$menu/x; print STDERR "$progname: menuify: $filename: patched %%MENU%%\n" if ($verbose > 7); } $_ = $body; my ($title) = m@(.*)@si; if ($title) { $new_body =~ s@()(.*)()@$1$title$3@si; print STDERR "$progname: menuify: $filename: patched \n" if ($verbose > 7); } else { print STDERR "$progname: menuify: $filename: no <TITLE>\n" if ($verbose > 7); } # splice in ../../../ to get to images, etc... # my $depth = 0; if ($base) { error ("LINK REL=\"menubase\" must be an absolute URL: $base") unless ($base =~ m@^(?:(?:https?:)?//[^/]+)?/(.*)$@); $_ = $1; } else { $_ = $filename; } while (m@^[^/]+/(.*)$@) { $depth++; $_ = $1; } my $prefix; if ($base) { $prefix = $base; } else { $prefix = "../" x $depth; } # template-ize the tables that we use for headings # my ($prev_url, $next_url); ($new_body, $prev_url, $next_url) = handle_heading_table ($filename, $new_body, $prefix); $new_body =~ s/%%ROOT%%/$prefix/g; print STDERR "$progname: menuify: $filename: patched %%ROOT%% - $prefix\n" if ($verbose > 7); # Now handle <LINK>, <META> and <SCRIPT> tags in HEAD # { my $tlinks = ''; my $links = ''; $_ = $new_body; s@</HEAD>.*$@@s; # Lose the webbug, it confuses things. s@([ \t]*<SCRIPT TYPE="text/javascript">.*?</SCRIPT>[ \t]*\n?)@@si; $tlinks .= $1 while (s@([ \t]*<(LINK|META|SCRIPT)\b[^<>]+>[ \t]*\n?)@@si); $_ = $body; s@</HEAD>.*$@@s; # Lose the webbug, it confuses things. s@([ \t]*<SCRIPT TYPE="text/javascript">.*?</SCRIPT>[ \t]*\n?)@@si; $links .= $1 while (s@([ \t]*<(LINK|META|SCRIPT)\b[^<>]+>[ \t]*\n?)@@si); $links =~ s/[ \t]+$//mg; $links =~ s/\n+$//sg; $tlinks =~ s/[ \t]+$//mg; $tlinks =~ s/\n+$//sg; # Auto-generate/correct the LINK tags in backstage/log/ # if (defined ($prev_url) && !($prev_url =~ m@\./?$@)) { $links .= "\n <LINK REL=\"prev\" HREF=\"$prev_url\">"; } if (defined ($next_url) && !($next_url =~ m@\./?$@s)) { my $latest = ($filename =~ m@\d{4}\.html$@si ? "latest.html" : "../../latest.html"); $links .= "\n <LINK REL=\"next\" HREF=\"$next_url\">"; $links .= "\n <LINK REL=\"last\" HREF=\"$latest\">" } my $index_p = ($filename =~ m@(^|/)(index(-template)?|header)\.html(\.tmp)?$@); # update where the "up" link points based on the file name. # { my $log_p = ($filename =~ m@backstage/log/\d{4}/\d\d/\d\d\.html@); my $old_up; $old_up = $1 if ($links =~ m@\"up\"\s+HREF=\"([^\"]+)\"@i); my $new_up = (!$old_up || ($old_up =~ m@^[./]*$@) ? ($index_p ? "../" : "./") : $old_up); $tlinks .= "\n <LINK REL=\"up\" HREF=\"$new_up\">"; # Make sure the log files load the Facebook Javascript, # if it has a "fbcomments" tag in it. # if ($log_p && $new_body =~ m@\b(ID|CLASS)="fbcomments"@si) { $tlinks .= "\n <SCRIPT LANGUAGE=\"javascript\" SRC=\"../../comments.js\">"; } # No longer needed; the JS at the top of template.html does this async now. # # Make sure the file loads the Google +1 Javascript, # # if it has a "g-plusone" tag in it. # # # if ($new_body =~ m@\bCLASS="g-plusone"@si) { # $tlinks .= # "\n <SCRIPT LANGUAGE=\"javascript\"" . # " SRC=\"https://apis.google.com/js/plusone.js\">"; # } } # keep the links in the order seen, but let the ones in the target file # override the ones in the template file. # my %links_table; my @lkeys = (); my @mkeys = (); my @skeys = (); my $me_count = 0; my @stylesheets; my %stylesheets; my $alinks = $tlinks . $links; $alinks =~ s/>/>\001/gs; foreach (split (/\001/, $alinks)) { next if (m/^\s*$/s); my ($type, $key) = m/(REL|NAME|SRC|PROPERTY|HTTP-EQUIV)=\"([^\"]+)\"/si; error ("$filename: bad juju in LINK tag: $_\n") unless defined ($key); if ($key eq 'stylesheet') { my ($s) = m@HREF="([^"<>]+)"@si; error ("$filename: unparsable stylesheet: $_") unless $s; print STDERR "$progname: menuify: $filename: style: $s\n" if ($verbose > 8); # Omit cache-busting terms: treat foo.css?1 and foo.css?2 as the same. # Template file overrides target file, because $tlinks comes first. my $s2 = $s; $s2 =~ s@\?.*$@@s; push @stylesheets, " <LINK REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"$s\">\n" unless ($stylesheets{$s2}); $stylesheets{$s2} = 1; next; } if ($key eq 'me') { # can be any number of "me" links. $key .= "$me_count"; $me_count++; } # Facebook apparently requires the "image_src" URL to be absolute. # This is, of course, completely undocumented. # if ($key eq 'image_src') { s@^(.*HREF=")([^"<>]+)(.*)$@{ my ($a, $u, $b) = ($1, $2, $3); $u = expand_url ($u, expand_url ($filename, $base || $base_url || $global_url_base)); $a . $u . $b; }@gsexi; } my $old = $links_table{$key}; if (defined ($old)) { if ($_ eq $old) { print STDERR "$progname: menuify: $filename: dup: $_\n" if ($verbose > 8); } else { print STDERR "$progname: menuify: $filename: NEW: $_\n" if ($verbose > 7); $links_table{$key} = $_; } } else { $links_table{$key} = $_; if ($type eq 'REL') { push @lkeys, $key; } elsif ($type eq 'SRC') { push @skeys, $key; } else { push @mkeys, $key; } print STDERR "$progname: menuify: $filename: new: $_\n" if ($verbose > 8); } } # Emit META tags, then LINK tags, then SCRIPT tags. $links = ''; foreach my $key (@mkeys, @lkeys, @skeys) { my $val = $links_table{$key}; $links .= "$val\n"; print STDERR "$progname: menuify: $filename: ==> $val\n" if ($verbose > 8); } if ($prefix eq '' && $index_p) { # no "Top" or "Up" on root page. $links =~ s@^\s*<LINK\s+REL=\"(top|up)\"[^<>]+>\s*\n@@gmi; print STDERR "$progname: menuify: $filename:" . " deleting root \"top\" and \"up\"\n" if ($verbose > 8); } $links =~ s/[ \t]+$//mg; $links =~ s/\n+$//sg; $links =~ s/^\n//mg; $links =~ s@(<SCRIPT[^<>]*>)\s*(\n|$)@$1</SCRIPT>$2@gsi; my $stylesheets = join ('', @stylesheets); my ($nh, $nb) = ($new_body =~ m@^(.*?</HEAD>)(.*)$@si); $nh =~ s@^[ \t]*<(LINK|META|SCRIPT)\b[^<>]+>[\t ]*\n@@mig; $nh =~ s@([ \t]*<TITLE)@$stylesheets$1@si; $nh =~ s@(\n[ \t]*</HEAD)@\n$links$1@si; $new_body = $nh . $nb; print STDERR "$progname: menuify: $filename: patched <LINK>\n" if ($verbose > 7); } # Preserve any "xmlns" crap in the HTML tag. my ($old_html_tag) = ($body =~ m@(<HTML[^<>]*>)@si); $new_body =~ s@<HTML[^<>]*>@$old_html_tag@si if ($old_html_tag); $new_body = normalize_embeds ($filename, $new_body); $new_body = protocol_relative_urls ($filename, $new_body); # fix up extra blank-line insertion stupidity $new_body =~ s/\n\n\n+/\n\n/gs; return $new_body; } sub protocol_relative_urls($$) { my ($filename, $html) = @_; # Use protocol-relative URLs in images and style sheets so that either # http or https work without warnings. $html =~ s@(< (IMG | SCRIPT) [^<>]* SRC = [\"\'] ) http: @$1@gsix; $html =~ s@(<LINK [^<>]* REL = [\"\'] ( [^\"\']+ ) [\"\'] [^<>]* HREF = [\"\'] ) ( [^\"\']+ ) @{ my ($head, $type, $url) = ($1, $2, $3); $url =~ s!^https?:!!s unless ($type =~ m/^( image_src | profile | publisher | me )$/six); $head . $url; }@gsexi; return $html; } sub url_quote($) { my ($u) = @_; $u =~ s|([^-a-zA-Z0-9.\@_\r\n])|sprintf("%%%02X", ord($1))|ge; return $u; } sub url_unquote($) { my ($url) = @_; $url =~ s/[+]/ /g; $url =~ s/%([a-z0-9]{2})/chr(hex($1))/ige; return $url; } # Converts &, <, >, " and any UTF8 characters to HTML entities. # Does not convert '. # sub html_quote($) { my ($s) = @_; return HTML::Entities::encode_entities ($s, # Exclude "=042 &=046 <=074 >=076 '^ \t\n\040\041\043-\045\047-\073\075\077-\176'); } # Convert any HTML entities to Unicode characters. # sub html_unquote($) { my ($s) = @_; return HTML::Entities::decode_entities ($s); } # expands the first URL relative to the second. # sub expand_url($$) { my ($url, $base) = @_; $url =~ s/^\s+//gs; # lose whitespace at front and back $url =~ s/\s+$//gs; if ($url =~ m@^[a-z]+:|^//@si) { print STDERR "$progname: absolute URL: $url\n" if ($verbose > 7); } else { $base =~ s@(\#.*)$@@; # strip anchors $base =~ s@(\?.*)$@@; # strip arguments $base =~ s@/[^/]*$@/@; # take off trailing file component my $tail = ''; if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments my $base2 = $base; $base2 =~ s@^(([a-z]+:)?//[^/]+)/.*@$1@si # url is an absolute path if ($url =~ m@^/@); my $ourl = $url; $url = $base2 . $url; $url =~ s@/\./@/@g; # expand "." 1 while ($url =~ s@/[^/]+/\.\./@/@); # expand ".." $url .= $tail; # put anchors/args back print STDERR "$progname: relative URL: $ourl --> $url\n" if ($verbose > 6); } return $url; } # converts all relative URLs in SRC= or HREF= to absolute URLs, # relative to the given base. # sub expand_urls($$) { my ($html, $base) = @_; $html =~ s/</\001</g; my @tags = split (/\001/, $html); foreach (@tags) { if (m/^(.*)\b(HREF|SRC)(\s*=\s*\")([^\"]+)(\".*)$/si) { my $head = "$1$2$3"; my $url = $4; my $tail = $5; $url = expand_url ($url, $base); $_ = "$head$url$tail"; } } return join ('', @tags); } # Patch Youtube embeds to all use the same parameters. # Likewise patch the Facebook "like" iframe. # sub normalize_embeds($$) { my ($filename, $body) = @_; $body =~ s@( ( <DIV \s+ CLASS="video"> \s* # old style <OBJECT[^<>]*> .*? </OBJECT> </DIV> ) | ( <DIV \s+ CLASS="video_floater"> \s* # new style <DIV \s+ CLASS="video_frame"> \s* <IFRAME[^<>]*> .*? </IFRAME> \s* </DIV> \s* </DIV> ))@{ my $embed = $1; my ($url) = ($embed =~ m/(?:SRC|DATA)=\"([^<>\"]+)\"/si); my ($title) = ($embed =~ m/TITLE=\"([^<>\"]+)\"/si); # my ($w) = ($embed =~ m/WIDTH=(\d+)/si); # my ($h) = ($embed =~ m/HEIGHT=(\d+)/si); my ($start) = ($url =~ m/start=(\d+)/si); if ($url !~ m!^(https?:)?//www\.justin\.tv/!s && $url !~ m!^(https?:)?//(www\.)?vimeo\.com/!s && $url !~ m!^(https?:)?//(www\.)?facebook\.com/!s) { $url =~ s/[?&].*$//s; my ($id) = ($url =~ m!/([^/]+)$!si); $url = "//www.youtube.com/embed/$id"; $url .= '?version=3'; # new hotness $url .= '&theme=dark'; # darker controls $url .= '&modestbranding=1'; # lose Youtube logo in controls $url .= '&fs=1'; # enable full screen button $url .= '&rel=0'; # turn off "related" mouseovers $url .= '&showsearch=0'; # turn off search field $url .= '&showinfo=0'; # turn off title overlay $url .= '&iv_load_policy=3'; # turn off annotations $url .= "&start=$start" if ($start); $url =~ s/\&/&/gsi; # URL-entity-quotify $embed = ("<DIV CLASS=\"video_floater\">" . "<DIV CLASS=\"video_frame\">" . "<IFRAME" . " CLASS=\"video_embed\"\n" . ($title ? " TITLE=\"$title\"\n" : "") . " SRC=\"$url\"\n" . "></IFRAME>" . "</DIV>" . "</DIV>"); } $embed; }@gsexi if ($body =~ m/<(IFRAME|OBJECT)/si); # faster search to bug out early my $forced_like = ''; # Don't rewrite URLs in calendar, blog index, justin.tv embed, # or mixtapes; or if --base was specified. # if (defined($base_url) || $filename =~ m|calendar/|s || $filename =~ m|backstage/log/\d{4}/\d\d/index\.html|s || $filename =~ m|webcast/justin\.tv\.html|s || $filename =~ m|xml/index\.html|s) { $forced_like = 1; } elsif ($filename eq 'index.html' || # top-level page $filename eq 'index-template.html' || # top-level template $filename eq 'xml/index.html') { # XML page my $facebook = 'dnalounge'; $facebook = 'dnapizza' if (`pwd` =~ m/dnapizza/); # Kludge $forced_like = 'https://www.facebook.com/' . $facebook; } elsif ($filename =~ m!flyers/(\d{4})/(\d\d)/(\d\d[ab])\.!s) { # Rewrite the "Like" URLs in flyer pages to point at the corresponding # calendar page instead. $forced_like = $global_url_base . "calendar/$1/$2-$3.html"; } # Rewrite Facebook Like buttons. # $body =~ s@(<IFRAME \s+ CLASS="fblike" [^<>]* > ( \s* </IFRAME> )? )@{ my $frame = $1; my ($url) = ($frame =~ m/HREF=([^<>&;"']+)/si); #error ("$filename: no URL in iframe: $frame") unless $url; $url = url_unquote($url || ''); my $url2; if ($forced_like eq 1) { $url2 = $url; } elsif ($forced_like) { $url2 = $forced_like; } else { # All other pages get URL set to "self". $url2 = $global_url_base . $filename; $url2 =~ s!(\.html)[^/]*$!$1!si; # ".html.tmp" -> ".html" $url2 =~ s!/index(-template)?\.html$!/!si; # "/index.html" -> "/" } print STDERR "$filename: changed URL from $url to $url2\n" if ($verbose > 7 && $url ne $url2); $url = ("https://www.facebook.com/plugins/like.php" . "?href=" . url_quote($url2) . "&layout=button_count" . "&show_faces=false" . "&action=like" . "&font=arial" . "&colorscheme=dark" . "&width=80" . "&height=20"); $url =~ s/&/&/gs; $frame = ("<IFRAME CLASS=\"fblike\"\n" . " SRC=\"$url\"\n" . " SCROLLING=NO" . " FRAMEBORDER=0" . " ALLOWTRANSPARENCY=TRUE>" . "</IFRAME>"); $frame; }@gsexi if ($body =~ m/<IFRAME/si); # faster search to bug out early # Rewrite Google Plus buttons. # $body =~ s@(<DIV \s+ CLASS="g-plusone" [^<>]* > ( \s* </DIV> )? )@{ my $frame = $1; my ($url) = ($frame =~ m/(?:DATA-)?HREF="([^<>&;"']+)"/si); #error ("$filename: no URL in plusone: $frame") unless $url; $url = url_unquote($url || ''); my $url2; if ($forced_like eq 1) { $url2 = $url; } elsif ($forced_like) { $url2 = $forced_like; } else { # All other pages get URL set to "self". $url2 = $global_url_base . $filename; $url2 =~ s!(\.html)[^/]*$!$1!si; # ".html.tmp" -> ".html" $url2 =~ s!/index(-template)?\.html$!/!si; # "/index.html" -> "/" } # Let's not aim the Google +1 button at Facebook... # This happens when $forced_like. # $url2 =~ s!^https?://www\.facebook\.com/([a-z]+)$!http://www.$1.com/!s; # WTF? $url2 =~ s!^https?://www\.facebook\.com/plugins/like.php\?href=!!s; print STDERR "$filename: changed URL from $url to $url2\n" if ($verbose > 7 && $url ne $url2); $frame = ("<DIV CLASS=\"g-plusone\"" . " DATA-SIZE=\"medium\"" . " DATA-COUNT=\"true\"" . " DATA-HREF=\"$url2\">" . "</DIV>"); $frame; }@gsexi if ($body =~ m/g-plusone/si); # faster search to bug out early # Convert all Facebook and Twitter URLs to https. # $body =~ s@(href="http)(://(www\.)?(facebook|twitter))@$1s$2@gsi; $body =~ s/^<!-- %%(MENU_HEADER|LEFT|BOTTOM)_START%% -->\n<!-- %%\1_END%% -->\n//gm; return $body; } my $scan_next_prev_p = 0; sub scan_next_prev($) { my ($dir) = @_; return if $scan_next_prev_p; $scan_next_prev_p = 1; opendir (my $ldir, $dir) || error ("$dir: $!"); my $prev_month; my $prev_day; $next_link{'1906-1998.html'} = '1998-1999.html'; $prev_link{'1998-1999.html'} = '1906-1998.html'; $prev_month = '1998-1999.html'; foreach my $year (sort (readdir ($ldir))) { next unless ($year =~ m/^\d{4}$/s); opendir (my $ydir, "$dir/$year") || error ("$dir/$year: $!"); foreach my $month (sort (readdir ($ydir))) { next unless ($month =~ m/^\d\d$/s); $month = "$year/$month"; $prev_link{$month} = $prev_month if $prev_month; $next_link{$prev_month} = $month if $prev_month; $prev_month = $month; opendir (my $mdir, "$dir/$month") || error ("$dir/$month: $!"); foreach my $day (sort (readdir ($mdir))) { next unless ($day =~ m/^\d\d\.html$/s); $day = "$month/$day"; $prev_link{$day} = $prev_day if $prev_day; $next_link{$prev_day} = $day if $prev_day; $prev_day = $day; } closedir ($mdir); } closedir ($ydir); } closedir ($ldir); } # Do the special-case magic to emit all the HTML marked by: # %%HEADING%%, %%SUBHEADING%%, %%BLOG_HEADER%% and %%BLOG_FOOTER%%. # These are all big hairy tables, some with navigation links in them. # # Returns: "new_body", "prev_url" (or undef), "next_url" (or undef) # sub handle_heading_table($$$) { my ($filename, $body, $prefix) = @_; my $obody = $body; my ($purl, $nurl, $lines); my $new_body = ""; while ($body =~ m@^(.*?<!-- \s*%%(HEADING|SUBHEADING|BLOG_HEADER|BLOG_FOOTER) %%\s*-->\s*)(.*)$@sx) { $new_body .= $1; my $tag = $2; $body = $3; my $heading_p = ($tag =~ m/HEADING/); my $subheading_p = ($heading_p && $tag =~ m/SUBHEADING/); $_ = $new_body; my ($indent) = m/([ \t]*)$/s; $new_body =~ s/[ \t]*$//s; # delete spaces at beginning of heading line $_ = $body; if ($heading_p) { m@^(.*?)(\n\s*\n.*)@s || error ("$filename: no blank line after %%$tag%% block?"); my $heading = $1; $body = $2; my $text = $heading; 1 while ($text =~ s@<[^<>]+>@@g); $text =~ s/ / /g; $text =~ s/\s+/ /gs; $text =~ s/(^\s+|\s+$)//gs; if ($subheading_p) { my $index_p = ($filename =~ m@/index\.html@s); print STDERR "$filename: subheading: $text\n" if ($verbose > 7); my ($dd) = $text =~ m/^\s*(\d\d?)\b/s; error ("$filename: no dotm in \"$text\"") unless $dd; $dd = sprintf("%02d", $dd); my ($date, $subj) = ($text =~ m/^(\d\d?[- ][a-z]+[- ]\d{4} \([a-z]+\))\s*(.*)$/si); error ("$filename: unparsable date: \"$text\"") unless $date; if ($index_p) { $date = ("<A NAME=\"$dd\" HREF=\"$dd.html\" CLASS=\"subD\">" . "$date</A>"); } else { $date = "<SPAN CLASS=\"subD\">$date</SPAN>"; } my $url = "$global_url_base$filename"; $url =~ s@/index\.html[^/]*$@/$dd.html@s; $url = ("https://www.facebook.com/plugins/like.php?href=" . url_quote($url)); my $like = "<IFRAME CLASS=\"fblike\" SRC=\"$url\"></IFRAME>"; $like = ("<DIV CLASS=\"likers fblikeBG\">" . "$like" . "<DIV CLASS=\"plusonebox\">" . "<DIV CLASS=\"g-plusone\"". " DATA-SIZE=\"medium\"" . " DATA-COUNT=\"true\"" . " DATA-HREF=\"$url\">" . "</DIV>" . "</DIV>" . "</DIV>"); $subj = "<BR>" unless $subj; $heading = ("<DIV CLASS=\"sub\">\n" . " $date\n" . " $like\n" . " $subj\n" . "</DIV>"); my $month_p = ($filename =~ m@\d\d/index\.html@s); if ($month_p) { my ($year, $mon) = ($filename =~ m@\b(\d{4})/(\d\d)\b@); $mon = $months[$mon-1]; $mon =~ s/^(...).*$/$1/s; $text = "$mon $year"; } else { $text =~ s@<[^<>]*>@@gs; } $new_body =~ s@(<TITLE>).*?() @$1DNA Lounge: $text$2@sx; # Make sure there's a paragraph break after a subheading. $body = "

    \n$body" unless ($body =~ m/^\s*

    /si); } else { print STDERR "$filename: heading: $text\n" if ($verbose > 7); $heading = ("

    " . "" . $text . "" . "
    " . "

    \n"); } $heading =~ s/^/$indent/gm; $new_body .= $heading; } else { # BLOG_HEADER or BLOG_FOOTER my $header_p = ($tag =~ m/HEADER/); my $footer_p = ($tag =~ m/FOOTER/); my $special_p = ($filename =~ m/\b(1906|1998)-/s); m@^(.*?\n\n)(.*)$@s || error ("$filename: no /DIV line after %%$tag%% block?"); my $heading = $1; $body = $2; my ($year, $mon, $day, $pyear, $pmon, $pday, $nyear, $nmon, $nday); my $month_p = ($filename =~ m@\d\d/index\.html@s); my ($dir, $ff) = ($filename =~ m@^(.*)/(\b\d{4}/.*)$@s); $ff =~ s@/[^/]*$@@s if ($month_p); # Kludge ($dir, $ff) = ($filename =~ m@^(.*)/(19\d\d-.*)$@s) if ($special_p); if ($dir) { my $ff2 = "$dir/$ff"; $ff2 =~ s@/[^/]+$@@s; $ff2 =~ s@/[^/]+$@@s unless ($special_p); $ff2 =~ s@/[^/]+$@@s unless ( $special_p || $month_p); scan_next_prev ($ff2); } my $prev = $ff ? $prev_link{$ff} : undef; my $next = $ff ? $next_link{$ff} : undef; $prev = '' unless $prev; $next = '' unless $next; if ($month_p || $special_p) { ($year, $mon) = ($ff =~ m@\b(\d{4})/(\d\d)/?$@s); ($pyear, $pmon) = ($prev =~ m@\b(\d{4})/(\d\d)/?$@s); ($nyear, $nmon) = ($next =~ m@\b(\d{4})/(\d\d)/?$@s); } elsif ($ff) { ($year, $mon, $day) = ($ff =~ m@\b(\d{4})/(\d\d)/(\d\d)\.html@); ($pyear, $pmon, $pday) = ($prev =~ m@\b(\d{4})/(\d\d)/(\d\d)\.html@); ($nyear, $nmon, $nday) = ($next =~ m@\b(\d{4})/(\d\d)/(\d\d)\.html@); } if ($pyear) { if ($year == $pyear && $mon == $pmon) { $prev = "$pday.html"; } elsif ($year == $pyear) { $prev = ($month_p ? "../$pmon/" : "../$pmon/$pday.html"); } else { $prev = ($month_p ? "../../$pyear/$pmon/" : "../../$pyear/$pmon/$pday.html"); } } if ($nyear) { if ($special_p) { $next = "$next/"; } elsif ($year == $nyear && $mon && $mon == $nmon) { $next = "$nday.html"; } elsif ($year == $nyear) { $next = ($month_p ? "../$nmon/" : "../$nmon/$nday.html"); } else { $next = ($month_p ? "../../$nyear/$nmon/" : "../../$nyear/$nmon/$nday.html"); } } if ($pmon) { $pmon = $months[$pmon-1]; $pmon =~ s/^(...).*$/$1/s; } if ($nmon) { $nmon = $months[$nmon-1]; $nmon =~ s/^(...).*$/$1/s; } my ($prev2, $next2); if ($month_p || $special_p) { $prev2 = "<< $pmon" if ($pmon); $next2 = "$nmon >>" if ($nmon); } else { $prev2 = "<< $pday $pmon" if ($pday); $next2 = "$nday $nmon >>" if ($nday); } if ($special_p) { if ($filename =~ m/1998-/s) { $prev = "1906-1998.html"; $prev2 = "<< 1906-1998"; $next2 = "2000 >>"; } else { $prev = "./"; $prev2 = "<< Top"; $next = "1998-1999.html"; $next2 = "1998-1999 >>"; } $next =~ s@^\.\./\.\./@@s; } elsif (!$prev2 && $month_p && !$special_p) { $prev = "../../1998-1999.html"; $prev2 = "<< 1998-1999"; } if (!$next && !$special_p) { $next = "../../"; $next2 = "Top >>"; } $purl = $prev if $prev; # return values $nurl = $next if $next; $prev = ($prev2 ? "$prev2" : ""); $next = ($next2 ? "$next2" : ""); if ($header_p) { $next .= ("\n" . " " . "RSS\n "); } $heading = ("

    \n" . "
    $prev
    \n" . "
    $next
    \n" . ($header_p ? (" DNA Sequencing
    \n" . " \n" . " with your host
    \n" . " \n" . " Jamie Zawinski
    \n") : "") . "
    \n" . "\n"); $heading =~ s@"\.\./\.\./@"@gs unless ($filename =~ m@/\d{4}/\d\d/@s); $heading =~ s@(.*?)@@gsi; # WTF. if (! $header_p) { # Don't emit a footer for single-entry files unless there # are more than N lines in that entry. It looks dumb otherwise. # if (!$month_p && !$special_p && !defined($lines)) { $lines = count_html_lines ($obody); } # no footers on single-day files. $heading = "
    \n
    \n" unless ($month_p || $special_p || $lines > 60); } $new_body .= $heading; } } $new_body .= $body; # pick up tail of file return ($new_body, $purl, $nurl); } sub diff_files($$) { my ($file1, $file2) = @_; my @cmd = ("diff", "-NU2", "$file1", "$file2"); system (@cmd); my $exit_value = $? >> 8; my $signal_num = $? & 127; my $dumped_core = $? & 128; error ("$cmd[0]: core dumped!") if ($dumped_core); error ("$cmd[0]: signal $signal_num!") if ($signal_num); return $exit_value; } # Write the body to the file, if it differs from what is already there. # Returns 1 if anything was written. # sub write_file_1($$) { my ($file, $body) = @_; $file =~ s@^\./@@; # take off leading "./" # If files are ASCII only, read, write and compare them as binary for speed. # Otherwise we need to do the full Unicrud dance. # my $utf8_p = (! ($body =~ m/^[\000-\176]*$/s)); my $enc = ($utf8_p ? 'utf8' : 'raw'); my $obody = ''; if (open (my $in, "<:$enc", $file)) { print STDERR "$progname: menuify: comparing $file\n" if ($verbose > 7); local $/ = undef; # read entire file $obody = <$in>; close $in; } if ($obody eq $body) { print STDERR "$progname: menuify: $file: unchanged\n" if ($verbose > 1); return 0; } my $file_tmp = "$file.tmp"; # When creating a file, make permissions match the parent directory # by computing a umask from the directory's permissions. my $dir = $file; $dir =~ s@(^|/)[^/]*$@@s; $dir = "." unless $dir; my $perms = ((stat($dir))[2] || 0) & 0666; my $oldu = umask (~$perms & 0777); unlink ($file_tmp); open (my $out, ">:$enc", $file_tmp) || error ("$file_tmp: $!"); (print $out $body) || error ("$file_tmp: write: $!");; close $out; umask $oldu; if ($debug) { print STDOUT "\n" . ('#' x 79) . "\n"; diff_files ("$file", "$file_tmp"); unlink "$file_tmp"; print STDERR "$progname: menuify: $file: unchanged\n" if ($verbose > 1); return 1; # would have changed file in non-debug, so say so. } if (!rename ("$file_tmp", "$file")) { unlink "$file_tmp"; error ("mv $file_tmp $file: $!"); } print STDERR "$progname: menuify: wrote $file\n" if ($verbose); return 1; } # If this is an HTML file, menuify the body. # Then write it to the file if it differs from what is already there. # Returns 1 if anything was written. # If $file is '-' then just returns the HTML instead. # sub write_file($$) { my ($file, $body) = @_; if ($file =~ m@\.html(\.[^/]+)?$@si || # HTML file: wrap it. $file eq '-') { parse_template_file (); my $base = $base_url; $base = $1 if ($body =~ m@ 1), ($validate > 1)); } } if ($file eq '-') { return $body; } else { return write_file_1 ($file, $body); } } # The HTML validator generates an error if we use a tag that isn't # in this list (in order to detect typos.) # Mostly duplicated in calendar/generate-calendar.pl # my %allowed_tags = ( "A" => 1, "ABBR" => 1, "AREA" => 2, "B" => 1, "BASE" => 2, "BLOCKQUOTE" => 1, "BODY" => 1, "BR" => 3, "DD" => 1, "DIV" => 1, "DL" => 1, "DT" => 1, "FORM" => 1, # "FRAMESET" => 1, "FRAME" => 2, "H1" => 1, "H2" => 1, "H3" => 1, "H4" => 1, "HEAD" => 1, "HR" => 2, "HTML" => 1, "I" => 1, "IMG" => 3, "INPUT" => 3, "LI" => 1, "LINK" => 3, "MAP" => 1, "META" => 3, "NOBR" => 1, # "NOFRAMES" => 1, "NOSCRIPT" => 1, "OL" => 1, "OPTION" => 1, "P" => 3, "PRE" => 1, "SCRIPT" => 1, "SELECT" => 1, "SPAN" => 1, "STRIKE" => 1, "STYLE" =>1, "SUP" => 1, "TABLE" => 1, "TEXTAREA" => 1, "TD" => 1, "TH" => 1, "TITLE" => 1, "TR" => 1, "TT" => 1, "U" => 1, "UL" => 1, "WBR" => 2, "OBJECT" => 1, "PARAM" => 1, "IFRAME" => 1, ); # Does some simple syntax-checking on the HTML: makes sure tags are # balanced, etc. # Mostly duplicated in calendar/generate-calendar.pl # sub validate_html($$) { my ($filename, $html) = @_; my @stack = (); my $debug = 0; # Kludge for doctype: turn it into a comment. # $html =~ s@^(\s*]*>)@@s; # Kludge for PHP embeds: turn them into comments. # # $html =~ s@(<\?.*?\?>)@@gs; # Kludge for self-closed tags like : map to # $html =~ s@(<([a-z:]+)\b[^<>]*?)/>@$1>@gsi; # lose text inside comments (but keep the newlines, for line numbering)... # $html =~ s@()@{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/^.*$//gm; "$a$b$c"; }@gse; $html =~ s/()//g; # lose comment tags themselves # lose text inside )@{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/^.*$//gm; "$a$b$c"; }@gsei; my $lineno = 1; my $upcoming_lines = 0; # Simpleminded check for stray ampersands and mis-typed entities. { my $ents = $html; $ents =~ s/&/\001&/gi; # Allow non-entity ampersands inside URLs. Technically illegal. $ents =~ s/\bHREF=\"[^\"\n]+\"//gsi; my @ents = split(m/\001/, $ents); my $count = 0; foreach my $e (@ents) { $lineno += $upcoming_lines; $upcoming_lines = -1; foreach (split ("\n", $e)) { $upcoming_lines++; } next if ($count++ == 0); if ($e !~ m/^&([a-z]+\d*|#\d\d+|#x[\da-f][\da-f]+);/si) { $e =~ s/^([^\s]*\s+[^\s<>]*)\b.*$/$1/s; error ("$filename: $lineno: non-entity ampersand: \"$e\""); } } } $html =~ s/\n/\001 /gs; $html =~ s/\s]+)\s*([^<>]*)/; next unless defined ($tag); $tag = uc($tag); error ("$filename: $lineno: broken tag: <$tag>") if (m/^<[^<>]*(<|$)/ || # < closed by < or EOL m/^<[^<>\"]*\"[^<>\"]*(>|$)/); # only one " before > or EOL my $t2 = $tag; $t2 =~ s@^/@@; my $code = $allowed_tags{$t2}; if ($tag =~ m@^/@) { # closing a tag my $otag = pop @stack; my $popped = 1; if (!defined ($otag)) { error ("$filename: $lineno: extranious <$tag>"); } elsif ($tag ne "/$otag") { my $t2 = $tag; $t2 =~ s@^/@@; $code = $allowed_tags{$t2}; if ($code && $code == 2) { error ("$filename: $lineno: unexpected close-tag form: <$tag>"); } elsif ($code && $code == 3) { # Optional close tag (e.g.,

    , ). # This isn't our closer. Put it back. push @stack, $otag; $popped = 0; } else { error ("$filename: $lineno: <$otag> closed by <$tag>"); } } print STDERR "$lineno: " . ($popped ? "POP " : "SKIP") . " " . ('. ' x ($#stack+1)) . "/$otag\n" if ($debug); } elsif (! $code) { error ("$filename: $lineno: unknown tag <$tag>"); } else { # if ($tag eq 'TD' && $attrs !~ m/\bALIGN=/) { # print STDERR "$filename: $lineno: <$tag> has no ALIGN attribute\n"; # } my $otag = $stack[$#stack] || ''; if ( ($tag eq 'TD' && $otag ne 'TR') || ($tag eq 'TH' && $otag ne 'TR') || ($tag eq 'TR' && $otag ne 'TABLE') || ($tag eq 'DT' && $otag ne 'DL') || ($tag eq 'DD' && $otag ne 'DL') || ($tag eq 'FRAME' && $otag ne 'FRAMESET') || ($otag eq 'TR' && $tag ne 'TD' && $tag ne 'TH') || ($otag eq 'TABLE' && $tag ne 'TR') || ($otag eq 'FRAMESET' && $tag ne 'FRAME' && $tag ne 'FRAMESET') || ($otag eq 'OPTION' && $tag eq 'OPTION') || ($otag eq 'LI' && $tag eq 'LI') || ($otag eq 'A' && $tag eq 'A') || ($otag eq 'A' && $tag eq 'UL') || ($otag eq 'A' && $tag eq 'OL') # || ($otag ne 'HEAD' && $tag eq 'STYLE') ) { error ("$filename: $lineno: <$tag> not allowed inside <$otag>"); } if ($code == 2 || $code == 3) { # ignore these -- don't insist on closing tags. print STDERR "$lineno: SKIP " . ('. ' x ($#stack+1)) . "$tag\n" if ($debug); } else { # opening a new tag print STDERR "$lineno: PUSH " . ('. ' x ($#stack+1)) . "$tag\n" if ($debug); push @stack, $tag; } } } if ($#stack >= 0) { error ("$filename: $lineno: unclosed tags: <" . join (">, <", @stack) . ">"); } } my %image_size_cache = (); # Returns the width and height of the image, error if it doesn't exist. # With 2 args: first arg is the URL of the image and second arg is the # contents of that URL. # sub image_size($;$) { my ($file, $body) = @_; my $cache = $image_size_cache{$file}; return @{$cache} if $cache; my $file2 = $file; if ($file =~ m@^[a-z]+://@si) { error ("$file: no body") unless $body; my ($suf) = ($file =~ m@\.([a-z\d]+)$@si); $file2 = sprintf ("%s/img-%08x.%s", ($ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp"), rand(0xFFFFFFFF), $suf); open (my $out, '>:raw', $file2) || error ("$file2: $!"); print $out $body; close $out; print STDERR "$progname: $file: wrote $file2\n" if ($verbose > 5); } else { error ("$file does not exist") unless ($debug || -f $file); return (0, 0) unless -f $file; } my $cmd = ('identify' . ' -define pdf:use-trimbox=true' . ' -density 300x300' . ' -format "%wx%h\n"' . ' "' . $file . '[0]"'); print STDERR "$progname: executing: $cmd\n" if ($verbose > 4); my $result = `$cmd`; print STDERR "$progname: ==> $result\n" if ($verbose > 4); unlink $file2 unless ($file eq $file2); my ($w, $h) = ($result =~ m/^(\d+)x(\d+)\s*$/); error ("no size: $file") unless ($w && $h); my @c = ($w, $h); $image_size_cache{$file} = \@c; return ($w, $h); } # Checks that the IMG tag has width and height matching the image. # sub validate_image($$$$) { my ($filename, $img, $body, $tag_opts) = @_; return if ($img =~ m@(^|/)logo\.gif$@s); # Kludge return if ($img =~ m@(^|/)(facebook|twitter)\.png$@s); # Kludge my ($fw, $fh) = image_size ($img, $body); my ($tw) = ($tag_opts =~ m@\b WIDTH \s* = \s* [\"\']? (\d+)@six); my ($th) = ($tag_opts =~ m@\b HEIGHT \s* = \s* [\"\']? (\d+)@six); ($tw) = ($tag_opts =~ m@\b width: \s* (\d+) \s* px \b@six) unless $tw; ($th) = ($tag_opts =~ m@\b height: \s* (\d+) \s* px \b@six) unless $th; if (!$tw || !$th) { # error ("$filename: $img: missing width/height (${fw}x$fh)") print STDERR "$progname: $filename: $img: missing width/height (${fw}x$fh)\n"; } elsif ($fw == $tw && $fh == $th) { print STDERR "$progname: $filename: size good (${tw}x$th)\n" if ($verbose > 3); } else { # error ("$filename: ${fw}x$fh, not ${tw}x$th"); print STDERR "$progname: $filename: $img: ${fw}x$fh, not ${tw}x$th\n"; } } # Hit the web site to check whether the URL exists. # my %validate_cache; sub validate_url($$$$) { my ($filename, $url, $images_p, $tag_opts) = @_; $url =~ s@#.*$@@s; # no anchors my $ret = $validate_cache{$url}; if (defined ($ret)) { print STDERR "$progname: $filename: cached: $url\n" if ($verbose > 4); } else { my $ua = LWP::UserAgent->new; $ua->agent("$progname/$VERSION"); print STDERR "$progname: $filename: checking: $url\n" if ($verbose > 3); $images_p = 0 if ($images_p && $url !~ m@\.(p?jpe?g|gif|png)$@si); my $res = ($images_p ? $ua->get ($url) : $ua->head ($url)); $ret = ($res && $res->code) || 'null'; $validate_cache{$url} = $ret; if ($ret ne '200') { print STDERR "$progname: $filename: $url: status: $ret\n"; return; } validate_image ($filename, $url, $res->content, $tag_opts) if ($images_p); } } # Checks for broken A and IMG tags. # $urls_p: whether to validate URLs, or only relative links to local files. # sub validate_links($$$$) { my ($filename, $html, $urls_p, $images_p) = @_; my @errors = (); return if ($filename eq 'header.html'); # Kludge. $html =~ s///gs; foreach (split (m/]*)>/si; my ($tag, $opts) = ($1, $2); my ($kind, $q, $url) = ($opts =~ m/(SRC|HREF|ONCLICK)=(["'])([^<>]+?)\2/si); next if ($opts =~ m/\b(ONCLICK)=/si); if (! $url) { print STDERR "$progname: menuify: $filename: skip: $tag $opts\n" if ($verbose > 8); next if ($opts =~ m/\b(NAME|TITLE)=/si); error ("$filename: unparsable " . uc($tag) . " tag: $opts"); } if ($url =~ m@^[a-z]+:|^/@si) { # has protocol or is web-rooted if ($filename =~ m@^[a-z]+:@si) { # base is a URL too $url = expand_url ($url, $filename); } else { if ($url !~ m@^[a-z]+:@si) { # no base to expand against print STDERR "$progname: $filename: skip absolute URL: $url\n" if ($verbose > 2); next; } } } if ($url =~ m@^(mailto|webcal|about|ftp|news|javascript|mms):@si || (!$urls_p && $url =~ m@^(https?):@si)) { print STDERR "$progname: $filename: skip URL: $url\n" if ($verbose > 2); next; } my $f2 = $filename; $f2 =~ s@/[^/]*$@/@s; $f2 =~ s@^[^/]*$@@s; if ($url =~ m@^[a-z]+:|^/@s) { $f2 = $url; } else { $f2 .= $url; } 1 while ($f2 =~ s@[^/]+[^./]/\.\./@@s); $f2 =~ s@\#.*$@@s; $f2 =~ s@\?.*$@@s; next if ($f2 eq ''); # points at "./" next if ($f2 eq '/'); if ($f2 =~ m@\.\./@s) { push @errors, "$filename: too many ..'s in \"$url\""; next; } my $this_images_p = ($images_p && $tag =~ m/^IMG$/si); if ($urls_p && $f2 =~ m@^([a-z]+://[^/]+/?)@si) { if ($filename =~ m@^\Q$1@s) { # URLs on same site validate_url ($filename, $f2, $this_images_p, $opts); } else { print STDERR "$progname: $filename: skip remote URL: $url\n" if ($verbose > 2); } next; } # Relative URL, but we're validating HTML from a URL. next if ($f2 =~ m@^https?:@si && $filename =~ m@^https?:@si); # Kludge next if ($f2 =~ m@^(calendar|flyers|gallery|backstage/log)(/\.)?/latest\.html$@s); next if ($f2 eq 'calendar/index.html'); next if ($f2 eq 'webcast/archive/'); next if ($f2 eq 'webcast/mixtapes/'); next if ($f2 eq 'backstage/src/archiver/archive.rss'); next if ($f2 eq '/dnalounge.css'); $f2 =~ s/%20/ /g; $f2 =~ s/&/&/g; if (! ($f2 =~ m@^(.*)/$@s ? -d $1 : -f $f2)) { push @errors, "$filename: $f2 does not exist"; next; } print STDERR "$progname: $filename: link: $f2\n" if ($verbose > 2); validate_image ($filename, $f2, undef, $opts) if ($this_images_p); } if (@errors) { my $err = shift @errors; foreach (@errors) { $err .= "\n$progname: $_"; } error ($err); } } # Passes the command to the shell, and prints its output (stdout and stderr) # to stdout inside a PRE. HTML-quotes the output, and tries to avoid too # much buffering. # sub cgi_exec($) { my ($cmd) = @_; $|=1; local %ENV = %ENV; delete $ENV{REQUEST_URI}; # So that exec'd programs do normal error(); # $ENV{LANG} = 'en_US' unless $ENV{LANG}; # Not set under Apache. $cmd = "( $cmd ) 2>&1"; open (my $pipe, '-|', $cmd) || error ("pipe: $cmd: $!"); # Someone is buffering the first 1k of the document! Blow through that # so that we can see output of sub-commands in real-time. # print STDOUT "\n"; print STDOUT "

    \n";
    
      while (<$pipe>) {
        $_ = html_quote($_);
        $_ =~ s@^(.*\bWARNING\b.*|sh:.*)$@$1@gm;
        $_ =~ s/\n/
    /gs; print STDOUT $_; } print STDOUT "
    \n"; close $pipe; } my $inside_error_p = 0; sub error($;$); sub error($;$) { my ($err, $http) = @_; die "RECURSIVE: $err" if $inside_error_p; $inside_error_p++; if (defined($ENV{REQUEST_URI})) { my $html = ''; my $stat = 400; my $tmpl = 500; my $file = "$tmpl.html"; if ($http) { if ($http =~ m/^(\d\d\d)[:\s]/s) { $stat = $1; } } $file = "../$file" unless (-f $file); $file = "../$file" unless (-f $file); $file = "../$file" unless (-f $file); $file = "/var/www/dnalounge/$tmpl.html" unless (-f $file); open (my $in, '<', $file) || error ("$file: $!"); print STDERR "$progname: reading $file\n" if ($verbose > 7); local $/ = undef; # read entire file $html = <$in>; close $in; $err =~ s/^error:\s+//si; $err =~ s/^(.)/\u$1/s; $err .= '.' if ($err =~ m/[a-z\d]$/s); my $title = $http ? $http : $err; foreach ($err, $title) { $_ = html_quote ("Error: $_"); } $html =~ s@()[^<>]*@$1DNA Lounge: $title@si; # $html =~ s@\b$tmpl\b@$stat@gs; $html =~ s@\b$tmpl\b@ERROR@gs; $err =~ s/^error:\s*//si; $err .= "<BR><BR>"; $html =~ s@(</TR>\s*<TR>\s*<TD[^<>]*>\s*).*?(\s*<P>)@$1$err$2@si; print "Status: $stat\n"; print "Content-Type: text/html\n\n"; print $html; exit (0); } else { print STDERR "$progname: ERROR: $err\n"; exit 1; } } 1;