#!/usr/bin/perl -w # Copyright © 2000-2008 Jamie Zawinski . # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Generates DNA Lounge calendar pages. # depends on "utils/menuify.pl", "cmp". # Created: 29-Nov-2000. ############################################################################ # # Syntax of the calendar.txt file: # # DOTM-MONTH-YEAR # DESCRIPTION # # Every DOTW from START-DATE to END-DATE # DESCRIPTION # # Every NTH DOTW from START-DATE to END-DATE # DESCRIPTION # e.g., "every first,third friday" of each month. # "first", "second", and "third" mean the obvious thing. # "last" is always the last one in the month (4th or 5th.) # "fifth" is the second-to-last one, in months that have # 5 of them (it is the "extra friday".) # "fourth" is never used. # # Holiday DOTM-MONTH[-YEAR] DESCRIPTION # # Holidays whose text begins with an asterisk are assumed to be federal # holidays, meaning if it falls on a sunday or monday, we note Sunday # events as being on a 3-day weekend. # # If there is an entry for a specific date, it overrides any "Every" # entries that might also apply. If multiple "every" entries apply # to a given date, the later one in the file wins. # # Descriptions are HTML, and can be multiple lines. # Each line of a description must be indented at least one space. # # The left column (title/genre/times/price) and right column (performers) # are separated by a blank line. # # The HTML text within a column does not wrap: use
and

liberally. # # There are various custom HTML-like tags that are used in the HTML that # are converted to "real" HTML when the actual files are generated, like # to indicate DJ names. # # Holiday descriptions show up over in the "Date" box. (Really this ought # to be done more automatically, but I don't know of a command-line program # that will compute those dates for me.) # # # More precisely: # # file := [ entry ]* # entry := static_event | recurring_event | holiday # # static_event := day_of_month_line event_data # day_of_month_line := dotm month year '(' dotw ')' '\n' event_data # event_data := [ hspace text '\n' ]* # # recurring_event := recurrence_line [ recurring_event_data ]* range # recurrence_line := 'every' [ repeat_words ]? day_words '\n' # day_words := day_word [ ',' day_word ]* # repeat_words := nth_of_month_words | alternating_word # nth_of_month_words := nth_of_month_word [ ',' nth_of_month_word ]* # nth_of_month_word := 'first' | 'second' | 'third' | 'fifth' | 'last' # alternating_word := 'other' # (note that there is no way to express # "every 3 weeks"; one can only express # "every 3rd week of the month".) # range := 'from' range_date [ 'to' range_date ]? # range_date := dotm '-' month '-' year # recurring_event_data := hspace text '\n' # # holiday := 'holiday' holiday_date text # holiday_date := dotm '-' month [ '-' year ] # # month := 'january' | 'february' | ... | 'december' | # 'jan' | 'feb' | ... | 'dec' # dotm := '1' | '2' | ... | '31' # year := '2000' | '2001' | ... # hspace := [ ' ' | '\t' ]+ # # # Examples: # # 2 January 2001 (tue) # blah blah blah # 18 January 2001 (thu) # All work and no play makes Jack a dull boy. # All work and no play makes Jack a dull boy. # # 27 feb 2001 (tue) # something else # # Every tuesday from 3-Jan-2001 # blah blah blah # # Every other friday from 3-Jan-2001 -- NOT YET IMPLEMENTED # blah blah blah # # Every first,third wednesday from 3-Jan-2001 # blah blah blah # blah blah blah # # Every fifth saturday from 3-Jan-2001 to 3-March-2001 # blah blah blah # # Holiday 02-Feb Groundhog Day # # Note that, in a month that has 5 saturdays, they are numbered # "first", "second", "third", "fifth", "last", because the "extra" # saturday goes before the "last" saturday. I think this is a pretty # stupid way to go about it, but I'm told that's how it's done. # # Specifying the day of the week on the line for static entries isn't # strictly necessary, but it functions as a checksum (we can error if # the dotm and dotw don't match.) # ############################################################################ require 5; use diagnostics; use strict; use bytes; use Text::Wrap; # Utter foulness! Without this, [:upper:] doesn't work on Latin1 characters. use locale; use POSIX qw(locale_h mktime); setlocale(LC_ALL, "en_US"); ############################################################################## # # Configuration and stuff # ############################################################################## my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.392 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $exec_dir = $0; $exec_dir =~ s@/[^/]*$@@; my $exec_dir2 = "$exec_dir/../utils/"; my $data_dir = $exec_dir; my $verbose = 0; my $debug_p = 0; # this means "don't alter any files, print diffs instead" my $calendar_data_file = "$data_dir/calendar.txt"; my $calendar_upcoming_file = "upcoming.html"; my $calendar_overview_file = "overview.html"; my $calendar_rss_file = "dnalounge.rss"; my $calendar_ical_file = "dnalounge.ics"; my $calendar_crontab_file = "crontab.txt"; my $calendar_weekly_file = "weekly.txt"; my $calendar_weekly_prolog = "prolog.txt"; my $calendar_infoline_file = "infoline.txt"; my $calendar_ledger_file = "ledger.html"; my $calendar_names_file = "names.txt"; my $calendar_tickets_file = "../tickets/index.html"; my $store_file = "../store/index.html"; my $template_file = "$exec_dir2/template.html"; my @menuify_cmd = ("$exec_dir2/menuify.pl", $template_file); my $calendar_date_block_bg = "#004400"; # for the box with the date in it my $calendar_grid_dim_fg = "#666"; # de-emphasized dotms in cal overview my $fill_column = 70; # no plain-text lines longer than this # For the calendar html pages. my $page_title = "DNA Lounge Calendar"; my $url_base = "http://www.dnalounge.com/"; my $ticket_form_url = "https://cart.dnalounge.com/order/"; my $ticket_url_base = "$ticket_form_url?item="; my $vip_price = '$275'; my $css_html = "\n"; my $colorless_css_html = # Style sheet used in email and RSS. "\n"; my $xml_link_tag = " \n"; my $calendar_top_blurb = ("

\n" . "

\n" . "
\n" . "All events are 21+, and a valid photo ID required.\n" . "

\n" . "Having a birthday party?\n" . "VIP table service is available!\n" . "

\n" . "You can have these events automatically show up in your\n" . "calendar or PDA by subscribing to our\n" . "iCal feed.\n" . "

\n" . "There is also an\n" . "alphabetical list\n" . "of all bands and djs who have ever performed here.\n" . "

\n" . "
\n" ); my $tickets_top_blurb = ("

\n" . "

\n" . "Advance tickets are available for the following events!\n" . "Tickets for all other events will be available at the door only.\n" . "Please see our " . "calendar for details of other events.\n" . "

\n" . "All events are 21+, and a valid photo ID required.\n" . "

\n" . "Having a birthday party?\n" . "VIP table service is available!\n" . "

\n" . "

\n" ); my $rss_channel_title = "DNA Lounge"; my $rss_channel_url = $url_base; my $rss_channel_desc = "Upcoming events at the DNA Lounge nightclub " . "in San Francisco."; my $rss_channel_desc2 = ("Upcoming events at the DNA Lounge nightclub:\n" . "375 Eleventh Street, San Francisco.\n" . "All events are 21+, and a valid photo ID is " . "required."); my $rss_channel_loc = "DNA Lounge: 375 Eleventh Street, San Francisco."; my $rss_channel_lang = "en"; my $rss_channel_tz = "US/Pacific"; my $rss_logo_url = "${url_base}logo-thumb.gif"; my $rss_logo_width = 100; my $rss_logo_height = 34; my $rss_max_links = 60; my $overview_max_links = 10; my $rss_entity_mode = 2; # 0 = raw chars; 1 = entities; 2 = ascii only. my $rss_custom_tags = 1; my $rss_include_whatsnew = 0; ############################################################################## # # Data structures and stuff # ############################################################################## my %calendar = (); # keys are "YYYY-MM-DD" # values are references to lists. # elements of lists are hashes with these keys: # date ("dotw, dd-mmm-yy") # time ("DD:DD - DD:DD" start/end time) # times (yyyy mm dd dotw start_min end_min) # event_ord (overall event number, from start) # day_ord (event_number of_n_events_today) # title (the short name, without "presents") # repeat ("every first monday") # holiday (if this is a holiday, which one) # vacationp (1 if tomorrow is a day off) # url (the event's home page) # flyer (flyer url for the event, if any) # genre (listref of genres of the event) # photos (photos url) # ticket (url to buy a ticket) # vipticket (url to buy a VIP ticket) # event_ids (listref of external site links) # onsale ("dd-mm-yy" when tickets on sale) # main_room (summary of main room DJs) # lounge (summary of lounge DJs) # webcast (webcast state: main, lounge, off) # video (youtube url to be embedded) # videoname (caption of video) # html_src (raw calendar.txt source) my @repeaters = (); # elements are lists: # (dotw, repeat_code, repeat_desc_string, # from_year, from_month, from_dotm, # to_year, to_month, to_dotm, # lineno, event-hash) # dotw: 0-6 # repeat: 0, 1-5, -1, or -666. # the event-hash is as in %calendar my %shared_flyers = (); # keys and values are "YYYY-MM-DD". my @future_events; # list of event hashes that lie in the future my $yesterday_event; # the last event that didn't go on @future_events my $total_event_count; # how many events are listed in the calendar my $total_live_count; # how many of those were live shows my @event_ords; # index of events by overall event number. my %genres; # count of event genres. my %inline_flyers; # maps date to tags for flyer thumbnails. my $body_template = undef; ############################################################################## # # Date parsing and manipulation utilities # ############################################################################## my @months = ("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"); my @days = ("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"); my @days_per_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31); # matches various forms of month names (in lower case.) # my $month_re = ('\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|' . 'may|june?|july?|aug(?:ust)?|sep(?:tember)?|sept|' . 'oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\b'); # matches various forms of day-of-the-week names (in lower case.) # my $days_re = ('\b(?:sun(?:day)?|mon(?:day)?|tue(?:sday)?|tues|' . 'wed(?:nesday)?|thu(?:rsday)?|thurs?|fri(?:day)?|' . 'sat(?:urday)?)\b'); # maps lower cased month names to numbers (1-12). # my %monthvals = ( 'jan' => 1, 'january' => 1, 'february' => 2, 'feb' => 2, 'march' => 3, 'mar' => 3, 'april' => 4, 'apr' => 4, 'may' => 5, 'jun' => 6, 'june' => 6, 'jul' => 7, 'july' => 7, 'august' => 8, 'aug' => 8, 'sep' => 9, 'sept' => 9, 'september' => 9, 'oct' => 10, 'october' => 10, 'nov' => 11, 'november' => 11, 'dec' => 12, 'december' => 12 ); # maps lower cased day names to numbers (0-6, sun-sat). # my %dayvals = ( 'sun' => 0, 'sunday' => 0, 'mon' => 1, 'monday' => 1, 'tue' => 2, 'tues' => 2, 'tuesday' => 2, 'wed' => 3, 'wednesday' => 3, 'thu' => 4, 'thur' => 4, 'thurs' => 4, 'thursday' => 4, 'fri' => 5, 'friday' => 5, 'sat' => 6, 'saturday' => 6 ); # Given a date, returns the day of the week on which it falls (0-6, sun-sat) # sub dotw($$$) { my ($dotm, $month, $year) = @_; # From the sci.math FAQ: # # The following formula, which is for the Gregorian calendar only, may be # more convenient for computer programming. Note that in some programming # languages the remainder operation can yield a negative result if given # a negative operand, so "mod 7" may not translate to a simple remainder. # # W == (k + [2.6m - 0.2] - 2C + Y + [Y/4] + [C/4]) mod 7 # where [] denotes the integer floor function (round down), # k is day (1 to 31) # m is month (1 = March, ..., 10 = December, 11 = Jan, 12 = Feb) # Treat Jan & Feb as months of the preceding year # C is century (1987 has C = 19) # Y is year (1987 has Y = 87 except Y = 86 for Jan & Feb) # W is week day (0 = Sunday, ..., 6 = Saturday) # # Here the century & 400 year corrections are built into the formula. # The [2.6m-0.2] term relates to the repetitive pattern that the 30-day # months show when March is taken as the first month. my $k = $dotm; my $m = 1 + (($month + 9) % 12); # (1-12) => (11, 12, 1-10) my $y = $year - ($m >= 11 ? 1 : 0); # subtract a year for jan/feb my $C = int ($y/100); my $Y = int ($y%100); my $W = (($k + int ((2.6 * $m) - 0.2) - (2 * $C) + $Y + int ($Y / 4) + int ($C / 4) + 7) % 7); return $W; } # Returns info describing a day's position in the month. # For the given date, tells you that it is, e.g., the 2nd wednesday; # and that this month contains 5 wednesdays. # sub dotw_count($$$) { my ($year, $month, $dotm) = @_; my $days = days_per_month ($month, $year); my $dotw = dotw ($dotm, $month, $year); my $index = undef; my $total = 0; my $idotw = dotw (1, $month, $year); for (my $i = 1; $i <= $days; $i++) { if ($idotw == $dotw) { $total++; $index = $total if ($i == $dotm); } $idotw = ($idotw + 1) % 7; } error ("$dotm does not occur in $month-$year?") unless ($index); error ("only $total $days[$dotw]s in $month-$year?") if ($total < 4); return ($index, $total); } # Whether the given 4-digit year is a leap year. # sub leap_year_p($) { my ($year) = @_; return ((($year % 4 == 0) && # divisible by 4 (($year % 100) || # ...unless divisble by 100 ($year % 400 == 0))) # ...and not divisible by 400 ? 1 : 0); } # Returns the number of days in the given month, considering leap years. # sub days_per_month($$) { my ($month, $year) = @_; my $days = $days_per_month[$month-1]; $days++ if ($month == 2 && leap_year_p($year)); # feb leapday return $days; } # Converts a day number to "Saturday", "Saturday Afternoon", etc. # $event_count is how many events there are on this day; # $event_number is which of those events this is. # The "evening" suffix is only used if there are multiple events, # but the "morning" and "afternoon" suffixes are used even if # there is only one event on this day. # sub describe_day($$$$) { my ($dotw, $start_minute, $event_count, $event_number) = @_; my $afternoon_threshold = (12 * 60); # noon my $evening_threshold = (17 * 60); # 5 PM my $day = $days[$dotw]; if ($event_count > 1 || $start_minute < $evening_threshold) { my $suffix = undef; if ($start_minute < $afternoon_threshold) { $suffix = "Morning"; } elsif ($start_minute < $evening_threshold) { $suffix = "Afternoon"; } elsif ($event_number > 0) { # no "Night" suffix if it's the first event (so we don't get two) $suffix = "Night"; } $day .= " $suffix" if (defined ($suffix)); } return $day; } ############################################################################## # # HTML utilities # ############################################################################## my %entity_table = ( "quot" => '"', "amp" => '&', "lt" => '<', "gt" => '>', "nbsp" => ' ', "iexcl" => '¡', "cent" => '¢', "pound" => '£', "curren" => '¤', "yen" => '¥', "brvbar" => '¦', "sect" => '§', "uml" => '¨', "copy" => '©', "ordf" => 'ª', "laquo" => '«', "not" => '¬', "shy" => '­', "reg" => '®', "macr" => '¯', "deg" => '°', "plusmn" => '±', "sup2" => '²', "sup3" => '³', "acute" => '´', "micro" => 'µ', "para" => '¶', "middot" => '·', "cedil" => '¸', "sup1" => '¹', "ordm" => 'º', "raquo" => '»', "frac14" => '¼', "frac12" => '½', "frac34" => '¾', "iquest" => '¿', "Agrave" => 'À', "Aacute" => 'Á', "Acirc" => 'Â', "Atilde" => 'Ã', "Auml" => 'Ä', "Aring" => 'Å', "AElig" => 'Æ', "Ccedil" => 'Ç', "Egrave" => 'È', "Eacute" => 'É', "Ecirc" => 'Ê', "Euml" => 'Ë', "Igrave" => 'Ì', "Iacute" => 'Í', "Icirc" => 'Î', "Iuml" => 'Ï', "ETH" => 'Ð', "Ntilde" => 'Ñ', "Ograve" => 'Ò', "Oacute" => 'Ó', "Ocirc" => 'Ô', "Otilde" => 'Õ', "Ouml" => 'Ö', "times" => '×', "Oslash" => 'Ø', "Ugrave" => 'Ù', "Uacute" => 'Ú', "Ucirc" => 'Û', "Uuml" => 'Ü', "Yacute" => 'Ý', "THORN" => 'Þ', "szlig" => 'ß', "agrave" => 'à', "aacute" => 'á', "acirc" => 'â', "atilde" => 'ã', "auml" => 'ä', "aring" => 'å', "aelig" => 'æ', "ccedil" => 'ç', "egrave" => 'è', "eacute" => 'é', "ecirc" => 'ê', "euml" => 'ë', "igrave" => 'ì', "iacute" => 'í', "icirc" => 'î', "iuml" => 'ï', "eth" => 'ð', "ntilde" => 'ñ', "ograve" => 'ò', "oacute" => 'ó', "ocirc" => 'ô', "otilde" => 'õ', "ouml" => 'ö', "divide" => '÷', "oslash" => 'ø', "ugrave" => 'ù', "uacute" => 'ú', "ucirc" => 'û', "uuml" => 'ü', "yacute" => 'ý', "thorn" => 'þ', "yuml" => 'ÿ', "ndash" => '-', "mdash" => "--" ); my %entity_table2; # maps Latin1 back to Entities foreach (keys (%entity_table)) { $entity_table2{$entity_table{$_}} = $_; } my %asciify_table = ( '¡' => '!', '¢' => '[c]', '£' => '#', '¤' => '#', '¥' => 'Y', '¦' => '|', '§' => 'SS', '¨' => '_', '©' => '(c)', 'ª' => '?', '«' => '<<', '¬' => '=', '­' => '-', '®' => '(r)', '¯' => '_', '°' => '[?]','±' => '+/-', '²' => '[2]', '³' => '[3]', '´' => '\'', 'µ' => 'u', '¶' => 'PP', '·' => '.', '¸' => ',', '¹' => '[1]', 'º' => '[o]','»' => '>>', '¼' => ' 1/4 ', '½' => ' 1/2 ', '¾' => ' 3/4 ', '¿' => '?', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => '0', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'p', 'ß' => 'S', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'o', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', '÷' => '/', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'p', 'ÿ' => 'y' ); # Converts any Latin1 characters to HTML entities. # Also converts &, < and > to entities. # sub entitify($) { my ($s) = @_; $s =~ s/([&<>\177-\377])/ { my $ent = $entity_table2{$1}; error ("no entity for character: $1\n") unless (defined ($ent)); print STDERR "$progname: $1 -> $ent\n" if ($verbose > 3);; "&$ent;"; }/xegs; return $s; } # Convert any HTML entities to Latin1 characters. # sub de_entify($) { my ($text) = @_; $text =~ s/(&([[:alpha:]]+);?)/ { my $c = $entity_table{$2}; print STDERR "$progname: warning: unknown HTML character entity \"$1\"\n" unless $c; ($c ? $c : "[$2]"); } /gexi; return $text; } # Converts any Latin1 characters to the closest ASCII equivalent. # Note that you probably also need to convert & to & # sub asciify($) { my ($s) = @_; $s =~ s/([\177-\377])/ { my $asc = $asciify_table{$1}; error ("no ASCII for character: $1\n") unless (defined ($asc)); print STDERR "$progname: $1 -> $asc\n" if ($verbose > 3); $asc; }/xegs; return $s; } # Convert the given HTML to plain text. # If $do_urls is true, insert the URLs of BAND HREFs on the ends of # their lines. # sub de_htmlify($$) { my ($txt, $do_urls_p) = @_; my $tabstop = 20; $_ = $txt; s@ @ @g; # handle nbsp early # convert HREFs to text -- yyy --> "yyy -- xxx" # do this for but not for or . # Also, upcase the band names. # s@<(BAND)([^>]*)>(.*?) @{ my ($u, $b) = ($2, $3); if (! ($u =~ s/^.*\bHREF=\"([^\"]+)\".*$/$1/si)) { $u = undef; } $u =~ s/\&/&/g if ($u); $b = de_entify($b); $b =~ s/(.*)/\U$1/g; (($do_urls_p && $u) ? "$b\002$u" : $b); }@xegsi; # our magic "block" tags. s@]*>@

@gsi; s@
@\001@gi; # save linebreaks s@]*>@\001\001@gi; # save paragraphs s@<[^>]+>@@g; # delete all tags s@\s+@ @g; # compress all whitespace s@\001@\n@g; # convert lines/paras back if (m/\002/) { # hack tabstops my @lines = split(/\n/, $_); # find the longest string in column 1 my $max = 0; foreach my $line (@lines) { # only one tab per line! 1 while ($line =~ s/^([^\002]*\002[^\002]*)\002(.*)$/$1 -- $2/s); if ($line =~ m/^(.*)\002/) { my $n = length($1); $max = $n if ($n > $max); } } # now indent the text in column 2 foreach my $line (@lines) { if ($line =~ m/^(.*)\002(.*)$/) { $line = $1 . (' ' x ($max - length($1))) . " -- " . $2; } } $_ = join ("\n", @lines); } s@(http:[^\s]+) *--.*$@$1@mg # kludge for "-- performing live!" BS... if ($do_urls_p); # If there's stuff after the URL on the line, break the line. s@(http:[^\s]+)([ \t]+[^ \t])@$1\n $2@mg; s@^[ \t]+@@mg; # lose whitespace at beginning of lines s@[ \t]+$@@mg; # lose whitespace at end of lines s@\n\n\n+@\n\n@g; # compress blank lines s@^\n+@@sg; # lose leading blank lines s@\n+$@@sg; # lose trailing blank lines $_ = de_entify($_); # translate character entities return $_; } # Upcase the *text* in the HTML string, being careful not to touch # tags and to do the right thing with character entities. # sub upcase_html($) { my ($str) = @_; $str =~ s/(<[^<>]*>)/\001$1\001/gsi; my @segs = split (/\001/, $str); foreach (@segs) { next if (m/^[ \t]*\n?//gs; # strip leading/trailing whitespace, compress blank lines and spaces. $html =~ s/^\s+//g; $html =~ s/\s+$//g; $html =~ s/^\n\n+/\n\n/gs; $html =~ s/^\n+//gs; $html =~ s/\n+$//gs; $html =~ s/\t/ /gs; $html =~ s/ +/ /gs; # strip paragraph/line breaks before and after blank lines 1 while ($html =~ s@(\n\n)@$2@igs); 1 while ($html =~ s@(\n\n)@$1@igs); # strip redundant BRs 1 while ($html =~ s@
\s*(\s*(]*>)\s*@$1@igs; return $html; } # The HTML validator generates an error if we use a tag that isn't # in this list (in order to detect typos.) # Mostly duplicated in utils/menuify.pl # my %allowed_tags = ( "A" => 1, "AFF" => 1, "B" => 1, "BR" => 2, "BAND" => 1, "DJ" => 1, "ETITLE" => 1, "FLYER" => 2, "GENRE" => 1, "I" => 1, "LIVE" => 2, "OTHER" => 1, "P" => 2, "PRES" => 1, "PRICE" => 1, "STATS" => 1, "STRIKE" => 1, "TABLE" => 1, "TD" => 1, "TICKET" => 2, "TIME" => 1, "TR" => 1, "U" => 1, "UL" => 1, "WEBCAST" => 2, "VIDEO" => 2, "BLURB" => 1, "DIV" => 1, "SPAN" => 1, "EVENT" => 2 ); # Does some simple syntax-checking on the HTML: makes sure tags are # balanced, etc. # Mostly duplicated in utils/menuify.pl # We need our own copy here to give sane error messages about the tags: # we want to do validation *before* those are transformed to
, , etc. # sub validate_html($$$$$) { my ($html, $lineno, $year, $month, $dotm) = @_; # Note: line numbers in error messages aren't quite right, since any # lines commented out with "#" have been deleted before we get here... my $filename = (defined($year) ? "$dotm $months[$month-1] $year" : "no date"); error ("$filename: $lineno: no newline between and <$2>$3") if ($html =~ m@ \s* <(DJ|BAND|OTHER) \b [^<>]* > \s* ([^<>]*?) \s* <@six); my @stack = (); my $debug_html_p = 0; # lose text inside comments (but keep the newlines, for line numbering)... # $html =~ s@()@{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/^.*$//gm; "$a$b$c"; }@gse; $html =~ s/()//g; # lose comment tags themselves $html =~ s/\n/\n\001/gs; $html =~ s/\s]+)/; next unless defined ($tag); $tag =~ tr/a-z/A-Z/; $tag = 'P' if ($tag eq '/P'); # kludge... my $code = $allowed_tags{$tag}; if ($tag =~ m@^/@) { # closing a tag my $otag = pop @stack; print STDERR "$lineno: POP " . ('. ' x ($#stack+1)) . "/$otag\n" if ($debug_html_p); if (!defined ($otag)) { error ("$filename: $lineno: extranious <$tag>"); } elsif ($tag ne "/$otag") { my $t2 = $tag; $t2 =~ s@^/@@; $code = $allowed_tags{$t2}; if ($code && $code == 2) { error ("$filename: $lineno: unexpected close-tag form: <$tag>"); } else { error ("$filename: $lineno: <$otag> closed by <$tag>"); } } } elsif (! $code) { error ("$filename: $lineno: unknown tag <$tag>"); } else { my $otag = $stack[$#stack] || ''; if (($tag eq 'TD' && $otag ne 'TR') || ($tag eq 'TH' && $otag ne 'TR') || ($tag eq 'TR' && $otag ne 'TABLE') || ($otag eq 'TR' && $tag ne 'TD' && $tag ne 'TH') || ($otag eq 'TABLE' && $tag ne 'TR') || ($otag eq 'OPTION' && $tag eq 'OPTION') || ($otag eq 'LI' && $tag eq 'LI') || ($tag eq 'ETITLE' && $otag ne '') || ($tag eq 'PRES' && $otag ne '') || ($tag eq 'STATS' && $otag ne '') || ($tag eq 'GENRE' && $otag ne 'STATS') || ($tag eq 'TIME' && $otag ne 'STATS') || ($tag eq 'PRICE' && $otag ne 'STATS') || (($tag eq 'BR' || $tag eq 'P') && ($otag eq 'GENRE' || $otag eq 'PRES' || $otag eq 'ETITLE')) ) { error ("$filename: $lineno: <$tag> not allowed inside <$otag>"); } if ($code == 2) { # ignore these -- don't insist on closing tags. print STDERR "$lineno: SKIP " . ('. ' x ($#stack+1)) . "$tag\n" if ($debug_html_p); } else { # opening a new tag print STDERR "$lineno: PUSH " . ('. ' x ($#stack+1)) . "$tag\n" if ($debug_html_p); push @stack, $tag; } } } if ($#stack >= 0) { error ("$filename: $lineno: unclosed tags: <" . join (">, <", @stack) . ">"); } } # Return a string with leading whitespace added so as to center the given # text within 72 columns. # sub center_line($) { my ($line) = @_; my $cols = 72; # $fill_column ? $line = (' ' x int(($cols - length($line)) / 2)) . $line; return $line; } # Decides which lines in the text are "headings" and which are "bodies" # and adds indentation. This is for the YYYY/MM.txt files. # sub indentify($) { my ($text) = @_; $text =~ s/[ \t]+$//gm; # lose trailing whitespace $text =~ s/\n\n\n+/\n\n/gs; # compress blank lines $text =~ s/^\n+//s; # lose leading blank lines $text =~ s/\n\n+$/\n/s; # lose trailing blank lines my @lines = split ("\n", $text); my $level = 0; my $para_head_p = 1; my $last_indented_p = 0; foreach (@lines) { s/^/' ' x (4 * $level)/e; # increase next line's indent if this line is at the head of a paragraph # and ends in a colon. my $indent_p = (m/:\s*$/ && $para_head_p); $level++ if ($indent_p); # decrease next line's indent if this line is blank, # unless we just indented on the previous non-blank line. $level-- if (m/^\s*$/ && $level > 0 && !$last_indented_p); # next line is the first line in a paragraph if this line is blank. $para_head_p = (m/^\s*$/); $last_indented_p = $indent_p; # Finally, wrap this line if it's long. $Text::Wrap::columns = 65; $_ = wrap ('', (' ' x (4 * $level)), $_) if (length($_) > $Text::Wrap::columns); } return join ("\n", @lines); } ############################################################################## # # Parsing the calendar.txt file # ############################################################################## sub parse_calendar_entry($$@) { my ($lineno, $fast_p, @lines) = @_; $_ = $lines[0]; if (m/^(\d\d?)[-\s]+($month_re)[-\s]/io) { # "3-Jan-2001" parse_static_entry ($lineno, $fast_p, @lines); } elsif ( m/^EVERY\s+/io ) { # "Every Nth X" parse_repeating_entry ($lineno, $fast_p, @lines); } elsif ( m/^HOLIDAY\s+/io ) { # "Holiday 2-Feb" parse_holiday_entry ($lineno, @lines); } else { error ("$lineno: unparsable calendar entry heading: $_"); } } my $prev_year = 0; # for detecting misordered entries when parsing my $prev_month = 0; # (which usually means cut-and-paste went wrong.) my $prev_dotm = 0; sub parse_static_entry($$@) { my ($lineno, $fast_p, @lines) = @_; my $slineno = $lineno; $_ = shift @lines; if (! m/^(\d\d?) [-\s]+ ($month_re) [-\s]+ (\d{4}) \s* \(?($days_re)?\)? \s* (\#2)? \s* $/iox ) { error ("$lineno: unparsable DD-MMM-YYYY line: $_\n"); } my $dotm = 0 + $1; my $month = $2; my $year = 0 + $3; my $dotw = $4; my $second_event_same_day = defined($5); $month =~ tr/A-Z/a-z/; $month = $monthvals{$month}; $dotw =~ tr/A-Z/a-z/ if defined($dotw); $dotw = $dayvals{$dotw} if defined($dotw); error ("$lineno: $dotm-$months[$month-1]-$year appears more than once!\n") if (($prev_year == $year && $prev_month == $month && $prev_dotm == $dotm) && !$second_event_same_day); error ("$lineno: $dotm-$months[$month-1]-$year should be a second event " . "but isn't!\n") if ($second_event_same_day && ($prev_year != $year || $prev_month != $month || $prev_dotm != $dotm)); error ("$lineno: year $prev_year preceeds $year!\n") if ($prev_year > $year); error ("$lineno: $months[$prev_month-1] $prev_year preceeds " . "$months[$month-1] $year!\n") if ($prev_year == $year && $prev_month > $month); error ("$lineno: $prev_dotm-$months[$prev_month-1]-$prev_year preceeds " . "$dotm-$months[$month-1]-$year!\n") if ($prev_year == $year && $prev_month == $month && $prev_dotm > $dotm); my $real_dotw = dotw ($dotm, $month, $year); if (defined ($dotw)) { error ("$lineno: $dotm-$months[$month-1]-$year is a " . "$days[$real_dotw], not a $days[$dotw]") if ($real_dotw != $dotw); } else { $dotw = $real_dotw; my $m = $months[$month-1]; $m =~ s/^(...).*$/$1/; my $d = $days[$dotw]; $d =~ s/^(...).*$/$1/; print STDERR ("$progname: $lineno: $dotm-$m-$year: " . "dotw unspecified (should be $d)\n"); } $lineno++; $prev_year = $year; $prev_month = $month; $prev_dotm = $dotm; my $entry = ""; while ($#lines >= 0) { my $line = shift @lines; $line =~ s/^\s+//; $entry .= "\n$line"; } cache_inline_flyers ($year, $month); my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $entry_hash = parse_calendar_html ($dotm, $month, $year, $slineno, $entry, $fast_p); my $listref = $calendar{$key}; my @list = (defined($listref) ? @$listref : ()); push @list, \$entry_hash; $listref = \@list; $calendar{$key} = $listref; if ($verbose > 2) { $_ = $entry; s/\\/\\\\/g; s/\"/\\\"/g; s/\n/\\n/g; s/^(.{20}).*$/$1.../; print STDERR "$progname: $slineno: stored $key = \"$_\"\n"; } } sub parse_repeating_entry($$@) { my ($lineno, $fast_p, @lines) = @_; my $desc = shift @lines; my $entry_html = ""; foreach (@lines) { s/^\s+//; $entry_html .= "$_\n"; } my $repeat_words = "(?:first|second|third|fourth|fifth|last|" . "1st|2nd|3rd|4th|5th)"; my $alter_words = "(?:other)"; if ($desc =~ m/^every\s+ ($alter_words)?\s* ($repeat_words(?:\s*,\s*$repeat_words)*)?\s* ((?:$days_re)(?:\s*,\s*(?:$days_re))*)\s* (?:from\s+(.*?))?\s* (?:to\s+(.*?))?\s* $/xio) { my $alts = $1 || ''; my $reps = $2 || ''; my $days = $3; my $from = $4; my $to = $5; my $from_year = -1; my $from_month = -1; my $from_day = -1; my $to_year = 9999; my $to_month = 99; my $to_day = 99; if ($alts ne '' && $reps ne '') { error ("$lineno: can't specify 'other' along with repetition: $desc\n"); } if ($from) { if ($from =~ m/^(\d\d?)\s*[-\s]\s*($month_re)\s*[-\s]\s*(\d\d\d\d)/io) { $from_day = 0 + $1; $_ = $2; tr/A-Z/a-z/; $from_month = $monthvals{$_}; $from_year = 0 + $3; } else { error ("$lineno: unparsable from date: \"$from\"" . " (should be DD-MMM-YYYY)\n"); } } if ($to) { if ($to =~ m/^(\d\d?)\s*[-\s]\s*($month_re)\s*[-\s]\s*(\d\d\d\d)/io) { $to_day = 0 + $1; $to_year = 0 + $3; $_ = $2; tr/A-Z/a-z/; $to_month = $monthvals{$_}; } else { error ("$lineno: unparsable to date: \"$to\"" . " (should be DD-MMM-YYYY)\n"); } } $from_year = 0 unless defined($from_year); $from_month = 0 unless defined($from_month); $from_day = 0 unless defined($from_day); $to_year = 9999 unless defined($to_year); $to_month = 13 unless defined($to_month); $to_day = 32 unless defined($to_day); my $rdesc = "Every"; my @values = (); if ($alts ne '') { # this can only be "every other DOTW". push @values, -666; $rdesc .= " Other"; } else { my @rdescs = (); foreach my $rep (split(/,/, $reps)) { $rep =~ s/^\s+//; $rep =~ s/\s+$//; if ($rep =~ m/^(first|1st)$/i) { push @values, 1; push @rdescs, "1st"; } elsif ($rep =~ m/^(second|2nd)$/i) { push @values, 2; push @rdescs, "2nd"; } elsif ($rep =~ m/^(third|3rd)$/i) { push @values, 3; push @rdescs, "3rd"; } elsif ($rep =~ m/^(fourth|4th)$/i) { # error ("$lineno: no such number as \"fourth\": " . # "did you mean \"last\" or \"fifth\"?"); push @values, 4; push @rdescs, "4th"; } elsif ($rep =~ m/^(fifth|5th)$/i) { push @values, 5; push @rdescs, "5th"; } elsif ($rep =~ m/^last$/i) { push @values, -1; push @rdescs, "Last"; } else { error ("$lineno: INTERNAL ERROR: unknown repeater: $rep\n"); } } $_ = join (", ", @rdescs); s/^(.*), ([^,]+)$/$1 and $2/; $rdesc .= " $_"; } push (@values, 0) if ($#values < 0); { my @rdescs = (); foreach (split(/,/, $days)) { s/^\s+//; s/\s+$//; tr/A-Z/a-z/; push @rdescs, $days[$dayvals{$_}]; } $_ = join (", ", @rdescs); s/^(.*), ([^,]+)$/$1 and $2/; $rdesc .= " $_"; } $rdesc =~ s/\s+/ /gs; foreach (split(/,/, $days)) { s/^\s+//; s/\s+$//; tr/A-Z/a-z/; my $day = $dayvals{$_}; foreach my $code (@values) { my $entry_hash = parse_calendar_html (undef, undef, undef, $lineno, $entry_html, $fast_p); my @day_ord = (0, 1); $entry_hash->{day_ord} = \@day_ord; my @val = ($day, $code, $rdesc, $from_year, $from_month, $from_day, $to_year, $to_month, $to_day, $lineno, $entry_hash); push @repeaters, \@val; if ($verbose > 2) { $_ = $entry_html; s/\\/\\\\/g; s/\"/\\\"/g; s/\n/\\n/g; my $d = $days[$day]; my $w = ($code == 0 ? "" : $code == 1 ? "1st " : $code == 2 ? "2nd " : $code == 3 ? "3rd " : $code == 4 ? "4th " : $code == 5 ? "5th " : $code == -1 ? "Last " : $code == -666 ? "Other " : "<>"); $d =~ s/^(...).*/$1/; s/^(.{20}).*$/$1.../; print STDERR "$progname: $lineno: stored ". sprintf ("%04d-%02d-%02d - %04d-%02d-%02d", $from_year, $from_month, $from_day, $to_year, $to_month, $to_day) . " Every $w$d = \"$_\" (\"$rdesc\")\n" } } } } else { error ("$lineno: unparsable repeating entry: $desc\n"); } } sub parse_holiday_entry($@) { my ($lineno, @lines) = @_; my $entry = shift @lines; shift @lines while ($#lines != -1 && $lines[0] eq ''); error ("$lineno: multi-line holiday?") if ($#lines != -1); $entry =~ s/\s*\#.*$//s; if (! ($entry =~ m/^[a-z]+\s+(\d\d?)[-\s]+($month_re)([-\s]+(\d{4}))?\s*(.*)$/i)) { error ("$lineno: unparsable holiday line: $entry\n"); } my $dotm = 0 + $1; my $month = $2; my $year = defined($4) ? (0 + $4) : 0; my $desc = $5; $month =~ tr/A-Z/a-z/; $month = $monthvals{$month}; my $key = sprintf ("%04d-%02d-%02d-H", $year, $month, $dotm); $calendar{$key} = $desc; print STDERR "$progname: $lineno: stored $key = \"$desc\"\n" if ($verbose > 2); } my %ticket_dups; # for detecting duplicate IDs my %event_dups; # for detecting duplicate URLs # Parse the HTML into the various fields in the event hash; # returns a hash describing this event. # sub parse_calendar_html($$$$$$) { my ($dotm, $month, $year, $lineno, $html, $fast_p) = @_; my $static_p = defined ($dotm); if ($static_p) { my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); parse_shared_flyer ($key, $lineno, $html); } # these are kinda slow, and not needed in --summarize-only mode. validate_html ($html, $lineno, $year, $month, $dotm) unless ($fast_p); parse_dj_tags ($html, $dotm, $month, $year) if ($static_p && !$fast_p); $html =~ s/^\s*//; my ($title_col, $body_col) = ($html =~ m/^(.*?)\n\n+(.*?)\s*$/s); $title_col = $html unless ($title_col); $body_col = '' unless ($body_col); if ($body_col =~ m/\n[ \t]*\n/s) { my $m = $months[$month-1]; $m =~ s/^(...).*/$1/; print STDERR "$progname: WARNING: " . sprintf ("%02d %s %04d: ", $dotm, $m, $year) . "$lineno: blank line\n"; } my $title = extract_event_name ($title_col); my $main_room = extract_event_desc ($body_col) || ''; my $lounge; $_ = $html; if (s@^.*?(Lounge( DJs)?:\s*<(BR|P)>)@$1@is) { # cut before "Lounge" s/\n\n.*$//s; # delete all after blank line s/

.*$//s; # delete all after para break $lounge = extract_event_desc ($_); } $_ = $title_col; # #### Note: this only saves away the first (non-VIP) ticket on sale for # the event. # my $ticket = undef; my $vipticket = undef; my $onsale = undef; my $junk = $_; $junk =~ s/\n/ /gs; $junk =~ s/(]*)>@si); my $args = $1; my $url_p = 0; my $this_ticket = undef; my $this_onsale = undef; my $this_vip_p = 0; if ($args =~ m@\b((HREF)|ID)\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $url_p = $2; $this_ticket = $4 || $5; error ("$lineno: unparsable ticket URL: $this_ticket") if ($url_p && !($this_ticket =~ m/^https?:/)); error ("$lineno: unparsable ticket ID: $this_ticket") if (!$url_p && !($this_ticket =~ m/^\d{4,}$/)); } error ("unparsable TICKETS tag") unless $this_ticket; $this_ticket = $ticket_url_base . $this_ticket unless ($url_p); if ($args =~ m@\bONSALE\s*=\s*\"([^\"<>]+)\"@si) { $this_onsale = $1; error ("$lineno: unparsable ONSALE date: $this_onsale") unless ($this_onsale =~ m/\d\d?-[a-z]{3}-\d\d\d\d/si); } $args =~ s/\s*"[^"]*"\s*/ /gsi; # lose quoted strings if ($args =~ m@\bVIP\b@si) { $this_vip_p = 1; } error ("$lineno: duplicate ticket: $this_ticket") if (defined ($ticket_dups{$this_ticket})); $ticket_dups{$this_ticket} = 1; if (! $this_vip_p) { if (! defined($ticket)) { $ticket = $this_ticket; $onsale = $this_onsale; } } else { if (! defined($vipticket)) { $vipticket = $this_ticket; error ("no ONSALE allowed in a VIP ticket") if (defined ($onsale)); } } } $junk = $_; $junk =~ s/\n/ /gs; $junk =~ s/(]*)>@si); my $args = $1; my ($site, $id); if ($args =~ m@\bSITE=(\"([^<>\"]+)\"|([^\s<>\"]+))@si) { $site = $2 || $3; } if ($args =~ m@\bID=(\"([^<>\"]+)\"|([^\s<>\"]+))@si) { $id = $2 || $3; } error ("$lineno: unparsable") unless ($site && $id); my $key = "$site=$id"; error ("$lineno: duplicate event ID: $key") if (defined ($ticket_dups{$key})); $ticket_dups{$key} = 1; push @event_ids, $key; } my $genre = undef; if (m@\s*(.*?)@si) { $genre = $1; $genre =~ s/<[^<>]+>/ /gs; $genre =~ s/\s+/ /gs; $genre =~ s/\s*\.*\s*$//gs; $genre = de_entify($genre); my @g = split (m/\. +/, $genre); # normalize the nine billion names of house. my %gg; foreach my $g (@g) { $g =~ s/^.* (house)$/$1/i unless ($g =~ m/\bacid\b/i); $g = "breaks" if ($g =~ m/break|broken/i); $g = "gothic" if ($g =~ m/deathrock|dark retro/i); $g = "industrial" if ($g =~ m/\bebm\b|industrialize/i); $g = "mashups" if ($g =~ m/bastard pop|remix|mashup|mash-up/i); $g = "80s" if ($g =~ m/^80s/i); $gg{$g} = 1; } @g = keys (%gg); foreach my $g (@g) { $genres{$g} = 1 + ($genres{$g} || 0); } $genre = \@g; } $_ = $html; my $webcast = undef; if (m@]*)>@si) { my $args = $1; my $url_p = 0; if ($args =~ m@\bSRC\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $webcast = $2 || $3; } error ("unparsable WEBCAST tag") unless $webcast; } else { $webcast = 'main'; } $webcast =~ tr/A-Z/a-z/; if (! ($webcast =~ m/^(main|lounge|off)$/s)) { error ("WEBCAST SRC=\"$webcast\": " . "must be \"main\", \"lounge\" or \"off\""); } $_ = $html; my $embed_video = undef; my $embed_title = undef; if (m@]*)>@si) { my $args = $1; my $url_p = 0; if ($args =~ m@\bSRC\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $embed_video = $2 || $3; } if ($args =~ m@\bTITLE\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $embed_title = $2 || $3; } error ("unparsable VIDEO tag: no SRC") unless $embed_video; error ("unparsable VIDEO tag: no TITLE") unless $embed_title; } my @urls = find_event_urls ($title_col, 0); my $url = $urls[0]; my %hash = (); $hash{title} = $title; #$hash{repeat} = $repeat; # computed in parse_calendar_event_final #$hash{holiday} = $holiday; # computed in parse_calendar_event_final #$hash{vacationp} = $vacationp; # computed in parse_calendar_event_final #$hash{day_ord} = $day_ord; # computed in parse_calendar_event_final $hash{url} = $url; $hash{ticket} = $ticket; $hash{vipticket} = $vipticket; $hash{event_ids} = \@event_ids; $hash{onsale} = $onsale; $hash{genre} = $genre; $hash{main_room} = $main_room; $hash{lounge} = $lounge; $hash{webcast} = $webcast; $hash{video} = $embed_video; $hash{videoname} = $embed_title; $hash{html_src} = $html; # fill in some more fields now, if we can. parse_calendar_html_2 (\%hash, $dotm, $month, $year) if (defined ($dotm)); return \%hash; } # Some parsing we can only do once we're sure of the exact date of the # event. We know this right away for static events, but only later for # repeating events. # sub parse_calendar_html_2($$$$) { my ($eventref, $dotm, $month, $year) = @_; my $dotw = dotw ($dotm, $month, $year); my $monstr = $months[$month-1]; $monstr =~ s/^(...).*$/$1/; my $date = "$days[$dotw], $dotm $monstr $year"; my $html = $eventref->{html_src}; $html =~ s/^\s*//; $html =~ s/\n\n.*$//s; # keep only "title" column my $flyer = find_flyer ($eventref, $year, $month, $dotm); my $photo = find_photo ($year, $month, $dotm); my ($start_minute, $end_minute) = extract_hours ($html, $dotw, $dotm, $month, $year, 0, 1); my $time_str = sprintf("%d:%02d - %d:%02d", $start_minute/60, $start_minute%60, $end_minute/60, $end_minute%60); my @time_list = ($year, $month, $dotm, $dotw, $start_minute, $end_minute); $eventref->{date} = $date; # altered in parse_calendar_event_final $eventref->{time} = $time_str; $eventref->{times} = \@time_list; $eventref->{flyer} = $flyer; $eventref->{photos} = $photo; return $eventref; } # After the whole calendar has been parsed, fill in some fields in the # event hashes that we can't know until we've seen everything. # E.g., we can't know if there are multiple events on the given day # until after we've seen all of them. # sub parse_calendar_event_final($$$) { my ($dotm, $month, $year) = @_; my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $listref = $calendar{$key}; return unless defined ($listref); my @list = @$listref; my $nevents = $#list+1; my $i = -1; foreach my $event (@list) { $i++; parse_calendar_event_final_1 ($$event, $nevents, $i, 0, 1); } } sub parse_calendar_event_final_1($$$$$) { my ($event, $nevents, $event_number, $known_repeater, $real_event_p) = @_; my $title = $event->{title}; my $times = $event->{times}; my $date = $event->{date}; my ($year, $month, $dotm, $dotw, $start_minute, $end_minute) = @$times; my @day_ord = ($event_number, $nevents); $event->{day_ord} = \@day_ord; ####################################################################### # Update the day of the week to, e.g., "Sunday Morning" if appropriate. ####################################################################### # Compute the appropriate description of this (time of) day... my $day = describe_day ($dotw, $start_minute, $nevents, $event_number); # Splice it back in, and store the result back in the hash. $date =~ s/($days_re)/$day/i; $event->{date} = $date; ####################################################################### # Decide whether this event is a holiday, or just before a holiday. ####################################################################### my $holiday = lookup_holiday ($year, $month, $dotm); my $monday_off_p = (defined($holiday) && $holiday =~ m/^\*/); # Mark the sunday of 3 day weekends as holidays, too, if the # holiday on the sunday or adjascent monday are federal. # if ($dotw == 0 && # today is sunday !($month == 12 && $dotm == 30) # not day-before-NYE ) { if (!defined ($holiday)) { $holiday = lookup_holiday_tomorrow ($year, $month, $dotm); $monday_off_p = (defined($holiday) && $holiday =~ m/^\*/); # Tomorrow is a holiday, but not a federal holiday. Darn. $holiday = undef unless ($monday_off_p); } # Only mention "3 day weekend" on the last event of the day. $holiday = undef unless ($event_number == $nevents-1); if (defined ($holiday)) { $holiday =~ s/^\*//s; if ($monday_off_p) { $holiday = "3 Day Weekend: $holiday"; $event->{vacationp} = 1; } } } if (defined ($holiday)) { $holiday =~ s/^\*//sx; # take off Fed marker $event->{holiday} = $holiday; } ####################################################################### # determine whether this is a repeating event. ####################################################################### if ($known_repeater) { $event->{repeat} = $known_repeater; } else { my ($dotw_number, $total_dotws) = dotw_count ($year, $month, $dotm); my $repeater = lookup_repeating_entry ($year, $month, $dotm, $dotw, $dotw_number, $total_dotws); if (defined ($repeater)) { my $rtitle = $repeater->{title}; error ("unparsable event name in repeater: $repeater") unless ($rtitle); if ($rtitle eq $title) { print STDERR "$progname: $title repeats: $title\n" if ($verbose > 2); $event->{repeat} = $repeater->{repeat}; } } } ####################################################################### # What event number is this, and how many events are there? # # Note that this does not see "repeating" events that are visible # in the calendar, but do not yet have "real" data filled in. # So the event ordinals will change as those get filled in. ####################################################################### if ($real_event_p && !($event->{title} =~ m/^\(?CLOSED|CANCELL?ED\)?$/si)) { $event->{event_ord} = $total_event_count; $event_ords[$total_event_count] = \$event; $total_event_count++; $total_live_count++ if ($event->{html_src} =~ m/<(BAND|LIVE)\b/i); } } # Try to find the short name of the given event, by digging it out of # the HTML. # sub extract_event_name($) { my ($text) = @_; my $otext = $text; my ($title) = ($text =~ m@]*>\s*(.*?)\s*@si); error ("no title: $text") unless ($title && $title =~ m/[^\s]/); $title =~ s@\s*
\s*@ @gsi; error ("tags in title: $title") if ($title =~ m/[<>]/); $title =~ s/\s+/ /gsi; $title = de_entify($title); # now do some error checking on what's left. # if ($text =~ m@.*<\1>@si) { error ("multiple <$1> tags:\n\n$otext"); } # $text =~ s@@@gsi; $text =~ s@<(ETITLE|PRES|STATS|GENRE|TIME|PRICE)\b[^<>]*>.*@@gsi; $text =~ s@<(FLYER|TICKET)\b[^<>]*>@@gsi; # $text =~ s@<(P|BR)>@ @gsi; if (! ($text =~ m/^\s*$/s)) { print STDERR "$progname: junk in event name: $otext\n\n"; } # strip trailing punctuation $title =~ s/[-:;]+$//gsi; return $title; } # Returns text briefly describing the event: basically, just the DJ/band # names in the front room (or, if that can't be found, the back room.) # Puts a "*" in front of band names. # sub extract_event_desc($) { my ($text) = @_; return undef unless $text; my $result = ''; my $count = 0; $text =~ s/\s+/ /gs; $text =~ s/]*>\s*([^\n]+?)\s*$@is) { my $bandp = defined($2); my $who = de_htmlify($3, 0); $who = "*$who" if ($bandp); $result .= "$who\n"; $count++; } elsif (m/^.*?Lounge( DJs)?:/is) { last if ($count > 0); } } return $result; } # Extracts and returns the starting and ending time of the event from # its textual description, measured in minutes past midnight. Handles # "after hours", and adds heuristic slop based on the day of the week. # sub extract_hours($$$$$$$) { my ($text, $dotw, $dotm, $month, $year, $exact_p, $warn_p) = @_; my ($start, $end); $text = de_htmlify ($text, 0); my $otext = $text; # first look for something of these forms: # # 8pm - 2am # midnight - 1AM # 10 pm - after hours # while ($text =~ m/ \b\s* ((\d\d?(:\d\d)?)? \s* ([AP]\.?M\.?|noon|midnight|midnite)) \s*-\s* ([[:alnum:]:.]+) \b(.*) $/xsi) { my $start1 = $1; my $end1 = $5; $text = $6; $start1 = parse_hour ($start1, 0); next unless defined ($start1); $end1 = parse_hour ($end1, 1); next unless defined ($end1); if (defined($start1) && defined($end1)) { $start = $start1; $end = $end1; last; } } # if that didn't work, at least try to find a start time... # if (!defined ($start)) { $text = $otext; while ($text =~ m/ \b\s* ((\d\d?(:\d\d)?)? \s* ([AP]\.?M\.?|noon|midnight|midnite)) \b(.*) $/xsi) { my $start1 = $1; $text = $5; $start1 = parse_hour ($start1, 0); next unless defined ($start1); if (defined($start1)) { $start = $start1; last; } } } # If we had a start time but no end time, assume 2am. # # if (defined ($start) && !defined ($end)) { # $end = (60 * 2); # } # if we didn't find a start or end time, warn. # my $need_warning_p = ($warn_p && (!defined ($start) || !defined ($end))); # Bur don't warn if there's no time specified for cancelled events. if ($need_warning_p && $text =~ /\b(CANCELL?ED|POSTPONED|CLOSED)\b/i) { $need_warning_p = 0; } $start = (60 * 22) unless defined($start); # default to 10pm $end = (60 * 3) unless defined($end); # default to 3am if ($need_warning_p) { my $h1 = int ($start/60); my $m1 = int ($start%60); my $h2 = int ($end/60); my $m2 = int ($end%60); my $m = $months[$month-1]; my $d = $days[$dotw]; $m =~ s/^(...).*/$1/; $d =~ s/^(...).*/$1/; print STDERR "$progname: WARNING: " . sprintf ("%s, %02d $m %04d: ", $d, $dotm, $year) . "guessing: " . sprintf ("%2d:%02d %s - %d:%02d %s\n", ($h1 == 0 ? 12 : $h1 % 12), $m1, ($h1 < 12 ? "AM" : "PM"), ($h2 == 0 ? 12 : $h2 % 12), $m2, ($h2 < 12 ? "AM" : "PM")); # print STDERR "$otext\n\n"; } my $weekend_p = ($dotw == 5 || # fri $dotw == 6 || # sat $dotw == 0 # sun (treat sunday as a weekend, since we # are usually only open on sunday if # it's a three day weekend.) ); if ($end eq 'LATE') { if ($weekend_p) { $end = (60 * 5) + 30; # on weekends, "after hours" means 5:30 AM } else { $end = (60 * 4) + 30; # on week nights, "after hours" means 4:30 AM } } elsif (!$exact_p) { # # If an explicit end time was listed in the calendar, add some slack onto # that. Add half and hour on weeknights, and an hour on the weekend. # Or an hour and a half if it's a weekend set to end at 3am or earlier. if (!$weekend_p) { # weeknight $end += 30; } elsif ($end <= (60 * 3)) { # <= 3AM on weekend $end += 90; } else { # > 3AM on weekend $end += 60; } } $end += (60 * 24) if ($end < $start); # end is "tomorrow" return ($start, $end); } sub parse_hour($$) { my ($h, $endp) = @_; if ($h =~ m/\b(\d\d?)(:(\d\d))?\s*(A\.?M\.?)/i) { return undef if ($1 < 1 || $1 > 12); return undef if (defined($3) && ($3 < 0 || $3 > 60)); $h = 60 * $1; $h += $3 if (defined($3)); return $h; } elsif ($h =~ m/\b(\d\d?)(:(\d\d))?\s*(P\.?M\.?)/i) { return undef if ($1 < 1 || $1 > 12); return undef if (defined($3) && ($3 < 0 || $3 > 60)); $h = 60 * ($1 == 12 ? 12 : (12 + $1)); $h += $3 if (defined($3)); return $h; } elsif ($h =~ m/(\b12\s*)?\s*noon\b/i) { return 60 * 12; } elsif ($h =~ m/(\b12\s*)?\s*mid\s*-?\s*(nite|night)\b\b/i) { return 0; } elsif ($endp && $h =~ m/\b(after\b|after\s*-?\s*hours\b)/i) { return 'LATE'; } else { return undef; } } ############################################################################## # # , , and tags # ############################################################################## # Whether "DD-MMM-YYYY" has yet arrived. # sub ticket_on_sale_p($) { my ($date) = @_; my ($dd, $mm, $yyyy) = ($date =~ m/(\d\d?)-([a-z]{3})-(\d\d\d\d)/si); error ("unparsable ONSALE date: $date") unless ($yyyy); $mm =~ tr/A-Z/a-z/; $mm = $monthvals{$mm}; my ($esec, $emin, $ehour, $edotm, $emon, $eyear) = localtime (time); $emon++; $eyear += 1900; my $d0 = $yyyy * 10000 + $mm * 100 + $dd; my $d1 = $eyear * 10000 + $emon * 100 + $edotm; return ($d0 <= $d1); } # We use certain magic tags in the calendar.txt file, to save typing and # enable certain other semantic hacks. # sub rewrite_dj_tags($$$$$) { my ($html, $show_tickets_p, $onsale_date, $lineno, $tickets_form_p) = @_; # # xyz ==> xyz # xyz ==> xyz # xyz ==> xyz # xyz ==> xyz # ==> deleted # ==> deleted #
xyz # xyz ==> xyz # xyz ==> xyz # # xyz ==> # XYZ # xyz ==>

XYZ
# # xyz ==>

xyz

# xyz ==>

xyz
# xyz ==>

xyz # ==>

xyz # xyz ==>

xyz # ==> deleted # # ==>

buy tickets now!
# or

depending on $show_tickets_p # # ==> slightly different text # # use "foo" instead of "buy tickets now!" # don't link until this date # deleted $html =~ s@]*>\s*@@igs; $html =~ s@]*>\s*@@igs; $html =~ s@]*>\s*@@igs; $html =~ s@\"]+\")>(.*?) @$3@xigs; $html =~ s@\"]+\")>(.*?) @$3@xigs; $html =~ s@\"]+\")>(.*?) @$3@xigs; $html =~ s@\"]+\")>(.*?) @{ "" . upcase_html($2) . "" }@exigs; $html =~ s@(.*?)@{ "

" . upcase_html($1) . "
" }@exigs; $html =~ s@@@igs; $html =~ s@<(/?)BAND(\s+DUP)?>@<$1B>@igs; $html =~ s@@@igs; $html =~ s@@@igs; $html =~ s@@@igs; $html =~ s@\s*@

@igs; $html =~ s@\s*@

@igs; $html =~ s@\s*@@igs; $html =~ s@\s*

@igs; $html =~ s@\s*@@igs; $html =~ s@\s*@

@igs; $html =~ s@\s*@@igs; $html =~ s@\s*@

@igs; $html =~ s@\s*@

@igs; $html =~ s@]*>\s*@@igs; error ("BLURB wasn't handled") if ($html =~ m@]*)>@i) { my $args = $1; my $url_p = 0; my $vip_p = 0; my $ticket = undef; my $text = undef; if ($args =~ m@\b((HREF)|ID)\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $url_p = $2; $ticket = $4 || $5; } if ($args =~ m@\bTEXT\s*=\s*\"([^\"]*)\"@si) { $text = $1; } $args =~ s/\s*"[^"]*"\s*/ /gsi; # lose quoted strings if ($args =~ m@\bVIP\b@si) { $vip_p = 1; } my $blurb = "VIP service info"; if ($tickets_form_p) { $blurb = "\n
\n $blurb\n"; } else { $blurb = "\n
($blurb)\n"; } my $target; if (! $show_tickets_p) { $target = ''; } elsif ($onsale_date && !ticket_on_sale_p ($onsale_date)) { my ($dd, $mm, $yyyy) = ($onsale_date =~ m/(\d\d?)-([a-z]{3})-(\d\d\d\d)/si); $mm =~ s/^(.)/{uc($1)}/xe; $target = "Tickets on sale $mm $dd."; } elsif ($tickets_form_p && !$url_p) { if (! $vip_p) { $text = "Buy Tickets" unless ($text); $blurb = ''; } else { $text = "VIP Service: $vip_price" unless $text; } $target = ("

\n" . " \n" . " \n" . "  \n" . " \n" . $blurb . "
\n"); } else { $ticket = $ticket_url_base . $ticket unless ($url_p); if (! $vip_p) { $text = "Buy tickets now!"; $blurb = ''; } else { $text = "VIP Service: $vip_price"; } $text = ($vip_p ? "VIP Service: $vip_price" : "buy tickets now!") unless ($text); $target = ("" . "$text" . $blurb . "
"); $target = "

$target" if ($vip_p); } $target = "

$target" if ($ticket_count == 0); $html =~ s@]*>\s*@$target@is; # delete first copy of tag! $ticket_count++; } $html =~ s@]*>\s*@@gsi; if ($html =~ m@<(/?(DJ|AFF|STATS|PRES|BAND|FLYER|TICKET|EVENT|\001))@i) { error ("$lineno: unparsable <$1> tag"); } return $html; } my %performers = (); # keys are the name of the band/dj, downcased # values are references to lists: # "name" # type ('dj' | 'band' | 'other') # url # ordinal (e.g., Nth dj to play here) # reference to a list of dates: # each element is an int, YYYYMMDD my $performers_band_tick = 0; my $performers_dj_tick = 0; my $performers_other_tick = 0; # Parse the , , and tags in the HTML, and populate the # %performers table with the result. (Date args are for error messages.) # sub parse_dj_tags($$$$) { my ($html, $dotm, $month, $year) = @_; foreach (split ('<', $html)) { next unless m@^((DJ)|(BAND)|OTHER) (\s+DUP\b)? \s* (HREF=\"([^<>\"]+)\")?\s*>(.*)$@isx; my $tag = $1; my $djp = defined($2); my $bandp = defined($3); my $dupp = defined($4); my $url = $6 || ''; my $name = $7; my $type = ($djp ? 'dj' : $bandp ? 'band' : 'other'); $name =~ s/^\s+//gs; $name =~ s/\s+$//gs; error ("$dotm $months[$month-1] $year: unparsable <$tag>") if ($name eq ''); my $key = $name; $key = asciify (de_entify ($key)); # simplify accented characters $key =~ tr/A-Z/a-z/; # downcase $key =~ s/\b(the|and|dj|mc|mr)\b//g; # noise words $key =~ s/\s//g; # lose whitespace $key =~ s/^!$/shift1/s; # kludge for stupid "DJ !" $key =~ s/[^a-z\d]//g; # lose non-alphanumeric $key =~ s/([^aeiouy])\1+/$1/g; # merge consecutive consonants error ("can't canonicalize $name") if ($key eq ''); if ($dupp) { # kludge for when there's a DJ and a BAND with the same name: # one of them looks like to suppress the complaint. $key .= " _DUP_"; } my $listref = $performers{$key}; my @list = (defined($listref) ? @$listref : ()); my @dates = (); my $ord = undef; if (defined($listref)) { # there was an entry already my ($oname, $otype, $ourl, $oord, $odatesref) = @list; if ($otype ne $type) { # warn about type mismatch my $m = $months[$month-1]; $m =~ s/^(...).*$/$1/; error ("$dotm $m $year: is $name a DJ, BAND, or OTHER?"); } if ($oname ne $name && # warn about name spelling change length($name) > 2) { my $m = $months[$month-1]; $m =~ s/^(...).*$/$1/; print STDERR "$progname: $dotm $m $year: \"$oname\" " . "changed to \"$name\"\n"; } if ($ourl ne $url) { # warn about URL change my $m = $months[$month-1]; $m =~ s/^(...).*$/$1/; print STDERR "$progname: $dotm $m $year: $name: " . "\"$ourl\" changed to \"$url\"\n"; } $ord = $oord; @dates = @$odatesref; error ("$key: no dates?") if ($#dates < 0); } my $d = $year * 10000 + $month * 100 + $dotm; if ($#dates == -1 || $dates[$#dates] != $d) { # avoid dups push @dates, $d; } my $datesref = \@dates; if (! defined($ord)) { $ord = ($bandp ? $performers_band_tick++ : $djp ? $performers_dj_tick++ : $performers_other_tick++); } @list = ($name, $type, $url, $ord, $datesref); $listref = \@list; $performers{$key} = $listref; if (0) { $listref = $performers{$key}; @list = @$listref; ($name, $type, $url, $ord, $datesref) = @list; @dates = @$datesref; print STDERR "## $key -- $name -- $type -- $url -- $ord -- " . join (", ", @dates) . "\n"; } } } # Write the calendar/ledger.html file. # sub generate_ledger_html($$) { my ($dir, $topten_p) = @_; my $bands = ''; # html for the bands/djs themselves my $djs = ''; my $others = ''; my $dj_idx = ''; # html for the A-Z indexes my $band_idx = ''; my $other_idx = ''; my $dj_count = 0; # totals my $band_count = 0; my $other_count = 0; my $dj_letter = ''; # previous first-letter-of-name encountered my $band_letter = ''; my $other_letter = ''; my $ties_p = 1; foreach my $key (sort (keys (%performers))) { my $listref = $performers{$key}; my @list = @$listref; my ($name, $type, $url, $ord, $datesref) = @list; my @dates = @$datesref; $name =~ s/^(the|dj|mc|mr\.?)\s+//i; # lose leading noise words my $letter = $key; $letter =~ s/^(.).*$/$1/; $letter =~ tr/a-z/A-Z/; $letter = 'A' unless $letter =~ m/[A-Z]/i; my $ccount; if ($type eq 'dj') { $ccount = ++$dj_count; my $letter2 = $letter; $letter = '' if ($letter eq $dj_letter); $dj_letter = $letter2; } elsif ($type eq 'band') { $ccount = ++$band_count; my $letter2 = $letter; $letter = '' if ($letter eq $band_letter); $band_letter = $letter2; } elsif ($type eq 'other') { $ccount = ++$other_count; my $letter2 = $letter; $letter = '' if ($letter eq $other_letter); $other_letter = $letter2; } else { error ("unknown ledger type: $type"); } my ($colorL, $colorR) = (((($ccount-1) / 3) & 1) ? (" CLASS=\"LL\"", " CLASS=\"LO\"") : ("", " CLASS=\"LE\"")); my $desc = ""; my $n = "$name"; $n = "$n" if ($url ne ''); if ($letter eq '') { $desc .= ""; } else { my $anchor = "${type}_$letter"; $desc .= ("" . "" . "$letter" . "" . ""); my $idx = "$letter "; if ($type eq 'dj') { $dj_idx .= $idx; } elsif ($type eq 'band') { $band_idx .= $idx; } elsif ($type eq 'other') { $other_idx .= $idx; } else { error ("unknown ledger type: $type"); } } $desc .= "$n"; $desc .= ""; my $count = 0; foreach (@dates) { my $year = int($_/10000); my $month = int($_/100) % 100; my $dotm = $_ % 100; my $u = sprintf("%04d/%02d.html\#%02d", $year, $month, $dotm); $desc .= ",\n" unless ($count == 0); my $m = $months[$month-1]; $m =~ s/^(...).*$/$1/; $desc .= "$dotm-$m-$year"; $count++; } $desc .= " (" . ($#dates+1) . ")" if ($#dates > 9); $desc .= "\n"; if ($type eq 'dj') { $djs .= $desc; } elsif ($type eq 'band') { $bands .= $desc; } elsif ($type eq 'other') { $others .= $desc; } else { error ("unknown ledger type: $type"); } } # Compute the top ten. # if ($topten_p) { my $n = 25; my $i; my @sorted = sort { my $aa = $performers{$a}->[4]; my $bb = $performers{$b}->[4]; return ($#$bb == $#$aa ? ($performers{$a}->[0] cmp $performers{$b}->[0]) : $#$bb <=> $#$aa); } (keys (%performers)); { my $n1 = $total_event_count; my $n2 = $total_live_count; my ($first, $last) = find_scheduled_range (); $_ = $first; my ($first_year, $first_month) = m/^(\d{4})-(\d{2})$/; $_ = $last; my ($last_year, $last_month) = m/^(\d{4})-(\d{2})$/; my $start = mktime (0,0,0, 1, $first_month-1, $first_year-1900, 0,0,-1); my $end = mktime (0,0,0, 30, $last_month-1, $last_year-1900, 0,0,-1); my $days = ($end - $start) / (60 * 60 * 24); my $weeks = $days / 7; my $months = $days / 30; my $years = $days / 365.25; print STDOUT sprintf ("\nScheduled range: %s %d - %s %d:\n", $months[$first_month-1], $first_year, $months[$last_month-1], $last_year); print STDOUT sprintf (" " . "%d days; %d weeks; %.1f months; %.2f years.\n\n", $days, $weeks, $months, $years); print STDOUT sprintf ("Total events: %4d\n", $n1); print STDOUT sprintf ("Total live shows: %4d\n", $n2); print STDOUT sprintf ("Events / month: %4.1f ", ($n1/$months)); print STDOUT sprintf ("Live shows / month: %4.1f\n", ($n2/$months)); print STDOUT sprintf ("Events / week: %4.1f ", ($n1/$weeks)); print STDOUT sprintf ("Live shows / week: %4.1f\n", ($n2/$weeks)); } $i = 1; print STDOUT "\nMost frequent bands:\n\n"; my $last_n = -1; my $last_i = -1; foreach my $key (@sorted) { my $listref = $performers{$key}; my ($name, $type, $url, $ord, $datesref) = @$listref; next unless ($type eq 'band'); my $nd = $#$datesref+1; next if ($nd == 1); my $ii = ($ties_p && $nd == $last_n ? $last_i : $i); $last_n = $nd; $last_i = $ii; print STDOUT sprintf("%5d: %s (%d)\n", $ii, de_entify($name), $nd); last if (++$i > $n && $n != 0); } $last_n = -1; $last_i = -1; $i = 1; print STDOUT "\nMost frequent DJs:\n\n"; foreach my $key (@sorted) { my $listref = $performers{$key}; my ($name, $type, $url, $ord, $datesref) = @$listref; next unless ($type eq 'dj'); my $nd = $#$datesref+1; next if ($nd == 1); my $ii = ($ties_p && $nd == $last_n ? $last_i : $i); $last_n = $nd; $last_i = $ii; print STDOUT sprintf("%5d: %s (%d)\n", $ii, de_entify($name), $nd); last if (++$i > $n && $n != 0); } if (0) { $last_n = -1; $last_i = -1; $i = 1; print STDOUT "\nMost frequent \"others\":\n\n"; foreach my $key (@sorted) { my $listref = $performers{$key}; my ($name, $type, $url, $ord, $datesref) = @$listref; next unless ($type eq 'other'); my $nd = $#$datesref+1; next if ($nd == 1); my $ii = ($ties_p && $nd == $last_n ? $last_i : $i); $last_n = $nd; $last_i = $ii; print STDOUT sprintf("%5d: %s (%d)\n", $ii, de_entify($name), $nd); last if (++$i > $n && $n != 0); } } { my $all_genres_p = 0; $last_n = -1; $last_i = -1; $i = 1; print STDOUT "\n" . ($all_genres_p ? "All genres, by frequency: " . "All genres, alphabetical:" : "Most frequent genres:") . "\n\n"; my @out = (); foreach my $key (sort { my $aa = $genres{$a}; my $bb = $genres{$b}; if ($aa == $bb) { return ($a cmp $b); } else { return ($bb <=> $aa); } } keys (%genres)) { my $val = $genres{$key}; my $ii = ($ties_p && $val == $last_n ? $last_i : $i); $last_n = $val; $last_i = $ii; push @out, sprintf("%5d: %s (%d)", $ii, $key, $val); $i++; last if (!$all_genres_p && $i > $n && $n != 0); } if ($all_genres_p) { my @keys = sort (keys (%genres)); foreach my $line (@out) { my $key = shift @keys; my $val = $genres{$key}; $line = sprintf("%-38s %s (%d)", $line, $key, $val); } } foreach (@out) { print STDOUT "$_\n"; } } print STDOUT "\nInteresting Nths:\n"; my @cool_numbers = (1, 10, 50, 69, 100, 500, 666, 999, 1000, 1024, 1500, 2000, 2048, 2500, 3000, 3500, 4000, 4096, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000); my @cool_numbers_2 = (150, 200, 250, 300, 400, 600, 700, 800, 900); foreach my $type ('dj', 'band') { print STDOUT "\n"; my @out = (); foreach my $key (@sorted) { my $listref = $performers{$key}; my ($name, $ttype, $url, $ord, $datesref) = @$listref; next unless ($ttype eq $type); my @dates = @$datesref; my $date = $dates[0]; my $cool = 0; my @nums = @cool_numbers; push @nums, @cool_numbers_2 if ($type eq 'band'); foreach my $c (sort { $a <=> $b } (@nums)) { if ($c == $ord+1) { $cool = $ord+1; last; } } next unless ($cool); $date =~ s@^(\d{4})(\d\d)(\d\d)$@ my ($y, $m, $d) = ($1, $2, $3); $m = $months[$2-1]; $m =~ s/^(...).*$/$1/; "$d-$m-$y";@gsex; $cool = "$cool" . ($cool == 1 ? "st" : "th"); $cool .= " " . ($type eq 'dj' ? 'DJ' : $type); $name = de_entify($name); $name =~ s/^(.{20}).*/$1/; push @out, sprintf("%13s: %-20s %s\n", $cool, $name, $date); } @out = sort { my ($aa, $bb) = ($a, $b); $aa =~ s/^\s*(\d+).*$/$1/; $bb =~ s/^\s*(\d+).*$/$1/; return $aa <=> $bb; } @out; foreach (@out) { print STDOUT $_; } } print STDOUT "\n"; foreach my $c (sort { $a <=> $b } (@cool_numbers, @cool_numbers_2)) { next if ($c == 1); my $event = $event_ords[$c-1]; next unless defined ($event); my $title = $$event->{title}; my $date = $$event->{date}; $date =~ s/^.*, //; $date =~ s/ /-/g; $date = "0$date" unless ($date =~ m/^\d\d/); my $cool = "$c" . ($c == 1 ? "st" : "th") . " event"; $title =~ s/^(.{20}).*/$1/; print STDOUT sprintf("%13s: %-20s %s\n", $cool, $title, $date); } print STDOUT "\n"; return; # don't bother writing ledger.html in topten mode } { my $a = "

"; my $b = "
"; $band_idx = $a . $band_idx . $b; $dj_idx = $a . $dj_idx . $b; } my $output = ''; $output .= "$page_title: Bands and DJs\n"; my $nevents = $total_event_count; $nevents -= ($#future_events + 1); # $band_count =~ s/(\d)(\d\d\d)$/$1,$2/; # $dj_count =~ s/(\d)(\d\d\d)$/$1,$2/; # $nevents =~ s/(\d)(\d\d\d)$/$1,$2/; $output .= ("\n" . "\n" . "\n"); $output .= "\n"; $output .= "


\n"; $output .= "

\n"; $output .= "
\n"; $output .= "$band_count bands
\n"; $output .= "$dj_count djs
\n"; $output .= "$nevents events
\n"; $output .= "
\n"; $output .= "
\n"; $output .= "

\n"; $output .= "\n"; $output .= "\n"; $output .= "\n"; $output .= "Bands and DJs\n\n"; $output .= "


\n"; $output .= ( "This is a list of all the bands and DJs who have performed\n" . "at DNA Lounge since we re-opened in 2001. Clicking on a\n" . "name will take you to that performer's home page (if we\n" . "know it) and clicking on a date will take you to the\n" . "appropriate entry in the\n" . "calendar.\n" . "From there, you can find the corresponding\n" . "flyers,\n" . "photo galleries,\n" . "etc.\n" . "

\n" . "If you have any corrections to the URLs or names below,\n" . " please \n" . "let us know!\n" . "

\n" ); $output .= ("

\n" . "\n"); $output .= (" \n" . " \n" . " \n"); $output .= $bands; $output .= (" \n" . " \n" . " \n"); $output .= $djs; # $output .= (" \n" . # " \n" . # " \n"); # $output .= $others; # This kludge turns this: Into this: # # ----------------- ----------------- # | X | ABC | | X | ABC | # ----------------- | |---------| # | | DEF | | | DEF | # ----------------- | |---------| # | | GHI | | | GHI | # ----------------- ----------------- { $_ = $output; s@(@i, $chunk); my $count = $#junk; if ($count > 0) { $count++; $chunk =~ s@(ROWSPAN=)(\d+)@$1$count@gsi; $chunk =~ s@@@gsi; } } $output = join ('', @chunks); } $output .= "
\n" . "
\n" . "\n" . "Bands\n\n" . $band_idx . "


\n" . "

\n" . "

\n" . "
\n" . "\n" . "DJs, MCs, etc.\n\n" . $dj_idx . "


\n" . "

\n" . # "

\n" . # "
\n" . # "\n" . # "\"Others\"\n\n" . # $other_idx . # "


\n" . # "

]* ROWSPAN=)@\001$1@gs; my @chunks = split (/\001/, $_); foreach my $chunk (@chunks) { my @junk = split (m@
\n"; $output .= "\n"; my $outfile = "$dir/$calendar_ledger_file"; 1 while $outfile =~ s@[^/]+/\.\./@@g; write_file_if_changed ($outfile, $output, 1); } # Write the calendar/names.txt file. # This contains a line for each event, "YYYY-MM-DD \t NAME" # The name begins with an asterisk if it's a "live" event. # Only the last event on each day is listed. # sub generate_names_file($) { my ($dir) = @_; my %output; foreach my $key (sort (keys %calendar)) { my ($year, $month, $dotm) = ($key =~ m/^(\d\d\d\d)-(\d\d)-(\d\d)$/); next unless defined ($dotm); my $listref = $calendar{$key}; if (defined ($listref)) { foreach my $event (@$listref) { my $title = $$event->{title}; my $live_p = ($$event->{html_src} =~ m/<(BAND|LIVE)\b/i); $title = "*$title" if ($live_p); $output{$key} = "$key\t$title\n"; } } } my $output = ''; foreach my $key (sort (keys %output)) { $output .= $output{$key}; } my $outfile = "$dir/$calendar_names_file"; write_file_if_changed ($outfile, $output, 0); } ############################################################################## # # Flyers, Photos # ############################################################################## # Search for a tag in the given HTML, which means # "this entry shares a flyer with the entry on the given other date." # sub parse_shared_flyer($$$) { my ($date, $lineno, $html) = @_; return unless ($html =~ m@]*)>@i); $_ = $1; my ($val) = m/\bDATE=\"([^<>\"]*)\"/i; if ($val =~ m/^NONE$/i) { print STDERR "$progname: $lineno: skipped flyer $date = NONE\n" if ($verbose > 2); } elsif ($val =~ m/^(\d\d?)[-\s]+($month_re)[-\s]+(\d{4})\s*$/io) { my $ndotm = 0 + $1; my $nmonth = $2; my $nyear = 0 + $3; $nmonth =~ tr/A-Z/a-z/; $nmonth = $monthvals{$nmonth}; my $nnumeric = ($nyear * 10000) + ($nmonth * 100) + $ndotm; my $ndate = sprintf ("%04d-%02d-%02d", $nyear, $nmonth, $ndotm); $date =~ m/^(\d{4})-(\d{2})-(\d{2})$/; my $odotm = 0 + $3; my $omonth = $2; my $oyear = 0 + $1; my $onumeric = ($oyear * 10000) + ($omonth * 100) + $odotm; if ($nnumeric >= $onumeric) { $omonth = $months[$omonth-1]; $omonth =~ s/^(...).*$/$1/; $nmonth = $months[$nmonth-1]; $nmonth =~ s/^(...).*$/$1/; $date = "$odotm-$omonth-$oyear"; error ("$lineno: flyer target ($ndate) >= date ($date)"); } $shared_flyers{$date} = $ndate; print STDERR "$progname: $lineno: stored flyer $date = $ndate\n" if ($verbose > 2); } else { error ("$lineno: unparsable FLYER tag"); } } # Returns the relative HREF to the flyer for this date, if any. # sub find_flyer($$$$) { my ($event, $year, $month, $dotm) = @_; my $title = $event->{title}; my $html = $event->{html_src}; if ($html =~ m@@i) { return undef; } my $date = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $shared = $shared_flyers{$date}; my $result = find_flyer_1 ($year, $month, $dotm, $title); if (defined ($result)) { if (defined($shared)) { error ("flyer exists for $date (expected shared $shared)"); } return $result; } return undef unless defined($shared); ($shared =~ m/^(\d{4})-(\d{2})-(\d{2})$/) || error ("unparsable shared flyer: $shared for $date"); ($year, $month, $dotm) = ($1, $2, $3); $result = find_flyer_1 ($year, $month, $dotm, $title); if (!defined ($result)) { error ("expected flyer $shared (for $date) not found"); } elsif ($verbose > 2) { print STDERR "$progname: using flyer of $shared for $date\n"; } return $result; } # Reads the flyer thumbnail index for the given month, and caches the HTML # describing the first thumbnail of each date (the and tags, # including width and height). # sub cache_inline_flyers($$) { my ($year, $month) = @_; my $donep = sprintf ("%04d/%02d-done", $year, $month); return if ($inline_flyers{$donep}); $inline_flyers{$donep} = 1; my $file = sprintf ("flyers/%04d/%02d/index.html", $year, $month); local *IN; if (open (IN, "<$file")) { print STDERR "$progname: reading $file\n" if ($verbose > 2); my $body = ''; while () { $body .= $_; } close IN; $body =~ s/\s+/ /gs; $body =~ s/(]*>) \s* ()@xi) { my $html = "$1$2"; my $dotm = $3; my $key = sprintf ("%04d/%02d/%02d", $year, $month, $dotm); my $prefix = sprintf ("../../flyers/%04d/%02d/", $year, $month); $html =~ s@((SRC|HREF)\s*=\s*\")@$1$prefix@gi; my $valP = $inline_flyers{$key}; my @val = ( $valP ? @$valP : ()); push @val, $html; $valP = \@val; $inline_flyers{$key} = $valP; print STDERR "$progname: cached flyer image $key\n" if ($verbose > 3); } } } } # Given the file name of a flyer URL, returns the HTML describing the # first thumbnail of that flyer. (Assuming cache_inline_flyers() has # already been called for this month). # sub find_inline_flyer ($) { my ($flyer_file) = @_; my ($year, $month, $dotm) = ($flyer_file =~ m@(\d{4})/(\d\d)/(\d\d)-@); my $key = sprintf ("%04d/%02d/%02d", $year, $month, $dotm); my $valP = $inline_flyers{$key}; error ("missing flyer? $year-$month-$dotm") unless ($valP); foreach my $html (@$valP) { my $re = qr/$flyer_file/; return $html if ($html =~ m/\b$re\b/); } error ("INTERNAL ERROR: no match for $flyer_file"); } # Given a date, returns the HTML describing the first flyer thumbnail of # that date. (Assuming cache_inline_flyers() has already been called for # this month). # sub find_flyer_1($$$$) { my ($year, $month, $dotm, $title) = @_; my $key = sprintf ("%04d/%02d/%02d", $year, $month, $dotm); my $valP = $inline_flyers{$key}; return undef unless defined($valP); $title = lc (asciify($title)); $title =~ s/[^a-z\d]+//g; my $url = undef; my $utitle = undef; my $count = 0; foreach my $html (@$valP) { ($url) = ($html =~ m@]+)"@i); error ("no href in flyer html? $year-$month-$dotm - $html") unless ($url); 1 while $url =~ s@[^/]+/\.\./@@g; ($utitle) = ($url =~ m@/\d\d-([^.]+)\.html$@); # consider it a match if the flyer name is a substring of the event name. return $url if ($title =~ m/$utitle/); $count++; } # Only warn when there is more than one flyer for this date print STDERR "WARNING: name mismatch: $key: $title != $utitle\n" unless ($count == 1); return $url; } # Returns the relative HREF to the photo gallery for this date, if any. # sub find_photo($$$) { my ($year, $month, $dotm) = @_; my $f = sprintf ("gallery/$year/%02d-%02d", $month, $dotm); return "$f/" if (-d $f); return undef; } # Returns a list of the the URLs present in the given HTML. # if $all_p is true, returns all of them; # else, returns only the "important" looking ones. # sub find_event_urls($$) { my ($head_html, $all_p) = @_; my @result = (); $head_html =~ s@]*>@ @gsi; $head_html =~ s/\s+/ /gs; # squash whitespace $head_html =~ s@(<[^/])@\n$1@gs; # break at open tags foreach (split ('\n', $head_html)) { next unless (m@^\s*<(A|ETITLE)\s+HREF="([^\"]+)">\s*([^<>]+?)\s*@i); my ($tag, $url, $text) = ($1, $2, $3); if ($all_p || $tag =~ m/ETITLE/i) { push @result, $url; } } return @result; } ############################################################################## # # Looking up entries in the calendar # ############################################################################## # Returns a string describing the holiday on this date, if any. # sub lookup_holiday($$$) { my ($year, $month, $dotm) = @_; my $key = sprintf ("%04d-%02d-%02d-H", $year, $month, $dotm); my $key2 = sprintf ("%04d-%02d-%02d-H", 0, $month, $dotm); my $val = $calendar{$key}; $val = undef if ($val && $val =~ m/^\s*$/s); if (!defined($val)) { $key = $key2; $val = $calendar{$key}; } $val = undef if ($val && $val =~ m/^\s*$/s); if ($val) { $_ = $val; s/\\/\\\\/g; s/\"/\\\"/g; s/\n/\\n/g; print STDERR "$progname: holiday: $key: \"$_\"\n" if ($verbose > 1); return $val; } else { return undef; } } # Returns a string describing the holiday on the *following* day, if any. # sub lookup_holiday_tomorrow($$$) { my ($year, $month, $dotm) = @_; my $dpm = days_per_month ($month, $year); if (++$dotm > $dpm) { $dotm = 1; if (++$month == 12) { $year++; } } return lookup_holiday ($year, $month, $dotm); } # Returns an entry-hash describing a repeating entry for this day, if any. # # $dotw is the day of the week (0-6) # $dotw_number is the number of that day in this month # (e.g., 2 for "2nd wednesday") # $total_dotws is how many of that dotw this month has # (e.g., 5 for "there are 5 wednesdays"); # sub lookup_repeating_entry($$$$$$) { my ($year, $month, $dotm, $dotw, $dotw_number, $total_dotws) = @_; my $value = undef; my $vlineno = undef; $_ = $days[$dotw]; s/^(...).*/$1/; foreach my $ptr (@repeaters) { my @entry = @$ptr; my ($edotw, $rep, $rdesc, $from_year, $from_month, $from_dotm, $to_year, $to_month, $to_dotm, $lineno, $entry_hash) = @entry; next unless ($edotw == $dotw); # skip if out of range. my $from_i = ($from_year * 10000 + $from_month * 100 + $from_dotm); my $to_i = ($to_year * 10000 + $to_month * 100 + $to_dotm); my $i = ($year * 10000 + $month * 100 + $dotm); next if ($i < $from_i || $i > $to_i); # today is within the range of this repeater. my @nths = ("1st", "2nd", "3rd", "4th", "5th"); if ($rep == 0) { # an "every week" entry # always matches. } elsif ($rep >= 1 && $rep <= 4) { # an "every [1-4] week" entry next unless ($dotw_number == $rep); } elsif ($rep == 5) { # an "every 5th week" entry # the 4th day in a month with 5 of them. # (note that "4th friday" and "5th friday" indicate the same day # in a month that has 5 fridays.) next unless ($dotw_number == 4 && $total_dotws == 5); } elsif ($rep == -1) { # an "every last week" entry next unless ($dotw_number == $total_dotws); } elsif ($rep == -666) { # an "every other week" entry error ("\"every other week\" not supported yet"); } else { error ("INTERNAL ERROR: unknown repeater $rep\n"); } # If we made it here, this repeater matches this day. if (defined($value)) { print STDERR "$progname: WARNING: duplicate repeating matches: " . "line $lineno and line $vlineno\n"; } elsif ($verbose > 1) { my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); $_ = $entry_hash->{title}; print STDERR "$progname: repeater: $key: \"$_\"\n"; } my %new_hash = %$entry_hash; # copy it $value = \%new_hash; $vlineno = $lineno; parse_calendar_html_2 ($value, $dotm, $month, $year); parse_calendar_event_final_1 ($value, 1, 0, $rdesc, 0); } return $value; } ############################################################################## # # Start the parser going # ############################################################################## sub load_calendar($$) { my ($file, $fast_p) = @_; local *IN; open(IN, "$file") || error ("$file: $!"); my $slineno = 0; my $lineno = 0; my @lines = (); $total_event_count = 0; $total_live_count = 0; while () { my $line = $_; my $contp = 0; $lineno++; $_ = undef; next if ($line =~ m/^\s*\#/); # skip fully commented lines $line =~ s/\s*\#+\s.*//g; # lose comments at end of line $contp = 1 if ($line =~ m/^\s/ || $line =~ m/^$/); $contp = 0 if ($line =~ m/^HOLIDAY/i); # kludge if (!$contp) { if ($#lines >= 0) { parse_calendar_entry ($slineno, $fast_p, @lines); @lines = (); } $slineno = $lineno; } $line =~ s/\n$//s; if ($#lines >= 0 || !$contp) { push @lines, $line; } } if ($#lines >= 0) { # last entry parse_calendar_entry ($slineno, $fast_p, @lines); } # Second pass, to finish parsing things. # foreach (sort (keys %calendar)) { my ($year, $month, $dotm) = m/^(\d\d\d\d)-(\d\d)-(\d\d)$/; parse_calendar_event_final ($dotm, $month, $year) if defined($year); } close IN; } # returns ("YYYY-MM" "YYYY-MM") of the range that the calendar covers. # sub find_scheduled_range() { my $min = 9999999; my $max = 0; foreach (keys (%calendar)) { if (m/^(\d+)-(\d+)-(\d+)$/) { my ($year, $month, $dotm) = ($1, $2, $3); my $i = $year * 1000 + $month; $min = $i if ($i < $min); $max = $i if ($i > $max); } } error ("no calendar events scheduled at all!") if ($max <= 0); my $first = sprintf ("%04d-%02d", $min/1000, $min%100); my $last = sprintf ("%04d-%02d", $max/1000, $max%100); print STDERR "$progname: scheduled range: $first - $last\n" if ($verbose > 2 && ($first || $last)); return ($first, $last); } ############################################################################## # # Generating calendar overview grids # ############################################################################## # returns a list of five items: # the html for the cal grid header; # the URLs that are in the "prev" and "next" links (or undef). # the pretty names of the "prev" and "next" links (or undef). # sub build_cal_header($$$$) { my ($year, $month, $prev_p, $next_p) = @_; my $output = ""; my ($prev_name, $next_name); my $last_month = $months[($month+10) % 12]; my $this_month = $months[$month-1]; my $next_month = $months[$month % 12]; my $last_month_year = ($month == 1 ? $year-1 : $year); my $next_month_year = ($month == 12 ? $year+1 : $year); my $last_month_href = sprintf("%02d", ((($month+10) % 12) + 1)) . ".html"; my $next_month_href = sprintf("%02d", ((($month) % 12) + 1)) . ".html"; $last_month_href = "../$last_month_year/$last_month_href" if ($year != $last_month_year); $next_month_href = "../$next_month_year/$next_month_href" if ($year != $next_month_year); $last_month_href = undef unless ($prev_p); $next_month_href = undef unless ($next_p); my ($prev_href1, $prev_href2, $next_href1, $next_href2); if ($prev_p) { $prev_name = "$last_month $last_month_year"; $prev_href1 = ""; $prev_href2 = ""; } else { $prev_href1 = "

"; $prev_href2 = "
"; } if ($next_p) { $next_name = "$next_month $next_month_year"; $next_href1 = ""; $next_href2 = ""; } else { $next_href1 = "
"; $next_href2 = "
"; } $last_month =~ s/^(...).*$/$1/; $this_month =~ s/^(...).*$/$1/; $next_month =~ s/^(...).*$/$1/; $output .= "" . "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . "
${prev_href1}<< " . "$last_month${prev_href2}" . "$this_month" . "${next_href1}$next_month " . ">>${next_href2}
"; $output =~ s/^/ /gm; $output =~ s/^\s+//s; return ($output, $last_month_href, $next_month_href, $prev_name, $next_name); } # Returns the HTML for a calendar grid, with embedded hrefs. # # returns: # the html for the calendar grid; # the URLs that are in the "prev" and "next" links (or undef). # the pretty names of the "prev" and "next" links (or undef). # sub make_calendar_grid_html($$$) { my ($year, $month, $include_ids_p) = @_; my $output = ("\n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n"); my $days = days_per_month ($month, $year); my $dotm = 1; my $dotw = dotw ($dotm, $month, $year); $output .= " \n"; for (my $i = 0; $i < $dotw; $i++) { $output .= " \n"; } my $cdotw = $dotw; for (my $week = 0; $week < 6; $week++) { for (; $cdotw <= 6; $cdotw++, $dotm++) { if ($dotm > $days) { $output .= " \n"; } $output .= " \n"; $output .= " \n" unless ($week == 5); $cdotw = 0; } # if the last row of the table is all empty cells, nuke it. 1 while ($output =~ s@\s*]*>\s*(]*>\s*\s*)+\s*$@\n@si); $output .= "
SunMonTueWedThuFriSat
"; } else { my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $event = $calendar{$key}; my $repeat_p = 0; # If there wasn't a static event, see if there was a repeating event. if (!defined($event)) { my ($dotw_number, $total_dotws) = dotw_count ($year, $month, $dotm); $event = lookup_repeating_entry ($year, $month, $dotm, $cdotw, $dotw_number, $total_dotws); $repeat_p = defined($event); } # Kludge: If this event just says "closed", then this is just there to # shadow a repeating event that would otherwise show up here. E.g., # when there's an "every thu" event but one specific day is dark. # # Likewise, omit it from the calendar grid if it is "CANCELLED". # if (!$repeat_p && defined($event)) { my @L = @$event; my $e0 = $L[0]; my $title = $$e0->{title}; if (defined ($title) && $title =~ m/\b(CANCELL?ED|POSTPONED|CLOSED)\b/i) { $event = undef; } } $output .= " {title} ); } else { foreach my $e0 (@$event) { push @titles, $$e0->{title}; } } $output .= (""; } else { $output .= ("
" . $dotm . "
"); } } $output .= "
\n"; return $output; } ############################################################################## # # Generating full calendar body text and html # ############################################################################## my $last_dotm_written; # kludge for filling in all the dotm anchors. # You can't have two tags pointing at the same text: the last # one wins and the others are ignored. So, this puts them on subsequent # characters... Kludge kludge... # sub splice_anchors($$$) { my ($str, $from, $to) = @_; my $head = ''; # Put the first over the digits of the date, and make it be # a link to itself, in class "dotm", so that it only displays as # a link when the mouse hovers over it. # # Links for the other days (preceeding days for which there is no # event) come one on each character for the rest of the line. # # The reason we do this at all is that the /calendar/latest.html # redirector always redirects to today's date, even if there isn't # and event on that date -- so arbitrary #NN anchors need to work. # if ($from <= $to) { ($head, $str) = ($str =~ m/^(\d+)(.*)$/s); $head = sprintf ("%s", $to, $to, $head); $to--; } my @chars = split (//, $str); my $i = $from; foreach (@chars) { if ($i <= $to) { # only die if we've gone far enough to hit it -- else, allow it. error ("can't anchorify: string contains HTML: $str") if ($_ eq '<' || $_ eq '>'); $_ = sprintf ("$_", $i); } $i++; } $str = $head . join ('', @chars); return $str; } # Constructs an EMBED tag from a YouTube URL. # sub make_embed_tag($$$$) { my ($url, $title, $w, $h) = @_; if ($url =~ m@^http://www\.youtube\.com/@) { $url =~ s@&.*$@@; # lose args $url =~ s@/(watch)?\?v=@/v/@; # "/watch?v=XXX" => "/v/XXX" } else { error ("VIDEO URL is not YouTube: $url"); } return ("
" . "" . "
\n"); } # Constructs HTML for linking to this event on social networking sites. # sub make_event_id_link($$) { my ($site, $id) = @_; my ($title, $url); $site = lc($site); if ($site eq 'going') { $title = "Going"; $url = "http://sanfrancisco.going.com/$id"; } elsif ($site eq 'upcoming') { $title = "Upcoming"; $url = "http://upcoming.yahoo.com/event/$id/"; } elsif ($site eq 'lastfm') { $title = "Last.FM"; $url = "http://www.last.fm/event/$id"; } elsif ($site eq 'sonicliving') { $title = "SonicLiving"; $url = "http://sonicliving.com/event/$id/"; } elsif ($site eq 'facebook') { $title = "Facebook"; $url = "http://www.facebook.com/event.php?eid=$id"; } elsif ($site eq 'myspace') { $title = "MySpace"; $url = ("http://events.myspace.com/index.cfm" . "?fuseaction=events.detail" . "&eventID=$id"); } elsif ($site eq 'sfstation') { # Don't include this one in the calendar. #$title = "SFStation"; #$url = "http://www.sfstation.com/$id"; } else { error ("unknown event site: $site ($id)"); } return ($url ? "$title" : undef); } sub make_event_id_links($) { my ($event) = @_; my @elinks; foreach my $key (@{$$event->{event_ids}}) { my ($site, $id) = ($key =~ m/^(.*?)=(.*)$/si); error ("unparsable event_id: $key") unless ($id); my $e = make_event_id_link ($site, $id); push @elinks, $e if ($e); } return '' if ($#elinks < 0); my $html = "


Are you going? Let your friends know on "; for (my $i = 0; $i <= $#elinks; $i++) { $html .= ($i > 0 && $i == $#elinks ? " and " : $i > 0 && $i <= $#elinks ? ", " : ""); $html .= $elinks[$i]; } $html .= '.'; return $html; } # Returns the HTML for a single calendar event. # sub build_event_html($$$$) { my ($event, $splice_anchors_p, $future_p, $tickets_form_p) = @_; my $html = $$event->{html_src}; my $date = $$event->{date}; my $times = $$event->{times}; my $holiday = $$event->{holiday}; my $repeat = $$event->{repeat}; my $flyer = $$event->{flyer}; my $photo = $$event->{photos}; my $ticket = $$event->{ticket}; my $vipticket = $$event->{vipticket}; my $onsale = $$event->{onsale}; my $embed = $$event->{video}; my $embedname = $$event->{videoname}; if ($tickets_form_p == 2) { # this means "allow advance ticket sales" $tickets_form_p = 0; $onsale = undef; } my $vspacing = 24; my $repeat_span = ($repeat ? " ROWSPAN=2" : ""); my ($year, $month, $dotm, $dotw, $start_minute, $end_minute) = @$times; my $event_output = ""; error ("$date: <$1> tag in body") if ($html =~ m@^\s*.*\n\n.* block. my $embed_right_p = ($html =~ m@.*\s*)?\s*(.*?)\s*\s*(

\s*)?@@si) { $blurb = $2; } $html = rewrite_dj_tags ($html, ($future_p && ($ticket || $vipticket)), $onsale, $$event->{date}, $tickets_form_p); $html = clean_html ($html); if (defined ($holiday)) { $holiday =~ s/^(.*?):\s+(.*?)$/$1\n($2)/si; $date .= "\n\n$holiday"; } # break the line before "afternoon". $date =~ s/ (Morning|Afternoon|Night)\b/\n$1/i; my ($title_col, $body_col) = ($html =~ m/^(.*?)\n\n+(.*?)\s*$/s); $title_col = $html unless ($title_col); $body_col = '' unless ($body_col); if ($embed) { my $w = 200; my $h = 172; my $tag = make_embed_tag ($embed, $embedname, $w, $h); $tag = "

$tag\n"; if ($embed_right_p) { $blurb .= $tag; } else { $body_col .= $tag; } } my $id_links = make_event_id_links ($event); if ($id_links) { # Put it just inside the last closing DIV. $title_col =~ s@^(.*)(.*?)$@$1$id_links$2@si; } $title_col =~ s/^\s*(<(P|BR)>\s*)*//gsi; $event_output .= (" \n" . " "); if ($splice_anchors_p == 1) { $date = splice_anchors ($date, $last_dotm_written+1, $dotm); $last_dotm_written = $dotm; } elsif ($splice_anchors_p == 2) { # When writing the $calendar_tickets_file, we put anchors on the # date that point back to the "real" calendar entry. # That's the "permalink". my $cal_url = sprintf ("../calendar/%04d/%02d.html#%02d", $year, $month, $dotm); my $anchor = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); $date =~ s@^(\d\d?)\b @$1@six; } my $inline_flyer = ($flyer ? find_inline_flyer ($flyer) : undef); $date =~ s/\n/
/g; $event_output .= "$date

\n"; if ($photo) { $photo = "../../$photo"; $event_output .= " "; $event_output .= "[ photos ]
\n"; } if ($inline_flyer) { $inline_flyer =~ s/\s+[HV]SPACE=\"?\d+\"?//gsi; # $inline_flyer =~ s/(]*)>/$1 HSPACE=6 VSPACE=2>/gsi; $event_output .= "

$inline_flyer

"; } $event_output =~ s@\s+$ @@sx; # lose newline before /TD $event_output .= ("\n" . " \n" . " \n" . "$title_col" . "\n"); # This cell must be here even if $body_col is empty, or else the # page stretches stupidly on mostly-empty months. # $event_output .= (" \n" . " \n" . $body_col . "\n"); if ($blurb) { $event_output .= (" \n" . " \n" . $blurb . "\n"); } $event_output .= " \n"; if ($repeat) { if ($repeat =~ m/^([^\s]+)\s(.*\s.*\s.*)\s([^\s]+)$/) { # if there are 5+ words, break line after 1st word and before last word. $repeat = "$1
$2
$3"; } elsif ($repeat =~ m/^(.*\s.*)\s+([^\s]+)$/) { # if there are 3+ words, break line before last word. $repeat = "$1
$2"; } $event_output .= (" \n" . " \n" . " $repeat" . "\n" . " \n"); } $event_output .= (" \n". # extra vertical space " \n" . " \n"); $event_output = clean_html ($event_output); # one last time return $event_output; } # Returns the plain text for a single calendar event. # sub build_event_text($$$$$$$) { my ($event, $date_p, $future_p, $infoline_p, $long_lines_p, $day_prefix, $day_suffix) = @_; my $name = $$event->{title}; my $html = $$event->{html_src}; my $date = $$event->{date}; my $holiday = $$event->{holiday}; my $flyer = $$event->{flyer}; my $photo = $$event->{photos}; my $times = $$event->{times}; my $ticket = $$event->{ticket}; my $vipticket = $$event->{vipticket}; my $onsale = $$event->{onsale}; my $embed = $$event->{video}; my ($year, $month, $dotm, $dotw, $start_minute, $end_minute) = @$times; my $output = ""; my $do_urls_p = !$infoline_p; my $m = $months[$month-1]; $m =~ s/^(...).*/$1/ unless ($infoline_p); my $blurb = undef; if ($html =~ s@\s*\s*(.*?)\s*\s*(

\s*)?@@si) { $blurb = $1; } $blurb = undef if ($infoline_p); $html =~ s@(]*>)(.*?)() @{ $1 . upcase_html($2) . $3 }@xeigs; $html =~ s@<(STRIKE)>.*?@@gsi; # omit this! $html = reformat_infoline ($html) if ($infoline_p); my ($head, $tail) = ($html =~ m/^(.*?)\n\n+(.*?)\s*$/s); $head = $html unless ($head); $tail = '' unless ($tail); my $reformat_p = ($tail =~ m/^\s*(<[^>]*>\s*)*Performing live:/si); $_ = $date; # the dotw desc (e.g., "Sunday Night") is my ($d) = m/^([^,]+),/; # the first set of words before the comma. $d = "$day_prefix $d" if (defined ($day_prefix)); $d = "$d -- $day_suffix" if (defined ($day_suffix)); if (defined($holiday) && !$infoline_p) { $holiday =~ s@
@ @gi; $holiday =~ s@<[^<>]+>@@g; # strip any tags in holiday text $d .= " -- $holiday"; } # slightly rewrite if there's a day suffix and a holiday. $d =~ s/^(.*) -- (.*) -- (.*)$/$1 -- $2, $3/; if ($infoline_p) { $output .= sprintf("\n$d, $m $dotm.\n\n"); } elsif ($date_p) { $output .= "\n" . ('-' x 72) . "\n\n"; # $fill_column ? $output .= sprintf("%02d $m $year ($d)\n\n", $dotm); } my @event_urls = ($infoline_p ? () : find_event_urls ($head, 0)); # throw away everything but the STATS section if reformatting. $head =~ s@^.*(.*?).*$@$1@gsi if ($reformat_p); $head = de_htmlify ($head, 0); $tail = de_htmlify ($tail, $do_urls_p); $blurb = de_htmlify ($blurb, 0) if ($blurb); if (! $infoline_p) { $tail = indentify ($tail); # Blah. $tail =~ s/([ \t]--)[ \t]*\n[ \t]+/$1 /gs; $head =~ s/\n\n+/\n/sg; # delete blank lines # but put one back after capwords (including cap acronyms and posessives.) $head =~ s/([[:upper:]][.\']?[[:upper:]] [ \t]*[[:upper:]\d]* [ \t]*[.,+&]?:?)$/$1\n/xmg; # Total kludge for a night with a single character name... $head =~ s/^(Q)$/$1\n/mg; } # put some URLs between the head and the tail. # the flyer URL always come first. if (! $infoline_p) { my $flyer = find_flyer ($$event, $year, $month, $dotm); if ($flyer) { $flyer = "${url_base}$flyer"; unshift @event_urls, $flyer; } push @event_urls, $embed if ($embed); } if ($reformat_p) { #$head =~ s/^/ /gm; $tail .= "\n\n$head"; $head = ''; } if ($#event_urls >= 0) { my $u = ''; # only include each URL once # (in case the event URL is the same as one of the band URLs.) foreach (@event_urls) { my $re = qr/$_/; $u .= "$_\n" unless ($tail =~ m/$re/i); } if ($reformat_p) { $tail = "$tail\n\n$u"; } else { $tail = "$u\n\n$tail"; } } # #### Note: this only handles the first ticket (of each type) on sale # for the event. # if ($ticket && $future_p) { if ($infoline_p) { $tail .= "\n\nAdvance tickets available at dnalounge.com.\n"; } elsif ($onsale && !ticket_on_sale_p ($onsale)) { my ($dd, $mm, $yyyy) = ($onsale =~ m/(\d\d?)-([a-z]{3})-(\d\d\d\d)/si); $mm =~ s/^(.)/{uc($1)}/xe; $tail .= "\n\nTickets on sale $mm $dd.\n"; } else { $tail .= "\n\nTickets:"; $tail .= ((length ($ticket) > 59) ? "\n" : " "); $tail .= "$ticket\n"; } } if ($vipticket && $future_p && !$infoline_p) { if (!$onsale || ticket_on_sale_p ($onsale)) { $tail .= "\n\nVIP Service:"; $tail .= ((length ($vipticket) > 59) ? "\n" : " "); $tail .= "$vipticket\n"; } } $html = "$head\n\n$tail"; if ($infoline_p) { # capitalize upper-cased words of 3 letters or more. $html =~ s/([[:upper:]])([[:upper:]][[:upper:]]+(\'S)?)\b/$1\L$2/gs; $html =~ s/Dna/DNA/gs; } # put linebreaks after URLs not at end of line # $html =~ s@(https?:[^\s]+)([ \t]+)(.+)$@$1\n$3@gm; $html =~ s/^(https?)/ $1/mg; # Indented URLs right before non-indented "Tickets:" lines look funny. # 1 while ($html =~ s/^ +(http[^\s]+\n)\n*(http:|Tickets:)/$1$2/m); # Re-wrap the blurb text, and tack it on the end. # if ($blurb) { $Text::Wrap::columns = ($long_lines_p ? 1024 : 65); $Text::Wrap::huge = 'wrap'; my @paras = split(/\n *\n+/, $blurb); foreach my $p (@paras) { $p = wrap ('', '', $p); } $blurb = join ("\n\n", @paras); $html .= "\n\n$blurb"; } $html =~ s/^/ /mg; $html =~ s/\s+$//s; $html =~ s/[ \t]+$//gm; $html =~ s/(\n\n)\n+/$1/gs; # compress multiple blank lines $html =~ s/^\n+//s; $html =~ s/\n+$//s; $output .= $html; $output .= "\n"; my @lines = split (/\n/, $output); if (! $long_lines_p) { foreach my $line (@lines) { # if line is longer than 72 characters, strip spaces after/before "--" 1 while ($line =~ m/^.{$fill_column}/s && $line =~ s/(-- ) /$1/s); 1 while ($line =~ m/^.{$fill_column}/s && $line =~ s/ ( --)/$1/s); # if line is still longer than 72 characters, wrap it. if ($line =~ m/^ (.{$fill_column})/s) { my ($pre) = ($line =~ m/^(\s+)/s); $Text::Wrap::columns = $fill_column; $Text::Wrap::huge = 'overflow'; $line = wrap ('', $pre, $line); } } } return join ("\n", @lines) . "\n"; } # Returns the crontab text for a single calendar event. # sub build_event_crontab($) { my ($event) = @_; my $name = $$event->{title}; my $webcast = $$event->{webcast}; my $desc = ($webcast eq 'lounge' ? $$event->{lounge} : $$event->{main_room}) || ''; my $times = $$event->{times}; my ($year, $month, $dotm, $dotw, $start_minute, $end_minute) = @$times; my $days_in_month = days_per_month ($month, $year); my $flyer = find_flyer ($$event, $year, $month, $dotm); return '' if ($webcast eq 'off'); $name =~ s/\s+/ /gs; $name =~ s/([[:lower:]])/\U$1/g; $desc =~ s/(^| +)-- .*$//gm; # delete stuff after "--" on a line $desc =~ s/\s*\(.*$//gm; # delete parentheticals # lose that "With live vocal performances by" shit $desc =~ s/^with\b.*\bby:?\n//gmi; $desc = "$name\n$desc"; $desc =~ s/^\n+//gs; $desc =~ s/\n+$//gs; $desc =~ s/\n\n+/\n/gs; # if first and second line are the same, collapse them. $desc =~ s/^([^\n]+)(\n\*?\1)/$1/si; # Wrap lines. # my @lines = split (/\n/, $desc); foreach my $line (@lines) { my $uscale = 0.5; # upper case letters take up 1.5x as much space. my $max_col = 21; my $L = length ($line); my $u = $line; $u =~ s/[^[:upper:]]//gs; $L += length($u) * $uscale; if ($L > $max_col) { my $star = (($line =~ m/^\*/s) ? '*' : ''); my @words = split (/\s+/, $line); $line = ''; my $col = 0; foreach my $word (@words) { $L = length ($word); $u = $word; $u =~ s/[^[:upper:]]//gs; $L += length($u) * $uscale; $col += $L; $col++ if ($line); if ($col > $max_col) { $col = $L; $line .= "\n$star$word"; } else { $line .= " " if ($line); $line .= $word; } } $line =~ s/^\*\*+/\*/s; # oops, long words end up multiply starred } } $desc = join ("\n", @lines); $desc =~ s/[\\\"]//g; # no double-quotes or backslashes please... if (defined ($flyer)) { $flyer =~ s@^.*/@@; $desc = "$flyer\n$desc"; } $desc = entitify ($desc); # don't let the description be too long. I'm not sure what the actual # limit is, but cron and/or sh start barfing at some point... # $desc =~ s/^(.{140}).*$/$1 .../s; # quote newlines $desc =~ s/\n/\\n/g; my $length = $end_minute - $start_minute; my $shour = int($start_minute / 60); my $smin = int($start_minute % 60); $length = sprintf("%d:%02d", int($length / 60), int($length % 60)); my $ct = sprintf ("%2d %2d %2d %2d * ", $smin, $shour, $dotm, $month); $ct .= "LENGTH=$length DESC=\"$desc\"\n"; # mktime: dotm: 1-31; month: 0-11; year: -1900; dotw: 0-6. # cron: dotm: 1-31; month: 1-12; year: n/a. dotw: 0-6; my $stime = mktime (0, $smin, $shour, $dotm, $month-1, $year-1900, 0, 0, -1); my $etime = $stime + (($end_minute - $start_minute) * 60); my ($esec, $emin, $ehour, $edotm, $emon, $eyear) = localtime ($etime); my $ct2 = sprintf ("%2d %2d %2d %2d * ", $emin, $ehour, $edotm, $emon+1); $ct2 .= "END\n"; $ct .= $ct2; if ($verbose > 2) { my $d = "$dotm-$months[$month-1]-$year"; my $s = sprintf("%d:%02d %s", ($shour == 0 ? 12 : $shour > 12 ? $shour - 12 : $shour), $smin, ($shour < 12 ? "AM" : "PM")); $_ = $length; my ($lh, $lm) = m/^(\d\d?):(\d\d)$/; my $ee = ($shour * 60) + $smin + ($lh * 60) + $lm; my $ehour = int ($ee / 60) % 24; my $emin = int ($ee % 60); my $e = sprintf("%d:%02d %s", ($ehour == 0 ? 12 : $ehour > 12 ? $ehour - 12 : $ehour), $emin, ($ehour < 12 ? "AM" : "PM")); $_ = $ct; s/^/$progname: crontab: /gm; s/\n$/ ($d: $s - $e)\n/s; print STDERR "$_"; } return $ct; } # first TR in calendar pages sets the spacing and minimum widths for columns. # my $calendar_listing_column_top = (" \n" . " \n" . # dbox " \n" . # CSS "min-width: 60em" does not work in MSIE 6, and # does not work on TD in Safari 2 (works on DIV.) # This has the same effect, pretty much. "
\n" . # tbox " \n" . "
\n" . # ibox " \n" . "
\n" . # bbox " \n"); # Constructs files describing the given month. Returns: # - the body of an HTML document; # - the body of a text document. # If $summarize_only_p, then only generates the @future_events list. # sub build_month_bodies($$$$$) { my ($year, $month, $prev_p, $next_p, $summarize_only_p) = @_; my $html = ""; my $text = ""; my ($csec, $cmin, $chour, $cdotm, $cmon, $cyear) = localtime; $cmon++; $cyear += 1900; my $now_i = ($cyear * 100000000 + $cmon * 1000000 + $cdotm * 10000 + $chour * 100 + $cmin); my $mon_i = ($year * 100000000 + $month * 1000000 + 32 * 10000); my $past_month_p = ($mon_i < $now_i); ###################### # Generate html header ###################### $html .= "$page_title: $months[$month-1] $year\n"; my ($header, $prev, $next, $prev_name, $next_name) = build_cal_header ($year, $month, $prev_p, $next_p); my $grid = make_calendar_grid_html ($year, $month, !$past_month_p); my $links = ''; $links .= " \n"; $links .= " \n"; $links .= " \n" if ($prev); $links .= " \n" if ($next); $links .= $xml_link_tag; $html .= $links; $html .= ("\n" . $css_html . "\n" . "\n" . "$header\n" . "\n" . "\n" . "

\n" . $grid . $calendar_top_blurb . "
\n" . "\n" . "\n" . "\n" . $calendar_listing_column_top ); # update the "d30" IDs in the CSS, in the same manner as # calendar/update-calboxes.pl... # { my ($csec, $cmin, $chour, $cdotm, $cmon, $cyear) = localtime; $cmon++; $cyear += 1900; my $dd = (($cmon == $month && $cyear == $year) ? $cdotm : 0); $html =~ s/^( *#d)\d\d?\b/$1$dd/gm; } ############################ # Generate plain text header ############################ my $hr = center_line('-' x 24) . "\n"; $text .= "$hr\n"; $text .= center_line($page_title) . "\n"; $text .= center_line($months[$month-1] . " $year") . "\n"; $text .= "\n$hr\n"; $text .= center_line(sprintf ("${url_base}calendar/$year/%02d.html", $month)) . "\n"; ################################ # Loop over body events for both ################################ my $days = days_per_month ($month, $year); $last_dotm_written = 0; my @future_events_1 = (); # the event just before the "today" cutoff time my $month_yesterday_event = undef; for (my $dotm = 1; $dotm <= $days; $dotm++) { my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $listref = $calendar{$key}; my @list = (); my $dotw = dotw ($dotm, $month, $year); if (defined ($listref)) { @list = @$listref; # Kludge: If this event just says "closed", then this is just there to # shadow a repeating event that would otherwise show up here. E.g., # when there's an "every thu" event but one specific day is dark. # my $e0 = $list[0]; my $title = $$e0->{title}; if (defined ($title) && $title =~ m/^\(?CLOSED\)?$/si) { next; } } else { my ($dotw_number, $total_dotws) = dotw_count ($year, $month, $dotm); my $repeater = lookup_repeating_entry ($year, $month, $dotm, $dotw, $dotw_number, $total_dotws); @list = (\$repeater) if (defined ($repeater)); } next if ($#list < 0); foreach my $event (@list) { my $times = $$event->{times}; my ($igy, $igm, $igdm, $igdw, $start_minute, $end_minute) = @$times; my $hour = int ($start_minute / 60); my $min = int ($start_minute % 60); # Events go on @future_events if the event starts today or later. # That controls what goes on the front page. # # But if the event has already started, or if it starts in less # than an hour, we blow away its "tickets" link so that the "buy" # links don't show up in the calendar or on the front page. # my $ticket_i = ($year * 100000000 + $month * 1000000 + $dotm * 10000 + ($hour-1) * 100 + $min); my $event_i = $year * 100000000 + $month * 1000000 + ($dotm+1) * 10000; my $future_p = ($event_i >= $now_i); my $tickets_expired_p = ($now_i >= $ticket_i); if ($tickets_expired_p) { $$event->{ticket} = undef; $$event->{vipticket} = undef; } push @future_