#!/usr/bin/perl -w # Copyright © 2000-2008 Jamie Zawinski . # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Generates DNA Lounge calendar pages. # depends on "utils/menuify.pl", "cmp". # Created: 29-Nov-2000. ############################################################################ # # Syntax of the calendar.txt file: # # DOTM-MONTH-YEAR # DESCRIPTION # # Every DOTW from START-DATE to END-DATE # DESCRIPTION # # Every NTH DOTW from START-DATE to END-DATE # DESCRIPTION # e.g., "every first,third friday" of each month. # "first", "second", and "third" mean the obvious thing. # "last" is always the last one in the month (4th or 5th.) # "fifth" is the second-to-last one, in months that have # 5 of them (it is the "extra friday".) # "fourth" is never used. # # Holiday DOTM-MONTH[-YEAR] DESCRIPTION # # Holidays whose text begins with an asterisk are assumed to be federal # holidays, meaning if it falls on a sunday or monday, we note Sunday # events as being on a 3-day weekend. # # If there is an entry for a specific date, it overrides any "Every" # entries that might also apply. If multiple "every" entries apply # to a given date, the later one in the file wins. # # Descriptions are HTML, and can be multiple lines. # Each line of a description must be indented at least one space. # # The left column (title/genre/times/price) and right column (performers) # are separated by a blank line. # # The HTML text within a column does not wrap: use
and

liberally. # # There are various custom HTML-like tags that are used in the HTML that # are converted to "real" HTML when the actual files are generated, like # to indicate DJ names. # # Holiday descriptions show up over in the "Date" box. (Really this ought # to be done more automatically, but I don't know of a command-line program # that will compute those dates for me.) # # # More precisely: # # file := [ entry ]* # entry := static_event | recurring_event | holiday # # static_event := day_of_month_line event_data # day_of_month_line := dotm month year '(' dotw ')' '\n' event_data # event_data := [ hspace text '\n' ]* # # recurring_event := recurrence_line [ recurring_event_data ]* range # recurrence_line := 'every' [ repeat_words ]? day_words '\n' # day_words := day_word [ ',' day_word ]* # repeat_words := nth_of_month_words | alternating_word # nth_of_month_words := nth_of_month_word [ ',' nth_of_month_word ]* # nth_of_month_word := 'first' | 'second' | 'third' | 'fifth' | 'last' # alternating_word := 'other' # (note that there is no way to express # "every 3 weeks"; one can only express # "every 3rd week of the month".) # range := 'from' range_date [ 'to' range_date ]? # range_date := dotm '-' month '-' year # recurring_event_data := hspace text '\n' # # holiday := 'holiday' holiday_date text # holiday_date := dotm '-' month [ '-' year ] # # month := 'january' | 'february' | ... | 'december' | # 'jan' | 'feb' | ... | 'dec' # dotm := '1' | '2' | ... | '31' # year := '2000' | '2001' | ... # hspace := [ ' ' | '\t' ]+ # # # Examples: # # 2 January 2001 (tue) # blah blah blah # 18 January 2001 (thu) # All work and no play makes Jack a dull boy. # All work and no play makes Jack a dull boy. # # 27 feb 2001 (tue) # something else # # Every tuesday from 3-Jan-2001 # blah blah blah # # Every other friday from 3-Jan-2001 -- NOT YET IMPLEMENTED # blah blah blah # # Every first,third wednesday from 3-Jan-2001 # blah blah blah # blah blah blah # # Every fifth saturday from 3-Jan-2001 to 3-March-2001 # blah blah blah # # Holiday 02-Feb Groundhog Day # # Note that, in a month that has 5 saturdays, they are numbered # "first", "second", "third", "fifth", "last", because the "extra" # saturday goes before the "last" saturday. I think this is a pretty # stupid way to go about it, but I'm told that's how it's done. # # Specifying the day of the week on the line for static entries isn't # strictly necessary, but it functions as a checksum (we can error if # the dotm and dotw don't match.) # ############################################################################ require 5; use diagnostics; use strict; use bytes; use Text::Wrap; # Utter foulness! Without this, [:upper:] doesn't work on Latin1 characters. use locale; use POSIX qw(locale_h mktime strftime); setlocale(LC_ALL, "en_US"); ############################################################################## # # Configuration and stuff # ############################################################################## my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.476 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $exec_dir = $0; $exec_dir =~ s@/[^/]*$@@; my $exec_dir2 = "$exec_dir/../utils/"; my $data_dir = $exec_dir; my $verbose = 0; my $debug_p = 0; # this means "don't alter any files, print diffs instead" my $calendar_data_file = "$data_dir/calendar.txt"; my $calendar_upcoming_file = "upcoming.html"; my $calendar_overview_file = "overview.html"; my $calendar_rss_file = "dnalounge.rss"; my $calendar_ical_file = "dnalounge.ics"; my $calendar_crontab_file = "crontab.txt"; my $calendar_weekly_file = "weekly.txt"; my $calendar_weekly_prolog = "prolog.txt"; my $calendar_topten_file = "topten.txt"; my $calendar_infoline_file = "infoline.txt"; my $calendar_ledger_file = "ledger.html"; my $calendar_names_file = "names.txt"; my $calendar_eighties_file = "1985-1999.txt"; my $calendar_tickets_file = "../tickets/index.html"; my $store_file = "../store/index.html"; my $template_file = "$exec_dir2/template.html"; my @menuify_cmd = ("$exec_dir2/menuify.pl", $template_file); my $calendar_date_block_bg = "#004400"; # for the box with the date in it my $calendar_grid_dim_fg = "#666"; # de-emphasized dotms in cal overview my $fill_column = 70; # no plain-text lines longer than this # For the calendar html pages. my $page_title = "DNA Lounge Calendar"; my $url_base = "http://www.dnalounge.com/"; my $ticket_form_url = "https://cart.dnalounge.com/order/"; my $ticket_url_base = "$ticket_form_url?item="; my $vip_price = '$295'; my $age_text = '+ with ID.'; my $age_aa_text = 'all ages.'; my $css_html = "\n"; my $colorless_css_html = # Style sheet used in email and RSS. "\n"; my $xml_link_tag = " \n"; my $calendar_top_blurb = ("Birthday party?\n" . "VIP table service is available!\n" . "

\n" . "Have our events show up in your\n" . "calendar or PDA with our iCal feed.\n" ); my $tickets_top_blurb = ("

\n" . "

\n" . "Advance tickets are available for the following events!\n" . "Tickets for all other events will be available at the door only.\n" . "Please see our " . "calendar for details of other events.\n" . "

\n" . "Having a birthday party?\n" . "VIP table service is available!\n" . "

\n" . "

\n" ); my $eighties_top_blurb = ("The list of bands who have performed at DNA Lounge since we re-opened\n" . "in 2001 is well documented on our\n" . "calendar. However, DNA Lounge has existed since\n" . "1985, and I've been making an effort to document the bands that played\n" . "here before I was running things. This is the list I've been able\n" . "to construct so far.\n" . "

If you have any updates or corrections, please let\n" . "me know!\n" ); my $rss_channel_title = "DNA Lounge"; my $rss_channel_url = $url_base; my $rss_channel_desc = "Upcoming events at the DNA Lounge nightclub " . "in San Francisco."; my $rss_channel_desc2 = ("Upcoming events at the DNA Lounge nightclub:\n" . "375 Eleventh Street, San Francisco.\n" . "All events are 21+ unless otherwise noted.\n" . "A valid photo ID required."); my $rss_channel_loc = "DNA Lounge: 375 Eleventh Street, San Francisco."; my $rss_latlong = "37.771007;-122.412694"; my $rss_channel_lang = "en"; my $rss_channel_tz = "US/Pacific"; my $rss_logo_url = "${url_base}logo-thumb.gif"; my $rss_logo_width = 100; my $rss_logo_height = 34; my $rss_max_links = 80; my $overview_max_links = 10; ############################################################################## # # Data structures and stuff # ############################################################################## my %calendar = (); # keys are "YYYY-MM-DD" # values are references to lists. # elements of lists are hashes with these keys: # date ("dotw, dd-mmm-yy") # time ("DD:DD - DD:DD" start/end time) # times (yyyy mm dd dotw # start_min end_min real_end_min) # event_ord (overall event number, from start) # day_ord (event_number of_n_events_today) # title (the short name, without "presents") # repeat ("every first monday") # holiday (if this is a holiday, which one) # vacationp (1 if tomorrow is a day off) # url (the event's home page) # flyer (flyer url for the event, if any) # genre (listref of genres of the event) # photos (photos url) # age (21, 18, AA) # ticket (url to buy a ticket) # vipticket (url to buy a VIP ticket) # event_ids (listref of external site links) # onsale ("dd-mm-yy" when tickets on sale) # main_room (summary of main room DJs) # lounge (summary of lounge DJs) # webcast (webcast state: main, lounge, off) # video (youtube url to be embedded) # videoname (caption of video) # html_src (raw calendar.txt source) my @repeaters = (); # elements are lists: # (dotw, repeat_code, repeat_desc_string, # from_year, from_month, from_dotm, # to_year, to_month, to_dotm, # lineno, event-hash) # dotw: 0-6 # repeat: 0, 1-5, -1, or -666. # the event-hash is as in %calendar my %shared_flyers = (); # keys and values are "YYYY-MM-DD" or "YYYY-MM-DDb". my @future_events; # list of event hashes that lie in the future my $yesterday_event; # the last event that didn't go on @future_events my @event_ords; # index of events by overall event number. my %genres; # count of event genres. my %inline_flyers; # maps date to tags for flyer thumbnails. my $total_event_count; # how many events are listed in the calendar my $total_live_count; # how many of those were live shows my $event_count_12mo; # how many events in the last 12 months my $live_count_12mo; # how many of those were live shows my $event_count_6mo; # how many events in the last 6 months my $live_count_6mo; # how many of those were live shows my $one_year_ago; my $six_months_ago; my $stats_cutoff; my $body_template = undef; ############################################################################## # # Date parsing and manipulation utilities # ############################################################################## my @months = ("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"); my @days = ("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"); my @days_per_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31); # matches various forms of month names (in lower case.) # my $month_re = ('\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|' . 'may|june?|july?|aug(?:ust)?|sep(?:tember)?|sept|' . 'oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\b'); # matches various forms of day-of-the-week names (in lower case.) # my $days_re = ('\b(?:sun(?:day)?|mon(?:day)?|tue(?:sday)?|tues|' . 'wed(?:nesday)?|thu(?:rsday)?|thurs?|fri(?:day)?|' . 'sat(?:urday)?)\b'); # maps lower cased month names to numbers (1-12). # my %monthvals = ( 'jan' => 1, 'january' => 1, 'february' => 2, 'feb' => 2, 'march' => 3, 'mar' => 3, 'april' => 4, 'apr' => 4, 'may' => 5, 'jun' => 6, 'june' => 6, 'jul' => 7, 'july' => 7, 'august' => 8, 'aug' => 8, 'sep' => 9, 'sept' => 9, 'september' => 9, 'oct' => 10, 'october' => 10, 'nov' => 11, 'november' => 11, 'dec' => 12, 'december' => 12 ); # maps lower cased day names to numbers (0-6, sun-sat). # my %dayvals = ( 'sun' => 0, 'sunday' => 0, 'mon' => 1, 'monday' => 1, 'tue' => 2, 'tues' => 2, 'tuesday' => 2, 'wed' => 3, 'wednesday' => 3, 'thu' => 4, 'thur' => 4, 'thurs' => 4, 'thursday' => 4, 'fri' => 5, 'friday' => 5, 'sat' => 6, 'saturday' => 6 ); # Given a date, returns the day of the week on which it falls (0-6, sun-sat) # sub dotw($$$) { my ($dotm, $month, $year) = @_; # From the sci.math FAQ: # # The following formula, which is for the Gregorian calendar only, may be # more convenient for computer programming. Note that in some programming # languages the remainder operation can yield a negative result if given # a negative operand, so "mod 7" may not translate to a simple remainder. # # W == (k + [2.6m - 0.2] - 2C + Y + [Y/4] + [C/4]) mod 7 # where [] denotes the integer floor function (round down), # k is day (1 to 31) # m is month (1 = March, ..., 10 = December, 11 = Jan, 12 = Feb) # Treat Jan & Feb as months of the preceding year # C is century (1987 has C = 19) # Y is year (1987 has Y = 87 except Y = 86 for Jan & Feb) # W is week day (0 = Sunday, ..., 6 = Saturday) # # Here the century & 400 year corrections are built into the formula. # The [2.6m-0.2] term relates to the repetitive pattern that the 30-day # months show when March is taken as the first month. my $k = $dotm; my $m = 1 + (($month + 9) % 12); # (1-12) => (11, 12, 1-10) my $y = $year - ($m >= 11 ? 1 : 0); # subtract a year for jan/feb my $C = int ($y/100); my $Y = int ($y%100); my $W = (($k + int ((2.6 * $m) - 0.2) - (2 * $C) + $Y + int ($Y / 4) + int ($C / 4) + 7) % 7); return $W; } # Returns info describing a day's position in the month. # For the given date, tells you that it is, e.g., the 2nd wednesday; # and that this month contains 5 wednesdays. # sub dotw_count($$$) { my ($year, $month, $dotm) = @_; my $days = days_per_month ($month, $year); my $dotw = dotw ($dotm, $month, $year); my $index = undef; my $total = 0; my $idotw = dotw (1, $month, $year); for (my $i = 1; $i <= $days; $i++) { if ($idotw == $dotw) { $total++; $index = $total if ($i == $dotm); } $idotw = ($idotw + 1) % 7; } error ("$dotm does not occur in $month-$year?") unless ($index); error ("only $total $days[$dotw]s in $month-$year?") if ($total < 4); return ($index, $total); } # Whether the given 4-digit year is a leap year. # sub leap_year_p($) { my ($year) = @_; return ((($year % 4 == 0) && # divisible by 4 (($year % 100) || # ...unless divisble by 100 ($year % 400 == 0))) # ...and not divisible by 400 ? 1 : 0); } # Returns the number of days in the given month, considering leap years. # sub days_per_month($$) { my ($month, $year) = @_; my $days = $days_per_month[$month-1]; $days++ if ($month == 2 && leap_year_p($year)); # feb leapday return $days; } # Converts a day number to "Saturday", "Saturday Afternoon", etc. # $event_count is how many events there are on this day; # $event_number is which of those events this is. # The "evening" suffix is only used if there are multiple events, # but the "morning" and "afternoon" suffixes are used even if # there is only one event on this day. # sub describe_day($$$$) { my ($dotw, $start_minute, $event_count, $event_number) = @_; my $afternoon_threshold = (12 * 60); # noon my $evening_threshold = (17 * 60); # 5 PM my $day = $days[$dotw]; if ($event_count > 1 || $start_minute < $evening_threshold) { my $suffix = undef; if ($start_minute < $afternoon_threshold) { $suffix = "Morning"; } elsif ($start_minute < $evening_threshold) { $suffix = "Afternoon"; } elsif ($event_number > 0) { # no "Night" suffix if it's the first event (so we don't get two) $suffix = "Night"; } $day .= " $suffix" if (defined ($suffix)); } return $day; } ############################################################################## # # HTML utilities # ############################################################################## my %entity_table = ( "quot" => '"', "amp" => '&', "lt" => '<', "gt" => '>', "nbsp" => ' ', "iexcl" => '¡', "cent" => '¢', "pound" => '£', "curren" => '¤', "yen" => '¥', "brvbar" => '¦', "sect" => '§', "uml" => '¨', "copy" => '©', "ordf" => 'ª', "laquo" => '«', "not" => '¬', "shy" => '­', "reg" => '®', "macr" => '¯', "deg" => '°', "plusmn" => '±', "sup2" => '²', "sup3" => '³', "acute" => '´', "micro" => 'µ', "para" => '¶', "middot" => '·', "cedil" => '¸', "sup1" => '¹', "ordm" => 'º', "raquo" => '»', "frac14" => '¼', "frac12" => '½', "frac34" => '¾', "iquest" => '¿', "Agrave" => 'À', "Aacute" => 'Á', "Acirc" => 'Â', "Atilde" => 'Ã', "Auml" => 'Ä', "Aring" => 'Å', "AElig" => 'Æ', "Ccedil" => 'Ç', "Egrave" => 'È', "Eacute" => 'É', "Ecirc" => 'Ê', "Euml" => 'Ë', "Igrave" => 'Ì', "Iacute" => 'Í', "Icirc" => 'Î', "Iuml" => 'Ï', "ETH" => 'Ð', "Ntilde" => 'Ñ', "Ograve" => 'Ò', "Oacute" => 'Ó', "Ocirc" => 'Ô', "Otilde" => 'Õ', "Ouml" => 'Ö', "times" => '×', "Oslash" => 'Ø', "Ugrave" => 'Ù', "Uacute" => 'Ú', "Ucirc" => 'Û', "Uuml" => 'Ü', "Yacute" => 'Ý', "THORN" => 'Þ', "szlig" => 'ß', "agrave" => 'à', "aacute" => 'á', "acirc" => 'â', "atilde" => 'ã', "auml" => 'ä', "aring" => 'å', "aelig" => 'æ', "ccedil" => 'ç', "egrave" => 'è', "eacute" => 'é', "ecirc" => 'ê', "euml" => 'ë', "igrave" => 'ì', "iacute" => 'í', "icirc" => 'î', "iuml" => 'ï', "eth" => 'ð', "ntilde" => 'ñ', "ograve" => 'ò', "oacute" => 'ó', "ocirc" => 'ô', "otilde" => 'õ', "ouml" => 'ö', "divide" => '÷', "oslash" => 'ø', "ugrave" => 'ù', "uacute" => 'ú', "ucirc" => 'û', "uuml" => 'ü', "yacute" => 'ý', "thorn" => 'þ', "yuml" => 'ÿ', # HTML 4 entities that do not have 1:1 Latin1 mappings. "bull" => "*", "hellip"=> "...", "prime" => "'", "Prime" => "\"", "frasl" => "/", "trade" => "[tm]", "larr" => "<-", "rarr" => "->", "harr" => "<->", "lArr" => "<=", "rArr" => "=>", "hArr" => "<=>", "empty" => "Ø", "minus" => "-", "lowast"=> "*", "sim" => "~", "cong" => "=~", "asymp" => "~", "ne" => "!=", "equiv" => "==", "le" => "<=", "ge" => ">=", "lang" => "<", "rang" => ">", "loz" => "<>", "OElig" => "OE", "oelig" => "oe", "Yuml" => "Y", "circ" => "^", "tilde" => "~", "ensp" => " ", "emsp" => " ", "thinsp"=> " ", "ndash" => "-", "mdash" => "--", "lsquo" => "`", "rsquo" => "'", "sbquo" => "'", "ldquo" => "\"", "rdquo" => "\"", "bdquo" => "\"", "lsaquo"=> "<", "rsaquo"=> ">", ); my %entity_table2; # maps Latin1 back to Entities foreach (keys (%entity_table)) { $entity_table2{$entity_table{$_}} = $_; } my %asciify_table = ( '¡' => '!', '¢' => '[c]', '£' => '#', '¤' => '#', '¥' => 'Y', '¦' => '|', '§' => 'SS', '¨' => '_', '©' => '(c)', 'ª' => '?', '«' => '<<', '¬' => '=', '­' => '-', '®' => '(r)', '¯' => '_', '°' => '[?]','±' => '+/-', '²' => '[2]', '³' => '[3]', '´' => '\'', 'µ' => 'u', '¶' => 'PP', '·' => '.', '¸' => ',', '¹' => '[1]', 'º' => '[o]','»' => '>>', '¼' => ' 1/4 ', '½' => ' 1/2 ', '¾' => ' 3/4 ', '¿' => '?', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => '0', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'p', 'ß' => 'S', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'o', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', '÷' => '/', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'p', 'ÿ' => 'y' ); # Converts any Latin1 characters to HTML entities. # Also converts &, < and > to entities. # sub entitify($) { my ($s) = @_; $s =~ s/([&<>\"\177-\377])/ { my $ent = $entity_table2{$1}; error ("no entity for character: $1\n") unless (defined ($ent)); print STDERR "$progname: $1 -> $ent\n" if ($verbose > 3);; "&$ent;"; }/xegs; return $s; } # Convert any HTML entities to Latin1 characters. # sub de_entify($) { my ($text) = @_; $text =~ s/(&([[:alpha:]]+);?)/ { my $c = $entity_table{$2}; print STDERR "$progname: warning: unknown HTML character entity \"$1\"\n" unless $c; ($c ? $c : "[$2]"); } /gexi; return $text; } # Converts any Latin1 characters to the closest ASCII equivalent. # Note that you probably also need to convert & to & # sub asciify($) { my ($s) = @_; $s =~ s/([\177-\377])/ { my $asc = $asciify_table{$1}; error ("no ASCII for character: $1\n") unless (defined ($asc)); print STDERR "$progname: $1 -> $asc\n" if ($verbose > 3); $asc; }/xegs; return $s; } # Convert the given HTML to plain text. # If $do_urls is true, insert the URLs of BAND HREFs on the ends of # their lines. # sub de_htmlify($$) { my ($txt, $do_urls_p) = @_; my $tabstop = 20; $_ = $txt; s@ @ @g; # handle nbsp early # convert HREFs to text -- yyy --> "yyy -- xxx" # do this for but not for or . # Also, upcase the band names. # s@<(BAND)([^>]*)>(.*?) @{ my ($u, $b) = ($2, $3); if (! ($u =~ s/^.*\bHREF=\"([^\"]+)\".*$/$1/si)) { $u = undef; } # $u =~ s/\&/&/g if ($u); $b = de_entify($b); $b =~ s/(.*)/\U$1/g; (($do_urls_p && $u) ? "$b\002$u" : $b); }@xegsi; # inside

@gsi; # no blank line before this one s@

($1@gsi; s@]*>(18|21)\s*@$1$age_text

@gsi; s@]*>AA\s*@$age_aa_text

@gsi; error ("failed to clean AGE tag: \"$1\"") if (m@()@si); s@
@\001@gi; # save linebreaks s@]*>@\001\001@gi; # save paragraphs s@<[^>]+>@@g; # delete all tags s@\s+@ @g; # compress all whitespace s@\001@\n@g; # convert lines/paras back if (m/\002/) { # hack tabstops my @lines = split(/\n/, $_); # find the longest string in column 1 my $max = 0; foreach my $line (@lines) { # only one tab per line! 1 while ($line =~ s/^([^\002]*\002[^\002]*)\002(.*)$/$1 -- $2/s); if ($line =~ m/^(.*)\002/) { my $n = length($1); $max = $n if ($n > $max); } } # now indent the text in column 2 foreach my $line (@lines) { if ($line =~ m/^(.*)\002(.*)$/) { $line = $1 . (' ' x ($max - length($1))) . " -- " . $2; } } $_ = join ("\n", @lines); } s@(http:[^\s]+) *--.*$@$1@mg # kludge for "-- performing live!" BS... if ($do_urls_p); # If there's stuff after the URL on the line, break the line. s@(http:[^\s]+)([ \t]+[^ \t])@$1\n $2@mg; s@^[ \t]+@@mg; # lose whitespace at beginning of lines s@[ \t]+$@@mg; # lose whitespace at end of lines s@\n\n\n+@\n\n@g; # compress blank lines s@^\n+@@sg; # lose leading blank lines s@\n+$@@sg; # lose trailing blank lines $_ = de_entify($_); # translate character entities return $_; } # Upcase the *text* in the HTML string, being careful not to touch # tags and to do the right thing with character entities. # sub upcase_html($) { my ($str) = @_; $str =~ s/(<[^<>]*>)/\001$1\001/gsi; my @segs = split (/\001/, $str); foreach (@segs) { next if (m/^[ \t]*\n?//gs; # strip leading/trailing whitespace, compress blank lines and spaces. $html =~ s/^\s+//g; $html =~ s/\s+$//g; $html =~ s/^\n\n+/\n\n/gs; $html =~ s/^\n+//gs; $html =~ s/\n+$//gs; $html =~ s/\t/ /gs; $html =~ s/ +/ /gs; # strip paragraph/line breaks before and after blank lines 1 while ($html =~ s@(\n\n)@$2@igs); 1 while ($html =~ s@(\n\n)@$1@igs); # strip redundant BRs 1 while ($html =~ s@
\s*(\s*(]*>)\s*@$1@igs; return $html; } # The HTML validator generates an error if we use a tag that isn't # in this list (in order to detect typos.) # Mostly duplicated in utils/menuify.pl # my %allowed_tags = ( "A" => 1, "AFF" => 1, "B" => 1, "BR" => 2, "BAND" => 1, "DJ" => 1, "ETITLE" => 1, "FLYER" => 2, "GENRE" => 1, "I" => 1, "LIVE" => 2, "OTHER" => 1, "P" => 2, "PRES" => 1, "PRICE" => 1, "STATS" => 1, "STRIKE" => 1, "TABLE" => 1, "TD" => 1, "TICKET" => 2, "TIME" => 1, "TR" => 1, "U" => 1, "UL" => 1, "WEBCAST" => 2, "VIDEO" => 2, "BLURB" => 1, "DIV" => 1, "SPAN" => 1, "EVENT" => 2, "AGE" => 1, ); # Does some simple syntax-checking on the HTML: makes sure tags are # balanced, etc. # Mostly duplicated in utils/menuify.pl # We need our own copy here to give sane error messages about the tags: # we want to do validation *before* those are transformed to
, , etc. # sub validate_html($$$$$) { my ($html, $lineno, $year, $month, $dotm) = @_; # Note: line numbers in error messages aren't quite right, since any # lines commented out with "#" have been deleted before we get here... my $filename = (defined($year) ? "$dotm $months[$month-1] $year" : "no date"); error ("$filename: $lineno: no newline between and <$2>$3") if ($html =~ m@ \s* <(DJ|BAND|OTHER) \b [^<>]* > \s* ([^<>]*?) \s* <@six); my @stack = (); my $debug_html_p = 0; # lose text inside comments (but keep the newlines, for line numbering)... # $html =~ s@()@{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/^.*$//gm; "$a$b$c"; }@gse; $html =~ s/()//g; # lose comment tags themselves # Simpleminded check for stray ampersands and mis-typed entities. { my $ents = $html; $ents =~ s/&/\001&/gi; $ents =~ s/\bHREF=\"[^\"]+\"//gsi; # kludge my @ents = split(m/\001/, $ents); shift @ents; foreach (@ents) { if (! m/^&[a-z]+\d*;/si) { s/^([^\s]*\s+[^\s<>]*)\b.*$/$1/s; error ("$filename: $lineno: non-entity ampersand: \"$_\""); } } } $html =~ s/\n/\n\001/gs; $html =~ s/\s]+)/; next unless defined ($tag); $tag =~ tr/a-z/A-Z/; $tag = 'P' if ($tag eq '/P'); # kludge... my $code = $allowed_tags{$tag}; if ($tag =~ m@^/@) { # closing a tag my $otag = pop @stack; print STDERR "$lineno: POP " . ('. ' x ($#stack+1)) . "/$otag\n" if ($debug_html_p); if (!defined ($otag)) { error ("$filename: $lineno: extranious <$tag>"); } elsif ($tag ne "/$otag") { my $t2 = $tag; $t2 =~ s@^/@@; $code = $allowed_tags{$t2}; if ($code && $code == 2) { error ("$filename: $lineno: unexpected close-tag form: <$tag>"); } else { error ("$filename: $lineno: <$otag> closed by <$tag>"); } } } elsif (! $code) { error ("$filename: $lineno: unknown tag <$tag>"); } else { my $otag = $stack[$#stack] || ''; if (($tag eq 'TD' && $otag ne 'TR') || ($tag eq 'TH' && $otag ne 'TR') || ($tag eq 'TR' && $otag ne 'TABLE') || ($otag eq 'TR' && $tag ne 'TD' && $tag ne 'TH') || ($otag eq 'TABLE' && $tag ne 'TR') || ($otag eq 'OPTION' && $tag eq 'OPTION') || ($otag eq 'LI' && $tag eq 'LI') || ($tag eq 'ETITLE' && $otag ne '') || ($tag eq 'PRES' && $otag ne '') || ($tag eq 'STATS' && $otag ne '') || ($tag eq 'GENRE' && $otag ne 'STATS') || ($tag eq 'TIME' && $otag ne 'STATS') || ($tag eq 'PRICE' && $otag ne 'STATS') || ($tag eq 'AGE' && $otag ne 'STATS') || (($tag eq 'BR' || $tag eq 'P') && ($otag eq 'GENRE' || $otag eq 'PRES' || $otag eq 'ETITLE')) ) { error ("$filename: $lineno: <$tag> not allowed inside <$otag>"); } if ($code == 2) { # ignore these -- don't insist on closing tags. print STDERR "$lineno: SKIP " . ('. ' x ($#stack+1)) . "$tag\n" if ($debug_html_p); } else { # opening a new tag print STDERR "$lineno: PUSH " . ('. ' x ($#stack+1)) . "$tag\n" if ($debug_html_p); push @stack, $tag; } } } if ($#stack >= 0) { error ("$filename: $lineno: unclosed tags: <" . join (">, <", @stack) . ">"); } } # Return a string with leading whitespace added so as to center the given # text within 72 columns. # sub center_line($) { my ($line) = @_; my $cols = 72; # $fill_column ? $line = (' ' x int(($cols - length($line)) / 2)) . $line; return $line; } # Decides which lines in the text are "headings" and which are "bodies" # and adds indentation. This is for the YYYY/MM.txt files. # sub indentify($$) { my ($text, $long_lines_p) = @_; $text =~ s/[ \t]+$//gm; # lose trailing whitespace $text =~ s/\n\n\n+/\n\n/gs; # compress blank lines $text =~ s/^\n+//s; # lose leading blank lines $text =~ s/\n\n+$/\n/s; # lose trailing blank lines my @lines = split ("\n", $text); my $level = 0; my $para_head_p = 1; my $last_indented_p = 0; foreach (@lines) { s/^/' ' x (4 * $level)/e; # increase next line's indent if this line is at the head of a paragraph # and ends in a colon. my $indent_p = (m/:\s*$/ && $para_head_p); $level++ if ($indent_p); # decrease next line's indent if this line is blank, # unless we just indented on the previous non-blank line. $level-- if (m/^\s*$/ && $level > 0 && !$last_indented_p); # next line is the first line in a paragraph if this line is blank. $para_head_p = (m/^\s*$/); $last_indented_p = $indent_p; # Finally, wrap this line if it's long. if (! $long_lines_p) { $Text::Wrap::columns = 65; $_ = wrap ('', (' ' x (4 * $level)), $_) if (length($_) > $Text::Wrap::columns); } } return join ("\n", @lines); } ############################################################################## # # Parsing the calendar.txt file # ############################################################################## sub parse_calendar_entry($$@) { my ($lineno, $fast_p, @lines) = @_; $_ = $lines[0]; if (m/^(\d\d?)[-\s]+($month_re)[-\s]/io) { # "3-Jan-2001" parse_static_entry ($lineno, $fast_p, @lines); } elsif ( m/^EVERY\s+/io ) { # "Every Nth X" parse_repeating_entry ($lineno, $fast_p, @lines); } elsif ( m/^HOLIDAY\s+/io ) { # "Holiday 2-Feb" parse_holiday_entry ($lineno, @lines); } else { error ("$lineno: unparsable calendar entry heading: $_"); } } my $prev_year = 0; # for detecting misordered entries when parsing my $prev_month = 0; # (which usually means cut-and-paste went wrong.) my $prev_dotm = 0; sub parse_static_entry($$@) { my ($lineno, $fast_p, @lines) = @_; my $slineno = $lineno; $_ = shift @lines; if (! m/^(\d\d?) [-\s]+ ($month_re) [-\s]+ (\d{4}) \s* \(?($days_re)?\)? \s* (\#2)? \s* $/iox ) { error ("$lineno: unparsable DD-MMM-YYYY line: $_\n"); } my $dotm = 0 + $1; my $month = $2; my $year = 0 + $3; my $dotw = $4; my $second_event_same_day = defined($5); $month =~ tr/A-Z/a-z/; $month = $monthvals{$month}; $dotw =~ tr/A-Z/a-z/ if defined($dotw); $dotw = $dayvals{$dotw} if defined($dotw); error ("$lineno: $dotm-$months[$month-1]-$year appears more than once!\n") if (($prev_year == $year && $prev_month == $month && $prev_dotm == $dotm) && !$second_event_same_day); error ("$lineno: $dotm-$months[$month-1]-$year should be a second event " . "but isn't!\n") if ($second_event_same_day && ($prev_year != $year || $prev_month != $month || $prev_dotm != $dotm)); error ("$lineno: year $prev_year preceeds $year!\n") if ($prev_year > $year); error ("$lineno: $months[$prev_month-1] $prev_year preceeds " . "$months[$month-1] $year!\n") if ($prev_year == $year && $prev_month > $month); error ("$lineno: $prev_dotm-$months[$prev_month-1]-$prev_year preceeds " . "$dotm-$months[$month-1]-$year!\n") if ($prev_year == $year && $prev_month == $month && $prev_dotm > $dotm); my $real_dotw = dotw ($dotm, $month, $year); if (defined ($dotw)) { error ("$lineno: $dotm-$months[$month-1]-$year is a " . "$days[$real_dotw], not a $days[$dotw]") if ($real_dotw != $dotw); } else { $dotw = $real_dotw; my $m = $months[$month-1]; $m =~ s/^(...).*$/$1/; my $d = $days[$dotw]; $d =~ s/^(...).*$/$1/; print STDERR ("$progname: $lineno: $dotm-$m-$year: " . "dotw unspecified (should be $d)\n"); } $lineno++; $prev_year = $year; $prev_month = $month; $prev_dotm = $dotm; my $entry = ""; while ($#lines >= 0) { my $line = shift @lines; $line =~ s/^\s+//; $entry .= "\n$line"; } cache_inline_flyers ($year, $month); my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $entry_hash = parse_calendar_html ($dotm, $month, $year, $slineno, $entry, $fast_p, $second_event_same_day); my $listref = $calendar{$key}; my @list = (defined($listref) ? @$listref : ()); push @list, \$entry_hash; $listref = \@list; $calendar{$key} = $listref; if ($verbose > 2) { $_ = $entry; s/\\/\\\\/g; s/\"/\\\"/g; s/\n/\\n/g; s/^(.{20}).*$/$1.../; print STDERR "$progname: $slineno: stored $key = \"$_\"\n"; } } sub parse_repeating_entry($$@) { my ($lineno, $fast_p, @lines) = @_; my $desc = shift @lines; my $entry_html = ""; foreach (@lines) { s/^\s+//; $entry_html .= "$_\n"; } my $repeat_words = "(?:first|second|third|fourth|fifth|last|" . "1st|2nd|3rd|4th|5th)"; my $alter_words = "(?:other)"; if ($desc =~ m/^every\s+ ($alter_words)?\s* ($repeat_words(?:\s*,\s*$repeat_words)*)?\s* ((?:$days_re)(?:\s*,\s*(?:$days_re))*)\s* (?:from\s+(.*?))?\s* (?:to\s+(.*?))?\s* $/xio) { my $alts = $1 || ''; my $reps = $2 || ''; my $days = $3; my $from = $4; my $to = $5; my $from_year = -1; my $from_month = -1; my $from_day = -1; my $to_year = 9999; my $to_month = 99; my $to_day = 99; if ($alts ne '' && $reps ne '') { error ("$lineno: can't specify 'other' along with repetition: $desc\n"); } if ($from) { if ($from =~ m/^(\d\d?)\s*[-\s]\s*($month_re)\s*[-\s]\s*(\d\d\d\d)/io) { $from_day = 0 + $1; $_ = $2; tr/A-Z/a-z/; $from_month = $monthvals{$_}; $from_year = 0 + $3; } else { error ("$lineno: unparsable from date: \"$from\"" . " (should be DD-MMM-YYYY)\n"); } } if ($to) { if ($to =~ m/^(\d\d?)\s*[-\s]\s*($month_re)\s*[-\s]\s*(\d\d\d\d)/io) { $to_day = 0 + $1; $to_year = 0 + $3; $_ = $2; tr/A-Z/a-z/; $to_month = $monthvals{$_}; } else { error ("$lineno: unparsable to date: \"$to\"" . " (should be DD-MMM-YYYY)\n"); } } $from_year = 0 unless defined($from_year); $from_month = 0 unless defined($from_month); $from_day = 0 unless defined($from_day); $to_year = 9999 unless defined($to_year); $to_month = 13 unless defined($to_month); $to_day = 32 unless defined($to_day); my $rdesc = "Every"; my @values = (); if ($alts ne '') { # this can only be "every other DOTW". push @values, -666; $rdesc .= " Other"; } else { my @rdescs = (); foreach my $rep (split(/,/, $reps)) { $rep =~ s/^\s+//; $rep =~ s/\s+$//; if ($rep =~ m/^(first|1st)$/i) { push @values, 1; push @rdescs, "1st"; } elsif ($rep =~ m/^(second|2nd)$/i) { push @values, 2; push @rdescs, "2nd"; } elsif ($rep =~ m/^(third|3rd)$/i) { push @values, 3; push @rdescs, "3rd"; } elsif ($rep =~ m/^(fourth|4th)$/i) { # error ("$lineno: no such number as \"fourth\": " . # "did you mean \"last\" or \"fifth\"?"); push @values, 4; push @rdescs, "4th"; } elsif ($rep =~ m/^(fifth|5th)$/i) { push @values, 5; push @rdescs, "5th"; } elsif ($rep =~ m/^last$/i) { push @values, -1; push @rdescs, "Last"; } else { error ("$lineno: INTERNAL ERROR: unknown repeater: $rep\n"); } } $_ = join (", ", @rdescs); s/^(.*), ([^,]+)$/$1 and $2/; $rdesc .= " $_"; } push (@values, 0) if ($#values < 0); { my @rdescs = (); foreach (split(/,/, $days)) { s/^\s+//; s/\s+$//; tr/A-Z/a-z/; push @rdescs, $days[$dayvals{$_}]; } $_ = join (", ", @rdescs); s/^(.*), ([^,]+)$/$1 and $2/; $rdesc .= " $_"; } $rdesc =~ s/\s+/ /gs; foreach (split(/,/, $days)) { s/^\s+//; s/\s+$//; tr/A-Z/a-z/; my $day = $dayvals{$_}; foreach my $code (@values) { my $entry_hash = parse_calendar_html (undef, undef, undef, $lineno, $entry_html, $fast_p, 0); my @day_ord = (0, 1); $entry_hash->{day_ord} = \@day_ord; my @val = ($day, $code, $rdesc, $from_year, $from_month, $from_day, $to_year, $to_month, $to_day, $lineno, $entry_hash); push @repeaters, \@val; if ($verbose > 2) { $_ = $entry_html; s/\\/\\\\/g; s/\"/\\\"/g; s/\n/\\n/g; my $d = $days[$day]; my $w = ($code == 0 ? "" : $code == 1 ? "1st " : $code == 2 ? "2nd " : $code == 3 ? "3rd " : $code == 4 ? "4th " : $code == 5 ? "5th " : $code == -1 ? "Last " : $code == -666 ? "Other " : "<>"); $d =~ s/^(...).*/$1/; s/^(.{20}).*$/$1.../; print STDERR "$progname: $lineno: stored ". sprintf ("%04d-%02d-%02d - %04d-%02d-%02d", $from_year, $from_month, $from_day, $to_year, $to_month, $to_day) . " Every $w$d = \"$_\" (\"$rdesc\")\n" } } } } else { error ("$lineno: unparsable repeating entry: $desc\n"); } } sub parse_holiday_entry($@) { my ($lineno, @lines) = @_; my $entry = shift @lines; shift @lines while ($#lines != -1 && $lines[0] eq ''); error ("$lineno: multi-line holiday?") if ($#lines != -1); $entry =~ s/\s*\#.*$//s; if (! ($entry =~ m/^[a-z]+\s+(\d\d?)[-\s]+($month_re)([-\s]+(\d{4}))?\s*(.*)$/i)) { error ("$lineno: unparsable holiday line: $entry\n"); } my $dotm = 0 + $1; my $month = $2; my $year = defined($4) ? (0 + $4) : 0; my $desc = $5; $month =~ tr/A-Z/a-z/; $month = $monthvals{$month}; my $key = sprintf ("%04d-%02d-%02d-H", $year, $month, $dotm); $calendar{$key} = $desc; print STDERR "$progname: $lineno: stored $key = \"$desc\"\n" if ($verbose > 2); } my %ticket_dups; # for detecting duplicate IDs my %event_dups; # for detecting duplicate URLs # Parse the HTML into the various fields in the event hash; # returns a hash describing this event. # sub parse_calendar_html($$$$$$$) { my ($dotm, $month, $year, $lineno, $html, $fast_p, $second_event_same_day) = @_; my $static_p = defined ($dotm); if ($static_p) { my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); parse_shared_flyer ($key, $lineno, $html, $second_event_same_day); } # these are kinda slow, and not needed in --summarize-only mode. validate_html ($html, $lineno, $year, $month, $dotm) unless ($fast_p); parse_dj_tags ($html, $dotm, $month, $year) if ($static_p && !$fast_p); error ("$lineno: non-ASCII character ($1)") if ($html =~ m/([^\s -}])/s); $html =~ s/^\s*//; my ($title_col, $body_col) = ($html =~ m/^(.*?)\n\n+(.*?)\s*$/s); $title_col = $html unless ($title_col); $body_col = '' unless ($body_col); if ($body_col =~ m/\n[ \t]*\n/s) { my $m = $months[$month-1]; $m =~ s/^(...).*/$1/; print STDERR "$progname: WARNING: " . sprintf ("%02d %s %04d: ", $dotm, $m, $year) . "$lineno: blank line\n"; } my $title = extract_event_name ($title_col); my $main_room = extract_event_desc ($body_col) || ''; my $lounge; $_ = $html; if (s@^.*?(Lounge( DJs)?:\s*<(BR|P)>)@$1@is) { # cut before "Lounge" s/\n\n.*$//s; # delete all after blank line s/

.*$//s; # delete all after para break $lounge = extract_event_desc ($_); } $_ = $title_col; # #### Note: this only saves away the first (non-VIP) ticket on sale for # the event. # my $ticket = undef; my $vipticket = undef; my $onsale = undef; my $junk = $_; $junk =~ s/\n/ /gs; $junk =~ s/(]*)>@si); my $args = $1; my $url_p = 0; my $this_ticket = undef; my $this_onsale = undef; my $this_vip_p = 0; if ($args =~ m@\b((HREF)|ID)\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $url_p = $2; $this_ticket = $4 || $5; error ("$lineno: unparsable ticket URL: $this_ticket") if ($url_p && !($this_ticket =~ m/^https?:/)); error ("$lineno: unparsable ticket ID: $this_ticket") if (!$url_p && !($this_ticket =~ m/^\d{4,}$/)); } error ("unparsable TICKETS tag") unless $this_ticket; $this_ticket = $ticket_url_base . $this_ticket unless ($url_p); if ($args =~ m@\bONSALE\s*=\s*\"([^\"<>]+)\"@si) { $this_onsale = $1; error ("$lineno: unparsable ONSALE date: $this_onsale") unless ($this_onsale =~ m/\d\d?-[a-z]{3}-\d\d\d\d/si); } $args =~ s/\s*"[^"]*"\s*/ /gsi; # lose quoted strings if ($args =~ m@\bVIP\b@si) { $this_vip_p = 1; } error ("$lineno: duplicate ticket: $this_ticket") if (defined ($ticket_dups{$this_ticket})); $ticket_dups{$this_ticket} = 1; if (! $this_vip_p) { if (! defined($ticket)) { $ticket = $this_ticket; $onsale = $this_onsale; } } elsif (! defined($vipticket)) { $vipticket = $this_ticket; } } $junk = $_; $junk =~ s/\n/ /gs; $junk =~ s/(]*)>@si); my $args = $1; my ($site, $id); if ($args =~ m@\bSITE=(\"([^<>\"]+)\"|([^\s<>\"]+))@si) { $site = $2 || $3; } if ($args =~ m@\bID=(\"([^<>\"]+)\"|([^\s<>\"]+))@si) { $id = $2 || $3; } error ("$lineno: unparsable") unless ($site && $id); my $key = "$site=$id"; error ("$lineno: duplicate event ID: $key") if (defined ($event_dups{$key})); $event_dups{$key} = 1; push @event_ids, $key; } my $genre = undef; if (m@\s*(.*?)@si) { $genre = $1; $genre =~ s/<[^<>]+>/ /gs; $genre =~ s/\s+/ /gs; $genre =~ s/\s*\.*\s*$//gs; $genre = de_entify($genre); my @g = split (m/\. +/, $genre); # normalize the nine billion names of house. my %gg; foreach my $g (@g) { $g =~ s/^.* (house)$/$1/i unless ($g =~ m/\bacid\b/i); $g = "breaks" if ($g =~ m/break|broken/i); $g = "gothic" if ($g =~ m/deathrock|dark retro/i); $g = "industrial" if ($g =~ m/\bebm\b|industrialize/i); $g = "mashups" if ($g =~ m/bastard pop|remix|mashup|mash-up/i); $g = "80s" if ($g =~ m/^80s/i); $gg{$g} = 1; } @g = keys (%gg); foreach my $g (@g) { $genres{$g} = 1 + ($genres{$g} || 0); } $genre = \@g; } $_ = $html; my $webcast = undef; if (m@]*)>@si) { my $args = $1; my $url_p = 0; if ($args =~ m@\bSRC\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $webcast = $2 || $3; } error ("unparsable WEBCAST tag") unless $webcast; } else { $webcast = 'main'; } $webcast =~ tr/A-Z/a-z/; if (! ($webcast =~ m/^(main|lounge|off)$/s)) { error ("WEBCAST SRC=\"$webcast\": " . "must be \"main\", \"lounge\" or \"off\""); } $_ = $html; my $embed_video = undef; my $embed_title = undef; if (m@]*)>@si) { my $args = $1; my $url_p = 0; if ($args =~ m@\bSRC\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $embed_video = $2 || $3; } if ($args =~ m@\bTITLE\s*=\s*(\"([^\"]*)\"|([^<>\"\s]+))@si) { $embed_title = $2 || $3; } error ("unparsable VIDEO tag: no SRC") unless $embed_video; error ("unparsable VIDEO tag: no TITLE") unless $embed_title; } # Check for using "&foo=bar" in URLs instead of "&foo=bar". # s@(http:[^<>\"]+)@{ my ($u, $ou) = ($1, $1); $u =~ s/&//gs; error ("bogus entity in URL: $ou") if ($u =~ m/&/s); $ou; }@gsexi; my @urls = find_event_urls ($title_col, 0); my $url = $urls[0]; my %hash = (); $hash{title} = $title; #$hash{repeat} = $repeat; # computed in parse_calendar_event_final #$hash{holiday} = $holiday; # computed in parse_calendar_event_final #$hash{vacationp} = $vacationp; # computed in parse_calendar_event_final #$hash{day_ord} = $day_ord; # computed in parse_calendar_event_final $hash{url} = $url; $hash{ticket} = $ticket; $hash{vipticket} = $vipticket; $hash{event_ids} = \@event_ids; $hash{onsale} = $onsale; $hash{genre} = $genre; $hash{main_room} = $main_room; $hash{lounge} = $lounge; $hash{webcast} = $webcast; $hash{video} = $embed_video; $hash{videoname} = $embed_title; $hash{html_src} = $html; # fill in some more fields now, if we can. parse_calendar_html_2 (\%hash, $dotm, $month, $year) if (defined ($dotm)); return \%hash; } # Some parsing we can only do once we're sure of the exact date of the # event. We know this right away for static events, but only later for # repeating events. # sub parse_calendar_html_2($$$$) { my ($eventref, $dotm, $month, $year) = @_; my $dotw = dotw ($dotm, $month, $year); my $monstr = $months[$month-1]; $monstr =~ s/^(...).*$/$1/; my $date = "$days[$dotw], $dotm $monstr $year"; my $html = $eventref->{html_src}; $html =~ s/^\s*//; $html =~ s/\n\n.*$//s; # keep only "title" column my $photo = find_photo ($year, $month, $dotm); my ($age) = ($html =~ m@(.*?)@si); my ($start_minute, $end_minute, $real_end_minute) = extract_hours ($html, $dotw, $dotm, $month, $year, 0, 1); my $time_str = sprintf("%d:%02d - %d:%02d", $start_minute/60, $start_minute%60, $end_minute/60, $end_minute%60); my @time_list = ($year, $month, $dotm, $dotw, $start_minute, $end_minute, $real_end_minute); $eventref->{date} = $date; # altered in parse_calendar_event_final $eventref->{time} = $time_str; $eventref->{times} = \@time_list; $eventref->{photos} = $photo; $eventref->{age} = $age; return $eventref; } # After the whole calendar has been parsed, fill in some fields in the # event hashes that we can't know until we've seen everything. # E.g., we can't know if there are multiple events on the given day # until after we've seen all of them. # sub parse_calendar_event_final($$$) { my ($dotm, $month, $year) = @_; my $key = sprintf ("%04d-%02d-%02d", $year, $month, $dotm); my $listref = $calendar{$key}; return unless defined ($listref); my @list = @$listref; my $nevents = $#list+1; my $i = -1; foreach my $event (@list) { $i++; parse_calendar_event_final_1 ($$event, $nevents, $i, 0, 1); } } sub parse_calendar_event_final_1($$$$$) { my ($event, $nevents, $event_number, $known_repeater, $real_event_p) = @_; my $title = $event->{title}; my $times = $event->{times}; my $date = $event->{date}; my ($year, $month, $dotm, $dotw, $start_minute, $end_minute, $real_end_minute) = @$times; my @day_ord = ($event_number, $nevents); $event->{day_ord} = \@day_ord; # This has to be after 'day_ord' was filled in. $event->{flyer} = find_flyer ($event); ####################################################################### # Update the day of the week to, e.g., "Sunday Morning" if appropriate. ####################################################################### # Compute the appropriate description of this (time of) day... my $day = describe_day ($dotw, $start_minute, $nevents, $event_number); # Splice it back in, and store the result back in the hash. $date =~ s/($days_re)/$day/i; $event->{date} = $date; ####################################################################### # Decide whether this event is a holiday, or just before a holiday. ####################################################################### my $holiday = lookup_holiday ($year, $month, $dotm); my $monday_off_p = (defined($holiday) && $holiday =~ m/^\*/); # Mark the sunday of 3 day weekends as holidays, too, if the # holiday on the sunday or adjascent monday are federal. # if ($dotw == 0 && # today is sunday !($month == 12 && $dotm == 30) # not day-before-NYE ) { if (!defined ($holiday)) { $holiday = lookup_holiday_tomorrow ($year, $month, $dotm); $monday_off_p = (defined($holiday) && $holiday =~ m/^\*/); # Tomorrow is a holiday, but not a federal holiday. Darn. $holiday = undef unless ($monday_off_p); } # Only mention "3 day weekend" on the last event of the day. $holiday = undef unless ($event_number == $nevents-1); if (defined ($holiday)) { $holiday =~ s/^\*//s; if ($monday_off_p) { $holiday = "3 Day Weekend: $holiday"; $event->{vacationp} = 1; } } } if (defined ($holiday)) { $holiday =~ s/^\*//sx; # take off Fed marker $event->{holiday} = $holiday; } ####################################################################### # determine whether this is a repeating event. ####################################################################### if ($known_repeater) { $event->{repeat} = $known_repeater; } else { my ($dotw_number, $total_dotws) = dotw_count ($year, $month, $dotm); my $repeater = lookup_repeating_entry ($year, $month, $dotm, $dotw, $dotw_number, $total_dotws); if (defined ($repeater)) { my $rtitle = $repeater->{title}; error ("unparsable event name in repeater: $repeater") unless ($rtitle); if ($rtitle eq $title) { print STDERR "$progname: $title repeats: $title\n" if ($verbose > 2); $event->{repeat} = $repeater->{repeat}; } } } ####################################################################### # What event number is this, and how many events are there? # # Note that this does not see "repeating" events that are visible # in the calendar, but do not yet have "real" data filled in. # So the event ordinals will change as those get filled in. ####################################################################### if ($real_event_p && !($event->{title} =~ m/^\(?CLOSED|CANCELL?ED\)?$/si)) { $event->{event_ord} = $total_event_count; $event_ords[$total_event_count] = \$event; my $dd = $year * 10000 + $month * 100 + $dotm; $total_event_count++; $event_count_12mo++ if ($dd > $one_year_ago && $dd <= $stats_cutoff); $event_count_6mo++ if ($dd > $six_months_ago && $dd <= $stats_cutoff); if ($event->{html_src} =~ m/<(BAND|LIVE)\b/i) { $total_live_count++; $live_count_12mo++ if ($dd > $one_year_ago && $dd <= $stats_cutoff); $live_count_6mo++ if ($dd > $six_months_ago && $dd <= $stats_cutoff); } } } # Try to find the short name of the given event, by digging it out of # the HTML. # sub extract_event_name($) { my ($text) = @_; my $otext = $text; my ($title) = ($text =~ m@]*>\s*(.*?)\s*@si); error ("no title: $text") unless ($title && $title =~ m/[^\s]/); $title =~ s@\s*
\s*@ @gsi; error ("tags in title: $title") if ($title =~ m/[<>]/); $title =~ s/\s+/ /gsi; $title = de_entify($title); # now do some error checking on what's left. # if ($text =~ m@.*<\1>@si) { error ("multiple <$1> tags:\n\n$otext"); } if ($text =~ m@(.*?)@si) { my $a = $1; error ("unparsable AGE tag: \"$a\"") unless ($a =~ m/^(AA|18|21)$/si); } # $text =~ s@@@gsi; $text =~ s@<(ETITLE|PRES|STATS|GENRE|TIME|PRICE|AGE)\b[^<>]*>.*@@gsi; $text =~ s@<(FLYER|TICKET)\b[^<>]*>@@gsi; # $text =~ s@<(P|BR)>@ @gsi; if (! ($text =~ m/^\s*$/s)) { print STDERR "$progname: junk in event name: $otext\n\n"; } # strip trailing punctuation $title =~ s/[-:;]+$//gsi; return $title; } # Returns text briefly describing the event: basically, just the DJ/band # names in the front room (or, if that can't be found, the back room.) # Puts a "*" in front of band names. # sub extract_event_desc($) { my ($text) = @_; return undef unless $text; my $result = ''; my $count = 0; $text =~ s/\s+/ /gs; $text =~ s/]*>\s*([^\n]+?)\s*$@is) { my $bandp = defined($2); my $who = de_htmlify($3, 0); $who = "*$who" if ($bandp); $result .= "$who\n"; $count++; } elsif (m/^.*?Lounge( DJs)?:/is) { last if ($count > 0); } } return $result; } # Extracts and returns the starting and ending time of the event from # its textual description, measured in minutes past midnight. Handles # "after hours", and adds heuristic slop based on the day of the week. # sub extract_hours($$$$$$$) { my ($text, $dotw, $dotm, $month, $year, $exact_p, $warn_p) = @_; my ($start, $end); $text = de_htmlify ($text, 0); my $otext = $text; # first look for something of these forms: # # 8pm - 2am # midnight - 1AM # 10 pm - after hours # while ($text =~ m/ \b\s* ((\d\d?(:\d\d)?)? \s* ([AP]\.?M\.?|noon|midnight|midnite)) \s*-\s* ([[:alnum:]:.]+) \b(.*) $/xsi) { my $start1 = $1; my $end1 = $5; $text = $6; $start1 = parse_hour ($start1, 0); next unless defined ($start1); $end1 = parse_hour ($end1, 1); next unless defined ($end1); if (defined($start1) && defined($end1)) { $start = $start1; $end = $end1; last; } } # If that didn't work, look for something like this: # # doors @ 7pm; show @ 8pm; # doors at 7pm; show at 8pm; # doors: 7pm; show: 8pm; # # If all we have is this "doors" form, assume end time is 2AM. # if (!defined ($start)) { $text = $otext; while ($text =~ m/ \b\s* doors:? \s+ (\@|at)? \s* ((\d\d?(:\d\d)?)? \s* ([AP]\.?M\.?|noon|midnight|midnite)) \b(.*) $/xsi) { my $start1 = $2; $text = $5; $start1 = parse_hour ($start1, 0); next unless defined ($start1); if (defined($start1)) { $start = $start1; $end = (60 * 2); # 2AM last; } } } # if that didn't work, at least try to find a start time... # if (!defined ($start)) { $text = $otext; while ($text =~ m/ \b\s* ((\d\d?(:\d\d)?)? \s* ([AP]\.?M\.?|noon|midnight|midnite)) \b(.*) $/xsi) { my $start1 = $1; $text = $5; $start1 = parse_hour ($start1, 0); next unless defined ($start1); if (defined($start1)) { $start = $start1; last; } } } # if we didn't find a start or end time, warn. # my $need_warning_p = ($warn_p && (!defined ($start) || !defined ($end))); # Bur don't warn if there's no time specified for cancelled events. if ($need_warning_p && $text =~ /\b(CANCELL?ED|POSTPONED|CLOSED)\b/i) { $need_warning_p = 0; } $start = (60 * 22) unless defined($start); # default to 10pm $end = (60 * 3) unless defined($end); # default to 3am if ($need_warning_p) { my $h1 = int ($start/60); my $m1 = int ($start%60); my $h2 = int ($end/60); my $m2 = int ($end%60); my $m = $months[$month-1]; my $d = $days[$dotw]; $m =~ s/^(...).*/$1/; $d =~ s/^(...).*/$1/; # print STDERR "$progname: WARNING: " . # sprintf ("%s, %02d $m %04d: ", $d, $dotm, $year) . # "guessing: " . # sprintf ("%2d:%02d %s - %d:%02d %s\n", # ($h1 == 0 ? 12 : $h1 % 12), $m1, ($h1 < 12 ? "AM" : "PM"), # ($h2 == 0 ? 12 : $h2 % 12), $m2, ($h2 < 12 ? "AM" : "PM")); # # print STDERR "$otext\n\n"; } my $real_end = $end; my $weekend_p = ($dotw == 5 || # fri $dotw == 6 || # sat $dotw == 0 # sun (treat sunday as a weekend, since we # are usually only open on sunday if # it's a three day weekend.) ); if ($end eq 'LATE') { if ($weekend_p) { $end = (60 * 5) + 30; # on weekends, "after hours" means 5:30 AM } else { $end = (60 * 4) + 30; # on week nights, "after hours" means 4:30 AM } $real_end = $end; } elsif (!$exact_p) { # # If an explicit end time was listed in the calendar, add some slack onto # that. Add half and hour on weeknights, and an hour on the weekend. # Or an hour and a half if it's a weekend set to end at 3am or earlier. if (!$weekend_p) { # weeknight $real_end += 30; } elsif ($end <= (60 * 3)) { # <= 3AM on weekend $real_end += 90; } else { # > 3AM on weekend $real_end += 60; } } $end += (60 * 24) if ($end < $start); # end is "tomorrow" $real_end += (60 * 24) if ($real_end < $start); error("INTERNAL ERROR: end>real_end: $end, $real_end") if ($end > $real_end); error("INTERNAL ERROR: start>=end, $start, $end") if ($start >= $real_end); return ($start, $end, $real_end); } sub parse_hour($$) { my ($h, $endp) = @_; if ($h =~ m/\b(\d\d?)(:(\d\d))?\s*(A\.?M\.?)/i) { return undef if ($1 < 1 || $1 > 12); return undef if (defined($3) && ($3 < 0 || $3 > 60)); $h = 60 * $1; $h += $3 if (defined($3)); return $h; } elsif ($h =~ m/\b(\d\d?)(:(\d\d))?\s*(P\.?M\.?)/i) { return undef if ($1 < 1 || $1 > 12); return undef if (defined($3) && ($3 < 0 || $3 > 60)); $h = 60 * ($1 == 12 ? 12 : (12 + $1)); $h += $3 if (defined($3)); return $h; } elsif ($h =~ m/(\b12\s*)?\s*noon\b/i) { return 60 * 12; } elsif ($h =~ m/(\b12\s*)?\s*mid\s*-?\s*(nite|night)\b\b/i) { return 0; } elsif ($endp && $h =~ m/\b(after\b|after\s*-?\s*hours\b)/i) { return 'LATE'; } else { return undef; } } ############################################################################## # # , , and tags # ############################################################################## # Whether "DD-MMM-YYYY" has yet arrived. # sub ticket_on_sale_p($) { my ($date) = @_; my ($dd, $mm, $yyyy) = ($date =~ m/(\d\d?)-([a-z]{3})-(\d\d\d\d)/si); error ("unparsable ONSALE date: $date") unless ($yyyy); $mm =~ tr/A-Z/a-z/; $mm = $monthvals{$mm}; my ($esec, $emin, $ehour, $edotm, $emon, $eyear) = localtime (time); $emon++; $eyear += 1900; my $d0 = $yyyy * 10000 + $mm * 100 + $dd; my $d1 = $eyear * 10000 + $emon * 100 + $edotm; return ($d0 <= $d1); } # We use certain magic tags in the calendar.txt file, to save typing and # enable certain other semantic hacks. # sub rewrite_dj_tags($$$$$) { my ($html, $show_tickets_p, $onsale_date, $lineno, $tickets_form_p) = @_; # # xyz ==> xyz # xyz ==> xyz # xyz ==> xyz # xyz ==> xyz # ==> deleted # ==> deleted #
xyz # xyz ==> xyz # xyz ==> xyz # # xyz ==> # XYZ # xyz ==>

XYZ
# # xyz ==>

xyz

# xyz ==>

xyz
# xyz ==>

xyz # ==>

xyz # xyz ==>

xyz # xyz ==>
"all ages" | "xyz+ with ID" # ==> deleted # # ==>

buy tickets now!
# or

depending on $show_tickets_p # # ==> slightly different text # # use "foo" instead of "buy tickets now!" # don't link until this date # deleted $html =~ s@]*>\s*@@igs; $html =~ s@]*>\s*@@igs; $html =~ s@]*>\s*@@igs; $html =~ s@\"]+\")>(.*?) @$3@xigs; $html =~ s@\"]+\")>(.*?) @$3@xigs; $html =~ s@\"]+\")>(.*?) @$3@xigs; $html =~ s@\"]+\")>(.*?) @{ "" . upcase_html($2) . "" }@exigs; $html =~ s@(.*?)@{ "

" . upcase_html($1) . "
" }@exigs; $html =~ s@@@igs; $html =~ s@<(/?)BAND(\s+DUP)?>@<$1B>@igs; $html =~ s@@@igs; $html =~ s@@@igs; $html =~ s@@@igs; # inside