#!/usr/bin/perl -w
# Copyright © 2005-2013 Jamie Zawinski
#
# Converts "infoline.txt" to "infoline.sable" for the Festival speech synth.
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation. No representations are made about the suitability of this
# software for any purpose. It is provided "as is" without express or
# implied warranty.
#
# Created: 25-Mar-2002.

require 5;
use diagnostics;
use strict;

my $progname = $0; $progname =~ s@.*/@@g;
my $data_dir = $0; $data_dir =~ s@/[^/]*$@@;
my $version = q{ $Revision: 1.45 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;

use open ":encoding(utf8)";

my $verbose = 0;
my $debug = 0;
my $max_events = 6;

# error() is defined near the bottom of the file but called from the
# functions above it; declare it here so the prototype is known at every
# call site (otherwise -w reports "called too early to check prototype").
sub error($);


# Runs "cmp -s FILE1 FILE2" and returns cmp's exit status:
# 0 if the files are identical, non-zero otherwise (which includes the
# case where cmp itself had trouble, e.g. a file that does not exist).
# Exits via error() if cmp could not be run or was killed by a signal.
#
sub cmp_files($$) {
  my ($file1, $file2) = @_;

  my @cmd = ("cmp", "-s", "$file1", "$file2");
  print STDERR "$progname: executing \"" . join(" ", @cmd) . "\"\n"
    if ($verbose > 5);

  system (@cmd);

  # system() sets $? to -1 when the command could not be started at all;
  # without this check the bit tests below would report a bogus core dump.
  error ("$cmd[0]: $!") if ($? == -1);

  my $exit_value  = $? >> 8;
  my $signal_num  = $? & 127;
  my $dumped_core = $? & 128;

  error ("$cmd[0]: core dumped!") if ($dumped_core);
  error ("$cmd[0]: signal $signal_num!") if ($signal_num);
  return $exit_value;
}


# Runs "diff -NU2 FILE1 FILE2", letting diff write to our stdout, and
# returns diff's exit status.  Exits via error() if diff could not be
# run or was killed by a signal.
#
sub diff_files($$) {
  my ($file1, $file2) = @_;

  my @cmd = ("diff", "-NU2", "$file1", "$file2");
  print "$progname: executing \"" . join(" ", @cmd) . "\"\n"
    if ($verbose > 6);

  system (@cmd);

  # See cmp_files(): -1 means the command never ran.
  error ("$cmd[0]: $!") if ($? == -1);

  my $exit_value  = $? >> 8;
  my $signal_num  = $? & 127;
  my $dumped_core = $? & 128;

  error ("$cmd[0]: core dumped!") if ($dumped_core);
  error ("$cmd[0]: signal $signal_num!") if ($signal_num);
  return $exit_value;
}


# If the two files differ:
#   mv file2 file1
# else
#   rm file2
#
# With --debug, a difference is shown as a diff on stdout instead, and
# file1 is left untouched (file2 is removed).
#
sub rename_or_delete($$) {
  my ($file, $file_tmp) = @_;

  my $changed_p = cmp_files ($file, $file_tmp);

  if ($changed_p && $debug) {
    print STDOUT "\n" . ('#' x 79) . "\n";
    diff_files ("$file", "$file_tmp");
    $changed_p = 0;
  }

  if ($changed_p) {

    if (!rename ("$file_tmp", "$file")) {
      # Save the rename error before unlink() gets a chance to overwrite $!.
      my $err = "$!";
      unlink ("$file_tmp");
      error ("mv $file_tmp $file: $err");
    }
    print STDERR "$progname: wrote $file\n" if ($verbose);

  } else {

    # Parenthesized so that "||" applies to unlink's result.  Written as
    #   unlink "$file_tmp" || error (...)
    # the "||" binds to the (always true) string and error() never runs.
    unlink ("$file_tmp") || error ("rm $file_tmp: $!");
    print STDERR "$progname: $file unchanged\n" if ($verbose > 2);
    print STDERR "$progname: rm $file_tmp\n" if ($verbose > 3);
  }
}


# Takes the text of the infoline file and returns a version of it that
# has been rewritten for a speech synthesizer: abbreviations expanded,
# numbers and times spelled so they are read sensibly, a long list of
# names respelled phonetically, and punctuation and whitespace normalized.
#
# The substitutions are applied strictly in the order written, and many
# later rules depend on (or deliberately undo) the output of earlier ones,
# so do not reorder them.
#
sub sable_convert($) {
  my ($body) = @_;

  # All the s/// below operate on $_.  Localized so the caller's $_ is
  # left alone.
  local $_ = $body;

  s/[*]//gm;                    # asterisks are bad!

  s/\bDr\./Doctor/gi;           # avoid sentence end confusion...
  s/\bMs\./Miss/gi;
  s/\bMz\./Miss/gi;
  s/\bMr\./Mister/gi;
  s/\bMrs\./Missus/gi;
  s/\bGen\./general/gi;
  s/\bAdm\./admission/gi;
  s/\bAdv\./advance/gi;
  s/\bSt\./Saint/gi;
  s/\bvs\./versus/gi;

  # make times be pronouncable.
# s/\b([\d:.]+)am\b/$1 A-M/gi;
# s/\b([\d:.]+)pm\b/$1 P-M/gi;
  s/\b([\d:.]+)am\b/$1 AM/gi;
  s/\b([\d:.]+)pm\b/$1 PM/gi;

  # time ranges.
  s/([AP][- \.]?M\.?) +- /$1 to /gi;
  s/(\d+|midnight|midnite) *- *(\d+|midnight|midnite)/$1 to $2/gi;

  # convert money ranges "$15 - $20" -> "15 to $20"
  s/\$(\d+) *- *\$(\d+)/$1 to \$$2/gi;

  # some latin1 characters...
  # Written as \x escapes (code points U+00E0..U+00FF) so that the patterns
  # mean the same thing whatever encoding this source file is saved in;
  # there is no "use utf8" here, so raw accented literals would be read
  # as bytes.  /i makes each class cover the upper-case letters as well.
  s/[\xE0\xE1\xE2\xE3\xE4\xE5\xE6]/a/gi;      # a-grave .. ae  (grave and
                                              # acute were missing before)
  s/[\xE8\xE9\xEA\xEB]/e/gi;                  # e-grave .. e-diaeresis
  s/[\xEC\xED\xEE\xEF]/i/gi;                  # i-grave .. i-diaeresis
  s/[\xF0\xF2\xF3\xF4\xF5\xF6\xF8]/o/gi;      # eth, o-grave .. o-slash
  s/[\xF9\xFA\xFB\xFC]/u/gi;                  # u-grave .. u-diaeresis
  s/[\xFD\xFF]/y/gi;                          # y-acute, y-diaeresis
  s/[\xF1]/n/gi;                              # n-tilde

  s/(x)x+/$1/gi;                # multiple Xes can't be good.
  s/[*]//gm;                    # asterisks are bad!
  s![_/]!-!g;                   # no underscores or slashes...
  s/([a-z])([A-Z])/$1-$2/g;     # "xA" -> "x-A"
  s/(``|'')/\"/g;               # use double-quotes

  # convert three digit numbers:
  s/\b(\d)0([1-9])\b/$1-oh-$2/gi;     # "606" -> "6-oh-6", "600" -> unchanged
  s/\b(\d)(1\d)\b/$1-$2/gi;           # "616" -> "6-16", "610" -> "6-10"
  s/\b(\d)(\d)([1-9])\b/$1-$2-$3/gi;  # "626" -> "6-2-6", "620" -> unchanged

  # our address!
  s/\b3-7-5\b/three-seventy-five/gi;

  ###########################################################################

# s/\bID\b/I-D/gm;
  s/\bFolsom\b/Foal-some/gm;
# s/www\.dnalounge\.com\b/W-W-W dot DNA Lounge dot com/gm;
  s/www\.dnalounge\.com\b/WWW dot DNA Lounge dot com/gm;
  s/\bdnalounge\.com\b/DNA Lounge dot com/gm;
  s/present\b/prezent/gi;
  s/\bn:code\b/N-Code/gi;
  s/\bd:code\b/D-Code/gi;
  s/:code\b/Code/gi;
  s/Blasthaus/Blast-House/gi;
  s/\b(joy)(pad)\b/$1-$2/gi;
  s/\b(pre)(sale)\b/$1-$2/gi;
  s/\b(nu)(jazz)\b/new-$2/gi;
  s/\b(ambient)(tech)\b/$1-$2/gi;
  s/\b(code)(con)\b/$1-$2/gi;
  s/\b(trip)(hop)\b/$1-$2/gi;
  s/\b(hip)(hop)\b/$1-$2/gi;
  s/\s+AKA\b/, AKA/gi;
  s/^(\s*OM)\.?$/$1 Records presents OM./gm;
  s/\bOM\b/Ohm/gi;
  s/\b4Real\b/For-Real/gi;
  s/\b(worm)(food)\b/$1-$2/gi;
  s/^\s*(Thump Radio) presents thump\./$1./gim;
  s/\bQ[O\xD6][O\xD6]L/Cool/gi;       # \xD6 is O-diaeresis
  s/T\.R\.I\.P\./Trip/gi;
  s/\bJen[o\xF6](\b|\s)/Yay-no$1/gi;  # \xF6 is o-diaeresis
  s/\b70\'?s\b/seventies/gi;
  s/\b80\'?s\b/eighties/gi;
  s/\b90\'?s\b/nineties/gi;
  s/\bde ?luxe/D-Lucks/gi;
  s/\bpsy[- ]*/Sigh /gi;
  s/\bPA\b/P-A/g;
  s/\bBre-ad/Bry Add/gi;
  s/\biZ\b/Izz/gi;
  s/\bDiZ\b/Dizz/gi;
  s/\b(r[&]b|r and b)/are-N-bee/gi;
  s/\bsushi\b/sue-she/gi;
  s/\bblowfish\b/Blow-fish/gi;
  s/\bjorge\b/Hore-hay/gi;
  s/\bTerez\b/Tear-ez/gi;
  s/\bD\'Funk\'D\b/Dee-Funk-Dee/gi;
  s/\bLa Couqui\b/Le-Coke-Wee/gi;
  s/\bEzzy\b/Essie/gi;
  s/\bMarques\b/Mark-S/gi;
  s/\bJZ\b/Jay-Zee/gi;
  s/\b(bas)s\b/$1e/gi;          # hah! let's just assume on this one...
  s/\bnu-/new-/gi;
  s/\bN8\b/Nate/gi;
  s/\bGhreg\b/Greg/gi;
  s/\bAeon\b/Eon/gi;
  s/\bHecate\b/Hecka-tay/gi;
  s/\bRaf-One\b/Raff-one/gi;
  s/\b(C)\.(Flav)/$1-$2/gi;
# s/\b([a-z]+) ([A-Z])\b\.?/$1-$2/gi;   # e.g., "Olga T.", "Miss E".
  s/\bInciter/Insider/gi;
  s/\bAndr[e\351]s/On-dray/gi;
  s/\bCraigslist/Craig\'s List/gi;
  s/\b\.(org|com|net)\b/ dot-$1/gi;
# s/\bKMFDM\b/K-M-F-D-M/gi;
  s/\b(Kidney)(thieves)\b/$1-$2/gi;
  s/\b(Louder)(milk)\b/$1-$2/gi;
  s/\bBeni\b/Benny/gi;
  s/\bMic\b/Mike/gi;
  s/\bSen[- ]*Sei\b/Sen-say/gi;
  s/\bm\.path\b/Empath/gi;
  s/\bDJL\b/D-J-L/gi;
  s/\bDragn\'?fly\b/Dragon-fly/gi;
  s/\bSteveo\b/Steve-O/gi;
  s/\bDaly\b/Daily/gi;
  s/(h)ayes\b/$1aze/gi;
  s/\bMiguel\b/Mig-L/gi;
  s/\bMijares\b/Me-har-es/gi;
  s/\bDomingu?ez\b/Domeen-guess/gi;
  s/\bFreq\b/Freak/gi;
  s/\bFreq(System)/Freak-$1/gi;
  s/\bStradley\b/Strad-lee/gi;
  s/\bMadame\b/Madam/gi;
  s/\bMomodance\b/Mo-Mo-Dance/gi;
  s/\bOjeda\b/O-heyda/gi;
  s/\bLatrice\b/Luh-Treez/gi;
  s/\bB:Smiley\b/Be-Smiley/gi;
  s/\b(Deep)(fat)(fried)\b/$1-$2-$3/gi;
  s/\bMcGarry\b/Mac-Garry/gi;
  s/\bj:miah\b/J-My-Uh/gi;
  s/\bA:Dubl\b/A-double/gi;
  s/\bRaffi\b/Raffy/gi;
  s/\bHalou\b/Ha-lou/gi;
  s/\bDexorcist\b/Decks-or-cyst/gi;
  s/\bSukeban\b/Soo-key-bun/gi;
  s/\bdancehall\b/dance-hall/gi;
  s/\bFlo-Ology\b/flow-ology/gi;
  s/trybe/ tribe/gi;
  s/\b(ever)(soul)\b/$1-$2/gi;
  s/\bRisque\b/ris-K/gi;
  s/\bKouture\b/co-tour/gi;
  s/\bZeromancer\b/zero-man-sir/gi;
  s/\bAd Noiseam\b/Add Noise-am/gi;
  s/\bMasumi\b/Ma-sue-me/gi;
  s/\bPuimond\b/Pwee-mond/gi;
  s/\b(hog)(tied)\b/$1-$2/gi;
  s/\b(hell)(raiser)\b/$1-$2/gi;
  s/\bGiannini\b/Gee-a-nee-nee/gi;
  s/\bSunil\b/Sue-neel/gi;
  s/\bKapadia\b/Kap-ah-dee-ah/gi;
  s/\bFu\b/Foo/gi;
  s/\b(Shin)(dog)\b/$1-$2/gi;
  s/\b(b)(boys)\b/$1-$2/gi;
  s/\bAkida\b/Ah-key-da/gi;
  s/\bUphonic\b/You-phonic/gi;
  s/\bWichdokta\b/Witch-Doctor/gi;
  s/\bRhi-?Know\b/Rhino/gi;
  s/\bsigh cho/Psycho/gi;       # oops, undo change
  s/\bsigh nth/Synth/gi;        # oops, undo change
  s/\bMuss?elman\b/Mussel-mun/gi;
  s/\bCarr?ell?i\b/Carelli/gi;
  s/\bsci[-\s]?fi\b/sigh-fie/gi;
  s/\bAphrodite\b/Afro-dytee/gi;
  s/\bAye~n\b/Eye-N/gi;
  s/\bIntalekt\b/Intellect/gi;
  s/\bGridlok\b/Grid-lock/gi;
  s/\bFemmes?\sFatales?/Fem Fuhtail/gi;
  s/\bE\$KR\b/Esker/gi;
  s/\bRamiro\b/Ramee-roh/gi;
  s/\bLivin\b/Living/gi;
  s/\bMiz\b/Mizz/gi;
  s/\bVideorama\b/Video-rama/gi;
  s/\bFakir\b/Fakeer/gi;
  s/\bMusafar\b/Moosafar/gi;
  s/\bKimo\b/Keemo/gi;
  s/\b(Ren)( the Vinyl)\b/$1,$2/gi;
  s/\bIvry\b/Ivory/gi;
  s/\bDe La\b/Deelah/gi;
  s/\bYzer\b/Wiser/gi;
  s/\bKontinuum\b/Continuum/gi;
  s/flava\b/flavor/gi;
  s/\b(gold)(chain)/$1 $2/gi;
  s/\bVasquez\b/Vass-kez/gi;
  s/\bKahn\b/Con/gi;
  s/\bIshmael\b/Ish-male/gi;
  s/\b(Ex)(clip)(sect)\b/$1-$2-$3/gi;
  s/matik\b/matic/gi;
  s/fakt\b/fact/gi;
  s/\bHaujobb?\b/How-job/gi;
  s/\bGoapele\b/Go-uhpell/gi;
  s/\bCapoeira/Cap-oh-aera/gi;
  s/(ontinu)(um)/$1-$2/gi;
  s/\bZ\'?s\b/zeez/gi;
  s/\bSiouxsie\b/Susie/gi;
  s/\bSioux\b/Sue/gi;
  s/\bMauricio\b/Marr-e-c-o/gi;
  s/\bAviles\b/A-vill-s/gi;
  s/\bPsysex\b/Sigh-sex/gi;
  s/\bA\.? ?T\.? ?U\.? ?M\.?/Atom/gi;
  s/\bHesohi\b/He-so-he/gi;
  s/chyld\b/-child/gi;
  s/\bRomanowski\b/Romanow-ski/gi;
  s/\b(Arma)(get)(it)(on)\b/$1-$2-$3-$4/gi;
  s/\b(Cyrus)(rex)\b/$1 $2/gi;
  s/\bVogt\b/Vote/gi;
  s/\bGaraj\b/Garage/gi;
  s/\bNaugacide\b/Noga-side/gi;
  s/\bSoulive\b/Soul-Live/gi;
  s/S[.-]U[.-]N\.?/Sun/gi;
  s/\b(Trouble)(maker)/$1-$2/gi;
  s/\bVoodou/Voodoo/gi;
  s/\b(Why)(sall)/$1-$2/gi;
  s/4m-?At-?T/format/gi;
  s/\b(axel)(son)/$1-$2/gi;
  s/\bWhalen\b/Whale-N/gi;
  s/\bAaron\b/Erin/gi;
  s/\b(bio)(flavor)/$1-$2/gi;
  s/\bBi-?pole/by-pole/gi;
  s/\b(B)(Moves)/$1-$2/gi;
  s/\b(brown)(boy)/$1-$2/gi;
  s/\bMathis/Math-S/gi;
  s/\bBoomschnazz/Boom-shnoz/gi;
  s/\bchaos/kaos/gi;
  s/\bConsuelo/con-sway-low/gi;
  s/\bD[^a-z]*Ville/Da Vill/gi;
  s/\bCRYKIT\b/Cricket/gi;
  s/(wild)(child)/$1-$2/gi;
  s/\bAju\b/A-jew/gi;
  s/\bMejia/May-uh/gi;
  s/\b(Dee)(android)/$1-$2/gi;
  s/\bDemilo/De-Mylo/gi;
  s/\bDhyon\b/dyon/gi;
  s/\bMoeller/Moller/gi;
  s/\b(Diva)(Sonic)/$1-$2/gi;
  s/\bDjall\b/DJ All/gi;
  s/\bChi-?Ka\b/cheeka/gi;
  s/(dub)(hub)/$1-$2/gi;
  s/\bEmile/E-meel/gi;
  s/\bFusch?ia/Feooshuh/gi;
  s/\bGalen\b/Gaylen/gi;
  s/\bGianni\b/Gee-annie/gi;
  s/\bGine\b/Gina/gi;
  s/\bRene\b/Rennay/gi;
  s/\bGir-?Lie8\b/Girlie 8/gi;
  s/\bGrimace\b/Grimis/gi;
  s/\bGraeme\b/Gray-M/gi;
  s/\bJaswho\??/Jazz-Who/gi;
  s/\bJu-?Ju\b/Jew-Jew/gi;
  s/\bHallucinogen/Heloosinajen/gi;
  s/\bBassix\b/basics/gi;
  s/\bJonene\b/Joe-nay-nay/gi;
  s/\bJeremiah\b/Jerry-my-ah/gi;
  s/\bTejada\b/Tay-hada/gi;
# s/\bO[^a-z]Reilly\b/Oh-Ryeley/gi;
  s/\bKepi\b/Keppie/gi;
  s/\bKowe/Cow/gi;
  s/\bLogrey\b/Low-Gray/gi;
  s/\bL\'/El-/gi;
  s/\bIllapes\b/Illa-pess/gi;
  s/\bMalachy\b/Mella-ky/gi;
  s/\bMei\b/May/gi;
  s/\bLwun\b/Lun/gi;
  s/\bMerlot\b/Merlow/gi;
  s/\bMaeda\b/Mayda/gi;
  s/\bNtula\b/Entulla/gi;
  s/\bObu\b/Oboo/gi;
  s/\bOlli\b/Ollie/gi;
  s/\bOphelia\b/Ofeeleeuh/gi;
  s/\bLeath\b/Leeth/gi;
  s/\bGauthier\b/Gow-tee-a/gi;
  s/\bSherburne\b/Share-burn/gi;
  s/\b(Plate)(shifter)/$1-$2/gi;
  s/\bR[e3][^a-z]ach\b/Re-Ack/gi;
  s/\bRa-?Ven\b/Raven/gi;
  s/\bRhythmist\b/Rith-mist/gi;
  s/\bRob-ot\b/Robot/gi;
  s/\bsake\b/sa-kay/gi;
  s/\bGhael\b/Gale/gi;
  s/\bSifu\b/See-foo/gi;
  s/\b(Silence)(fiction)/$1-$2/gi;
  s/\bSolaris\b/Sole-eris/gi;
  s/\bInfinti\b/infinity/gi;
  s/\b(sound):/$1-/gi;
  s/\bGerrard\b/Jerard/gi;
  s/\b(Tee)(money)/$1-$2/gi;
  s/\bTenashus\b/Tenayshus/gi;
  s/\bDuvante\b/Doo-vantay/gi;
  s/\bTruby\b/Trooby/gi;
  s/\b(up)(rock)/$1-$2/gi;
  s/\bUriel\b/You-Real/gi;
  s/\bVajra\b/Vajruh/gi;
  s/dokta\b/-doctor/gi;
  s/\b(zipper)(spy)/$1-$2/gi;
  s/\bZach\b/Zack/gi;
  s/\bPauli\b/Polly/gi;
  s/\bBio-?Jeff\b/Byo Jeff/gi;
  s/\bHysterisis\b/Historeesis/gi;
  s/\bRoneous\b/Roney-us/gi;
  s/\bdominatrix\b/Dominaytrix/gi;
  s/\bEBM\b/E-B-M/gi;
  s/\bJwebb\b/Jay-Web/gi;
  s/\b(Grave)(dig)/$1-$2/gi;
  s/\bUnd\b/and/gi;
  s/(Spectacular)!\s*Spectacular!/$1-$1/gi;
  s/\$(\d+ *spoo+ky)/$1/gi;
  s/\bPharoah\b/Farrow/gi;
  s/\bDerby\b/Durby/gi;
  s/\b(lethal)(mix)\b/$1-$2/gi;
  s/\bJayvi\b/J-V/gi;
  s/\bKwai\b/Cuh-why/gi;
  s/\bGershoni\b/Gurshoni/gi;
  s/\bDaniella\b/Dan-yella/gi;
  s/\bColeman\b/Collmen/gi;
  s/\bDe Muerte\b/Dee Mware-Tay/gi;
  s/\bMacho\b/Moch-O/gi;
  s/\bSasquatcho\b/Sass-kwotch-O/gi;
  s/\bGigante\b/Gigantay/gi;
  s/\bSh(ei|ie)k\b/Sheek/gi;
  s/\bPhysique\b/Fuh-Zeek/gi;
  s/\bRasputiny\b/Rasputeeny/gi;
  s/\bEl Pollo/El Poyo/gi;
  s/\bVerdalet\b/Verdalay/gi;
  s/\bAriela\b/R-E-Ella/gi;
  s/\b(Morgen)(stern)\b/$1-$2/gi;
  s/\bLica Sto\b/Leeka Stow/gi;
  s/\bDiablo\b/Dee-ablo/gi;
  s/\bBlag\b/Blagg/gi;
  s/\brock[^a-z]+(n|and)[^a-z]+roll\b/rock-and-roll/gi;
  s/\b(Triple)(wide)/$1-$2/gi;
  s/\bIzzy/Issy/gi;
  s/\b(\d+)ND\b/$1nd/g;
  s/\b(\d+)RD\b/$1rd/g;
  s/\b(\d+)TH\b/$1th/g;
  s/Dirtyhertz/dirty-hurts/gi;
  s/(Chub)(ettes)/$1-$2/gi;
  s/\bMidori\b/Medori/gi;
  s/(re)(schedule)/$1-$2/gi;
  s/W\.A\.S\.P\.?/Wasp/gi;
  s/P\.?A\.W\.S\.?/Paws/gi;
  s/\bKoi\b/Coy/gi;
  s/\bDickies\b/Dickees/gi;
  s/\bNYC\b/New York/gi;
  s/\b(Witch)(doctor)/$1-$2/gi;
  s/\btiki\b/teekee/gi;
  s/\bLeethalmix\b/Lethal Mix/gi;
  s/\bFauxnique\b/Foe-Neek/gi;
  s/\bQ[- ]*Bert\b/Q-Bert/gi;
  s/\b(death)(rock)\b/$1-$2/gi;
  s/\b(love)(slap)\b/$1-$2/gi;
  s/\b(pay)(back)\b/$1-$2/gi;
  s/\bsynthpop\b/sinth-pop/gi;
  s/\balt\b/olt/gi;
# s/\bDecay\b/D-K/gi;
  s/\bDecay\b/DK/gi;
  s/\bToph\b/Tofe/gi;
  s/\bNetik\b/Nettick/gi;
  s/\bFarina\b/Freena/gi;
  s/\bMicronaut\b/Micro-not/gi;
  s/"noise\.to\.signal\.05"/"Noise To Signal Oh Five"/gi;
  s/\bSolevibe\b/Soul-Vibe/gi;
  s/\bRelm\b/Realm/gi;
  s/\bDaniela\b/Dan-Yella/gi;
  s/\b2x4\b/two-by-four/gi;
  s/\b3x5\b/three-by-five/gi;
  s/\bgo[ -]?gos\b/go-go's/gi;
  s/\bopio\b/Oh-Pee-Oh/gi;
  s/D\.U\.S\.T\.?/Dust/gi;
  s/\bJunkies\b/Junky's/gi;
  s/\bKZSU\b/K-Zee-S-U/gi;
  s/\bEpoxies\b/E-poxy's/gi;
  s/\bGroovie\b/Groovy/gi;
  s/\bGhoulies\b/Goo-leez/gi;
  s/\b4onefunk\b/Four-One-Funk/gi;
  s/\bJae\b/Jay/gi;
  s/\b(hammer)(fall)\b/$1-$2/gi;
  s/\b(ed)(guy)\b/$1-$2/gi;
  s/\brox+\b/Rocks/gi;
  s/Dekonstrukt/Deconstruct/gi;
  s/\bdjs\b/DJs/gsi;
  s/\bvjs\b/VJs/gsi;
  s/\bvj\b/VJ/gsi;
  s/\bDore\b/Door/g;
  s/\bcutest\b/qutest/gi;
  s/\bcorsetry\b/corsetree/gi;
  s/\bPresidente\b/Presidenty/gi;
  s/sigh chedelic/psychedelic/gi;
  s/\bDonaldson\b/Donldson/gi;
  s/-vs-/versus/gi;
  s/accoustic/acoostic/gi;
  s/Records/records/g;
  s/\bButoh\b/Bootoh/gi;
  s/\bCaliban\b/Callyban/gi;
  s/2na/tuna/gi;
  s/\bFortier\b/Forty-a/gi;
  s/Combichrist/Combi-Christ/gi;
  s/:CODEtv/Code TV/gi;
  s/\bCosmosis\b/Cosmosiss/gi;
  s/\bBardot\b/Bardo/gi;
  s/D-Soul-IV-Reel/D-Soul-For-Reel/gi;
  s/\bKUSF\b/KUSF/gi;
  s/\bDenki\b/Denky/gi;
  s/Depeche/Depesh/gi;
  s/\belementsf\b/Element SF/gi;
  s/\bDuvdev\b/Dove-Deff/gi;
  s/\bECTV\b/ECTV/gi;
  s/\bFerrato\b/Furrotto/gi;
  s/\bemcee/MC/gi;
  s/\bEnochian/Enokian/gi;
  s/Strategi-K/Strategic/gi;
  s/Eyephunk/I-Funk/gi;
  s/@/ at /gi;
  s/\bPerfect\b/Purfect/gi;
  s/\.([^\s])/ $1/gi;           # lose dot in "foo .and more"
  s/ - / /gi;                   # lose long hyphens
  s/\bFrau\b/Frow/gi;
  s/\bFreebsd\b/Free B-S-D/gi;
  s/Gabe Real/Gabe Reel/gi;
  s/Genitorturers/Jenny-Torturers/gi;
  s/\bManufactura\b/Manufacturra/gi;
  s/\bGoethe\b/Gurta/gi;
  s/\bHatiras\b/Hah-Teerus/gi;
  s/\bybr\b/Y-B-R/gi;
  s/&/and/gi;
  s/\bRocc\b/Rock/gi;
  s/\bCino\b/Seeno/gi;
  s/\bSpesh\b/Spessh/gi;
  s/\bJito\b/G-Toe/gi;
  s/\bJocasta\b/Jacasta/gi;
  s/\bQuix\b/Quicks/gi;
  s/\bi-Z\b/Izz/gi;
  s/\bdi-Z\b/Dizz/gi;
  s/Kaskade/Cascade/gi;
  s/\bPhaler\b/Feller/gi;
  s/\bBobien\b/Bobby-N/gi;
  s/\blive\b/lyve/gi;
  s/\bkrs\b/K-R-S/gi;
  s/\bKTVU\b/K-T-V-U/gi;
  s/Lifesavas/Life-Savers/gi;
  s/Live105/Live One Oh Five/gi;
  s/\bRhymtyme\b/Rhyme-Time/gi;
  s/Geometrist/Geometryst/gi;
  s/\bMelyss\b/Meliss/gi;
  s/Baloff/Bayloff/gi;
  s/\bTello\b/Tell-o/gi;
  s/\bMykl\b/Michael/gi;
  s/Mystchief/Mischief/gi;
  s/Nursies/Nur-sees/gi;
  s/Neuromotor/Neuro-Motor/gi;
  s/Jaine/Jane/gi;
  s/Pathogen/Pathugen/gi;
  s/\bXiao\b/Zow/gi;
# s/\bVIP\b/V-I-P/gi;
  s/Politix/Politics/gi;
  s/\bMCs\b/MCs/gi;
  s/\bPraxis\b/Pracksis/gi;
  s/Raison D.etre/Rayzen Dettra/gi;
  s/\bAmeet\b/A-Meet/gi;
  s/\bMancias\b/Manchez/gi;
  s/\bSaifir\b/Say-Fear/gi;
  s/\bSamira\b/Su-Meera/gi;
  s/\bIndymedia/Indy-Media/gi;
  s/\bSariah/Su-Riah/gi;
  s/\bLatelle/Ladell/gi;
  s/chupa\s*cabras?/Choopa Cobras/gi;
  s/\bSety\b/Setty/gi;
  s/Sigh chic/Psychic/gi;
  s/\bSOS\b/S-O-S/gi;
  s/\bBuffet\b/Buffae/gi;
  s/\bSpinderella/Spinderalla/gi;
  s/\bKPFA\b/KPFA/gi;
  s/\bStiletta\b/Stilletta/gi;
  s/\bDevil-Ettes/Devillettes/gi;
  s/E-poxy/Epoxie/gi;
  s/\bIdjut\b/Idiot/gi;
  s/\bPhenomenauts\b/Phenomenots/gi;
  s/\bMc-Guiness\b/McGuiness/gi;
  s/\bStakeaart\b/Stake-Art/gi;
  s/\bTofe\b/Toffe/gi;
  s/\bTrashina\b/Trash-eena/gi;
  s/\bTreavor\b/Trevor/gi;
  s/\bThreat\b/Thrett/gi;
  s/\bSkool\b/School/gi;
  s/\bTycho\b/Tyco/gi;
  s/\bUFO\!?/U-F-O/gi;
  s/\bNefertiti\b/Nefer-teetee/gi;
  s/\bXLR8R\b/Accelerator/gi;
  s/\bsfstation\b/S-F Station/gi;
  s/\bXia\b/Zow/gi;
  s/\bZeph\b/Zeff/gi;
  s/\bZombifiers\b/Zombie-fyers/gi;
  s/\bKAOS\b/Chaos/gi;
  s/\bfaux\b/foe/gi;
  s/\bpresents\b/prezents/gi;
  s/\bindie\b/Indy/gi;
  s/\bYO\b/Yo/g;
  s/\bMz\b/Miss/gi;
  s/\b(Mysterious) D\b/$1 Dee/gi;
  s/\b21\+/Twenty-one and over/gi;
  s/\b18\+/Eighteen and over/gi;
  s/\b18 and over/Eighteen and over/gi;
  s/\b21 and over/Twenty-one and over/gi;
  s/( and over)( with ID)/-and-over, $2/gi;
  s/\bEFF\b/E-F-F/gi;
  s/\bLexor\b/Leckser/gi;
  s/\bAlotta Boutt[^\s.;!]+/A Lotta Boo-tay/gi;
  s/\bStarre\b/Star/gi;
  s/\bDonimo\b/Dawnimo/gi;
  s/\bFuque\b/Fuck/gi;
  s/\bmon-?q\b/monk/gsi;
  s/\bltd\./limited/gsi;
  s/\bcontest\b/con-test/gsi;
  s/Wiedlin/Weedlin/gsi;
  s/first 100\b/first hundred/gsi;
  s/\bRahni\b/Ronny/gsi;
  s/(tranny)(shack)/$1 $2/gsi;
  s/\b24-7\b/Twenty Four Seven/gsi;
  s/\bDub\b/Dubb/gsi;
  s/\b(eye)(scream)\b/$1-$2/gsi;
  s/\b li'?l \b/little/gsix;
  s/de vire\b/dee veer/gsi;
  s/Metropolus/Metropolis/gsi;
  s/Roccopura/Rocko-poora/gsi;
  s/\bFaroff/Far-Off/gsi;
  s/\b(electro)(pop)/$1-$2/gsi;
  s/\bLA\b/L-A/gs;
  s/(Valentine)z/$1s/gsi;
  s/D[^a-z]*(Flower)/De Flower/gsi;
  s/(Press)(on)/$1-$2/gsi;
  s/\bDe[^a-z]*Ville\b/Deville/gsi;
  s/\bMynx\b/Minks/gsi;
  s/d[^a-z]*Meanor/Demeanor/gsi;
  s/\bDe[^a-z]*Lish\b/Deelish/gsi;
  s/\bLa[^a-z]*Roux\b/Laroux/gsi;
  s/\bTang\b/Taing/gsi;
  s/\bsq?urr+l\b/squirrel/gsi;
  s/\bDanyol\b/Daniel/gsi;
  s/\b3RDEYEGIRL\b/Third Eye Girl/gsi;
  s/\bHaute\b/Hot/gsi;
  s/\bIamdynamite\b/I Am Dynamite/gsi;
  s/\bSFIEC\b/S F I E C/gsi;
  s/\bDAMSF\b/D A M S F/gsi;
  s/\bR3y Guti3rr3z\b/Ray Gutierrez/gsi;
  s/\bMyster\b/Mister/gsi;
  s/\bZomboy\b/Zom Boy/gsi;
  s/\bA Plus D\b/Ay Plus Dee/gsi;
  s/\bK[. ]R[. ]I[. ]T\b/Crit/gsi;
  s/\bDza\b/Dizza/gsi;
  s/\bvis-a-vis\b/veese-a-vee/gsi;
  s/Flossafee/Philosophy/gs;
  s/\bL[.\s]*Rucus/L-Ruckus/gsi;

  s/\b(Ma?c)-([A-Z])/$1$2/g;    # undo Mc-Gee -> McGee
  s/\bL-A Fey/La Fey/g;
  s/4-15 to 8-5-7-4417/4 1 5, 8 5 7, 4 4 1 7/g;  # Oh geez

  ###########################################################################

  s/:(\s)/!$1/g;                # colons -> exclam
  s/\.[.:;]*([.:;])/$1/g;       # punctuation after period -> just that punct.
  s/;(\s)/. $1/g;               # semicolons -> full stop
  s/\b([AP]-?M) *$/$1./gm;      # full stop after times at end-of-line.

  # NOTE(review): the next statement replaces "&" with itself, i.e. it is a
  # no-op as written.  It looks as though entity/markup text was lost from
  # this file at some point -- confirm against the upstream script.
  s/&/&/g;

  # NOTE(review): this statement reached us as "s//after/g".  An empty
  # pattern does not mean "match nothing": Perl re-uses whichever pattern
  # last matched successfully, so it could rewrite arbitrary text as
  # "after".  That cannot be intended, so it is disabled here.  Presumably
  # the original pattern was an angle bracket that got stripped along with
  # other markup -- TODO confirm and restore the real pattern.
# s//after/g;

  s/\$(\d+)\b/$1 dollars/g;

# s/(\s[A-Z]\.\n)/$1\n/gsi;     # extra \n if line ends in single letter.
  s/\n\n+/\n/gsi;               # strip blank lines

  s@\n\s*(All events)( are)@\n$1,$2@gs;
  s@\b(Thank you for calling!)@\n$1@gsi;
  # NOTE(review): replaces the phrase with itself (apart from letter case
  # being preserved via $1), so this is a no-op as written; the replacement
  # presumably carried markup that has been lost -- confirm upstream.
  s@\b(Transmission ends\.)@$1@gsi;

  s/^(\s*[a-z])/\U$1/gm;        # Upcase first letter on line.
  s/\. *$/!/gm;                 # Use bang instead of period.

  $body = $_;

  my @lines = split(/\n/, $body);
  foreach (@lines) {
    if (m/^[^\s<]/si) {         # date line
      $_ = "\n$_";
    } elsif (m/^ /) {           # small pause at each comma.
      # NOTE(review): comma -> comma is a no-op as written; the replacement
      # presumably included a pause tag that has been lost.
      s@,@,@gsi;
    }

    # unindent all lines.
    s/^ +//gm;
    s/ +$//gm;
  }
  $body = join ("\n", @lines);

  # NOTE(review): the three "\n" literals look like the remains of header
  # lines whose markup was lost; the leading whitespace they add is
  # stripped again just below.
  $body = ("\n" .
           "\n" .
           "\n" .
           $body .
           "\n\n");

  # for Mac/Twilio, screw all that Sable stuff.
  $body =~ s/<[^<>]+>//gs;      # drop anything that looks like a tag
  $body =~ s/^\s+//gs;          # no leading whitespace
  $body =~ s/\s+$/\n/gs;        # exactly one trailing newline
  $body =~ s/[ \t]+/ /gs;       # collapse runs of spaces and tabs

  return $body;
}


# Reads INFILE, runs it through sable_convert(), and writes the result to
# OUTFILE.  The output goes to "OUTFILE.tmp" first and is only moved into
# place by rename_or_delete() if it differs from the existing OUTFILE.
# Exits via error() if a file cannot be opened or written.
#
sub sable_convert_file($$) {
  my ($infile, $outfile) = @_;

  open (my $in, '<', $infile) || error ("$infile: $!");
  # Slurp the whole file.  $/ is only undefined inside this do-block so the
  # setting does not leak into the functions called below.
  my $body = do { local $/ = undef; <$in> };
  close ($in);
  $body = '' unless defined ($body);

  $body = sable_convert ($body);

  my $file_tmp = "$outfile.tmp";
  open (my $out, '>', $file_tmp) || error ("$file_tmp: $!");
  (print $out $body) || error ("$file_tmp: $!");
  # Buffered write errors (e.g. disk full) only show up at close time.
  close ($out) || error ("$file_tmp: $!");

  rename_or_delete ("$outfile", "$file_tmp");
}


# Prints the message to stderr, prefixed with the program name, and exits
# with status 1.
#
sub error($) {
  my ($err) = @_;
  print STDERR "$progname: $err\n";
  exit 1;
}

# Prints the command-line synopsis to stderr and exits with status 1.
#
sub usage() {
  print STDERR "usage: $progname [--verbose] [--debug] infile outfile\n";
  exit 1;
}

# Parses the command line and converts the input file.
#   --verbose   increase verbosity by one (may be repeated)
#   -v, -vv...  increase verbosity by the number of "v"s
#   --debug     show a diff on stdout instead of replacing the output file
# Exactly two non-option arguments are required: infile and outfile.
#
sub main() {
  my ($infile, $outfile);

  # Test with defined() so that a false-looking argument such as "0" does
  # not silently end option parsing.
  while (defined (my $arg = shift @ARGV)) {
    if    ($arg eq "--verbose") { $verbose++; }
    elsif ($arg eq "--debug")   { $debug++; }
    elsif ($arg =~ m/^-v+$/)    { $verbose += length($arg)-1; }
    elsif ($arg =~ m/^-./)      { usage(); }
    elsif (!defined($infile))   { $infile = $arg; }
    elsif (!defined($outfile))  { $outfile = $arg; }
    else                        { usage(); }
  }

  usage() unless (defined($infile)  && $infile  ne '' &&
                  defined($outfile) && $outfile ne '');

  sable_convert_file ($infile, $outfile);
}

main();
exit 0;