#!/usr/bin/perl -w
# Copyright © 2005 Jamie Zawinski
#
# Converts "infoline.txt" to "infoline.sable" for the Festival speech synth.
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation.  No representations are made about the suitability of this
# software for any purpose.  It is provided "as is" without express or
# implied warranty.
#
# Created: 25-Mar-2002.

require 5;
use diagnostics;
use strict;

my $progname = $0; $progname =~ s@.*/@@g;
my $data_dir = $0; $data_dir =~ s@/[^/]*$@@;
my $version = q{ $Revision: 1.5 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;

my $verbose = 0;
my $debug = 0;
my $max_events = 6;


# sable_convert (BODY)
#
# Takes the full text of the info-line file as one string and returns a
# rewritten copy in which times, numbers, punctuation and a long list of
# names are respelled.  The rules are applied strictly in the order written;
# several later rules depend on (or undo) the output of earlier ones, so do
# not reorder them.
#
# The input is treated as a string of Latin-1 bytes: accented letters are
# matched by their byte values (the same convention as the \351 escape in
# the "Andres" rule), so this file behaves the same whatever encoding it is
# saved in.
#
# Returns the converted text with leading whitespace removed and exactly one
# trailing newline.  Anything that looks like a <tag> is stripped at the end.
#
sub sable_convert {
  my ($body) = @_;

  # Work on a localized $_ so the caller's $_ is not clobbered.
  local $_ = $body;

  s/[*]//gm;      # asterisks are bad!

  # make times be pronouncable.
  s/\b([\d:.]+)am\b/$1 A-M/gi;
  s/\b([\d:.]+)pm\b/$1 P-M/gi;

  # time ranges.
  s/([AP][- \.]M\.?) +- /$1 to /gi;
  s/(\d+|midnight|midnite) *- *(\d+|midnight|midnite)/$1 to $2/gi;

  # convert money ranges "$15 - $20" -> "15 to $20"
  s/\$(\d+) *- *\$(\d+)/$1 to \$$2/gi;

  # some latin1 characters...
  # Written as byte escapes rather than literal characters so that the
  # classes do not silently turn into UTF-8 byte soup if this file is
  # re-encoded.
  s/[\xE2-\xE6]/a/gi;              # a-circumflex .. ae ligature
  s/[\xE8-\xEB]/e/gi;              # e-grave .. e-diaeresis
  s/[\xEC-\xEF]/i/gi;              # i-grave .. i-diaeresis
  s/[\xF0\xF2-\xF6\xF8]/o/gi;      # eth, o-grave .. o-diaeresis, o-slash
  s/[\xF9-\xFC]/u/gi;              # u-grave .. u-diaeresis
  s/[\xFD\xFF]/y/gi;               # y-acute, y-diaeresis
  s/[\xF1]/n/gi;                   # n-tilde

  s/(x)x+/$1/gi;                   # multiple Xes can't be good.
  s/[*]//gm;                       # asterisks are bad!
  s![_/]!-!g;                      # no underscores or slashes...
  s/([a-z])([A-Z])/$1-$2/g;        # "xA" -> "x-A"
  s/(``|'')/\"/g;                  # use double-quotes

  # convert three digit numbers:
  s/\b(\d)0([1-9])\b/$1-oh-$2/gi;      # "606" -> "6-oh-6", "600" -> unchanged
  s/\b(\d)(1\d)\b/$1-$2/gi;            # "616" -> "6-16", "610" -> "6-10"
  s/\b(\d)(\d)([1-9])\b/$1-$2-$3/gi;   # "626" -> "6-2-6" ("620" is left as-is)

  # our address!
  s/\b3-7-5\b/three-seventy-five/gi;

  ###########################################################################

  s/\bID\b/I-D/gm;
  s/\bFolsom\b/Foal-some/gm;
  s/www\.dnalounge\.com\b/W-W-W dot DNA Lounge dot com/gm;
  s/\bdnalounge\.com\b/DNA Lounge dot com/gm;
  s/present\b/prezent/gi;
  s/\bn:code\b/N-Code/gi;
  s/\bd:code\b/D-Code/gi;
  s/:code\b/Code/gi;
  s/Blasthaus/Blast-House/gi;
  s/\b(joy)(pad)\b/$1-$2/gi;
  s/\b(pre)(sale)\b/$1-$2/gi;
  s/\b(nu)(jazz)\b/new-$2/gi;
  s/\b(ambient)(tech)\b/$1-$2/gi;
  s/\b(code)(con)\b/$1-$2/gi;
  s/\b(trip)(hop)\b/$1-$2/gi;
  s/\b(hip)(hop)\b/$1-$2/gi;
  s/\s+AKA\b/, AKA/gi;
  s/^(\s*OM)\.?$/$1 Records presents OM./gm;
  s/\bOM\b/Ohm/gi;
  s/\b4Real\b/For-Real/gi;
  s/\b(worm)(food)\b/$1-$2/gi;
  s/^\s*(Thump Radio) presents thump\./$1./gim;
  s/\bQ[O\xD6][O\xD6]L/Cool/gi;        # \xD6 is Latin-1 O-diaeresis
  s/T\.R\.I\.P\./Trip/gi;
  s/\bJen[o\xF6](\b|\s)/Yay-no$1/gi;   # \xF6 is Latin-1 o-diaeresis
  s/\b70\'?s\b/seventies/gi;
  s/\b80\'?s\b/eighties/gi;
  s/\b90\'?s\b/nineties/gi;
  s/\bde ?luxe/D-Lucks/gi;
  s/\bpsy[- ]*/Sigh /gi;
  s/\bPA\b/P-A/g;
  s/\bBre-ad/Bry Add/gi;
  s/\biZ\b/Izz/gi;
  s/\bDiZ\b/Dizz/gi;
  s/\b(r[&]b|r and b)/are-N-bee/gi;
  s/\bsushi\b/sue-she/gi;
  s/\bblowfish\b/Blow-fish/gi;
  s/\bjorge\b/Hore-hay/gi;
  s/\bTerez\b/Tear-ez/gi;
  s/\bD\'Funk\'D\b/Dee-Funk-Dee/gi;
  s/\bLa Couqui\b/Le-Coke-Wee/gi;
  s/\bEzzy\b/Essie/gi;
  s/\bMarques\b/Mark-S/gi;
  s/\bJZ\b/Jay-Zee/gi;
  s/\b(bas)s\b/$1e/gi;    # hah! let's just assume on this one...
  s/\bnu-/new-/gi;
  s/\bN8\b/Nate/gi;
  s/\bGhreg\b/Greg/gi;
  s/\bAeon\b/Eon/gi;
  s/\bHecate\b/Hecka-tay/gi;
  s/\bRaf-One\b/Raff-one/gi;
  s/\b(C)\.(Flav)/$1-$2/gi;
# s/\b([a-z]+) ([A-Z])\b\.?/$1-$2/gi;   # e.g., "Olga T.", "Miss E".
  s/\bInciter/Insider/gi;
  s/\bAndr[e\351]s/On-dray/gi;
  s/\bCraigslist/Craig\'s List/gi;
  s/\b\.(org|com|net)\b/ dot-$1/gi;
  s/\bKMFDM\b/K-M-F-D-M/gi;
  s/\b(Kidney)(thieves)\b/$1-$2/gi;
  s/\b(Louder)(milk)\b/$1-$2/gi;
  s/\bBeni\b/Benny/gi;
  s/\bMic\b/Mike/gi;
  s/\bSen[- ]*Sei\b/Sen-say/gi;
  s/\bm\.path\b/Empath/gi;
  s/\bDJL\b/D-J-L/gi;
  s/\bDragn\'?fly\b/Dragon-fly/gi;
  s/\bSteveo\b/Steve-O/gi;
  s/\bDaly\b/Daily/gi;
  s/(h)ayes\b/$1aze/gi;
  s/\bMiguel\b/Mig-L/gi;
  s/\bMijares\b/Me-har-es/gi;
  s/\bDomingu?ez\b/Domeen-guess/gi;
  s/\bFreq\b/Freak/gi;
  s/\bFreq(System)/Freak-$1/gi;
  s/\bStradley\b/Strad-lee/gi;
  s/\bMadame\b/Madam/gi;
  s/\bMomodance\b/Mo-Mo-Dance/gi;
  s/\bOjeda\b/O-heyda/gi;
  s/\bLatrice\b/Luh-Treez/gi;
  s/\bB:Smiley\b/Be-Smiley/gi;
  s/\b(Deep)(fat)(fried)\b/$1-$2-$3/gi;
  s/\bMcGarry\b/Mac-Garry/gi;
  s/\bj:miah\b/J-My-Uh/gi;
  s/\bA:Dubl\b/A-double/gi;
  s/\bRaffi\b/Raffy/gi;
  s/\bHalou\b/Ha-lou/gi;
  s/\bDexorcist\b/Decks-or-cyst/gi;
  s/\bSukeban\b/Soo-key-bun/gi;
  s/\bdancehall\b/dance-hall/gi;
  s/\bFlo-Ology\b/flow-ology/gi;
  s/trybe/ tribe/gi;
  s/\b(ever)(soul)\b/$1-$2/gi;
  s/\bRisque\b/ris-K/gi;
  s/\bKouture\b/co-tour/gi;
  s/\bZeromancer\b/zero-man-sir/gi;
  s/\bAd Noiseam\b/Add Noise-am/gi;
  s/\bMasumi\b/Ma-sue-me/gi;
  s/\bPuimond\b/Pwee-mond/gi;
  s/\b(hog)(tied)\b/$1-$2/gi;
  s/\b(hell)(raiser)\b/$1-$2/gi;
  s/\bGiannini\b/Gee-a-nee-nee/gi;
  s/\bSunil\b/Sue-neel/gi;
  s/\bKapadia\b/Kap-ah-dee-ah/gi;
  s/\bFu\b/Foo/gi;
  s/\b(Shin)(dog)\b/$1-$2/gi;
  s/\b(b)(boys)\b/$1-$2/gi;
  s/\bAkida\b/Ah-key-da/gi;
  s/\bUphonic\b/You-phonic/gi;
  s/\bWichdokta\b/Witch-Doctor/gi;
  s/\bRhi-?Know\b/Rhino/gi;
  s/\bsigh cho/Psycho/gi;    # oops, undo change
  s/\bsigh nth/Synth/gi;     # oops, undo change
  s/\bMuss?elman\b/Mussel-mun/gi;
  s/\bCarr?ell?i\b/Carelli/gi;
  s/\bsci[-\s]?fi\b/sigh-fie/gi;
  s/\bAphrodite\b/Afro-dytee/gi;
  s/\bAye~n\b/Eye-N/gi;
  s/\bIntalekt\b/Intellect/gi;
  s/\bGridlok\b/Grid-lock/gi;
  s/\bFemmes?\sFatales?/Fem Fuhtail/gi;
  s/\bE\$KR\b/Esker/gi;
  s/\bRamiro\b/Ramee-roh/gi;
  s/\bLivin\b/Living/gi;
  s/\bMiz\b/Mizz/gi;
  s/\bVideorama\b/Video-rama/gi;
  s/\bFakir\b/Fakeer/gi;
  s/\bMusafar\b/Moosafar/gi;
  s/\bKimo\b/Keemo/gi;
  s/\b(Ren)( the Vinyl)\b/$1,$2/gi;
  s/\bIvry\b/Ivory/gi;
  s/\bDe La\b/Deelah/gi;
  s/\bYzer\b/Wiser/gi;
  s/\bKontinuum\b/Continuum/gi;
  s/flava\b/flavor/gi;
  s/\b(gold)(chain)/$1 $2/gi;
  s/\bVasquez\b/Vass-kez/gi;
  s/\bKahn\b/Con/gi;
  s/\bIshmael\b/Ish-male/gi;
  s/\b(Ex)(clip)(sect)\b/$1-$2-$3/gi;
  s/matik\b/matic/gi;
  s/fakt\b/fact/gi;
  s/\bHaujobb?\b/How-job/gi;
  s/\bGoapele\b/Go-uhpell/gi;
  s/\bCapoeira/Cap-oh-aera/gi;
  s/(ontinu)(um)/$1-$2/gi;
  s/\bZ\'?s\b/zeez/gi;
  s/\bSiouxsie\b/Susie/gi;
  s/\bSioux\b/Sue/gi;
  s/\bMauricio\b/Marr-e-c-o/gi;
  s/\bAviles\b/A-vill-s/gi;
  s/\bPsysex\b/Sigh-sex/gi;
  s/\bA\.? ?T\.? ?U\.? ?M\.?/Atom/gi;
  s/\bHesohi\b/He-so-he/gi;
  s/chyld\b/-child/gi;
  s/\bRomanowski\b/Romanow-ski/gi;
  s/\b(Arma)(get)(it)(on)\b/$1-$2-$3-$4/gi;
  s/\b(Cyrus)(rex)\b/$1 $2/gi;
  s/\bVogt\b/Vote/gi;
  s/\bGaraj\b/Garage/gi;
  s/\bNaugacide\b/Noga-side/gi;
  s/\bSoulive\b/Soul-Live/gi;
  s/S[.-]U[.-]N\.?/Sun/gi;
  s/\b(Trouble)(maker)/$1-$2/gi;
  s/\bVoodou/Voodoo/gi;
  s/\b(Why)(sall)/$1-$2/gi;
  s/4m-?At-?T/format/gi;
  s/\b(axel)(son)/$1-$2/gi;
  s/\bWhalen\b/Whale-N/gi;
  s/\bAaron\b/Erin/gi;
  s/\b(bio)(flavor)/$1-$2/gi;
  s/\bBi-?pole/by-pole/gi;
  s/\b(B)(Moves)/$1-$2/gi;
  s/\b(brown)(boy)/$1-$2/gi;
  s/\bMathis/Math-S/gi;
  s/\bBoomschnazz/Boom-shnoz/gi;
  s/\bchaos/kaos/gi;
  s/\bConsuelo/con-sway-low/gi;
  s/\bD[^a-z]Ville/de-vill/gi;
  s/\bCRYKIT\b/Cricket/gi;
  s/(wild)(child)/$1-$2/gi;
  s/\bAju\b/A-jew/gi;
  s/\bMejia/May-uh/gi;
  s/\b(Dee)(android)/$1-$2/gi;
  s/\bDemilo/De-Mylo/gi;
  s/\bDhyon\b/dyon/gi;
  s/\bMoeller/Moller/gi;
  s/\b(Diva)(Sonic)/$1-$2/gi;
  s/\bDjall\b/DJ All/gi;
  s/\bChi-?Ka\b/cheeka/gi;
  s/(dub)(hub)/$1-$2/gi;
  s/\bEmile/E-meel/gi;
  s/\bFusch?ia/Feooshuh/gi;
  s/\bGalen\b/Gaylen/gi;
  s/\bGianni\b/Gee-annie/gi;
  s/\bGine\b/Gina/gi;
  s/\bRene\b/Rennay/gi;
  s/\bGir-?Lie8\b/Girlie 8/gi;
  s/\bGrimace\b/Grimis/gi;
  s/\bGraeme\b/Gray-M/gi;
  s/\bJaswho\??/Jazz-Who/gi;
  s/\bJu-?Ju\b/Jew-Jew/gi;
  s/\bHallucinogen/Heloosinajen/gi;
  s/\bBassix\b/basics/gi;
  s/\bJonene\b/Joe-nay-nay/gi;
  s/\bJeremiah\b/Jerry-my-ah/gi;
  s/\bTejada\b/Tay-hada/gi;
# s/\bO[^a-z]Reilly\b/Oh-Ryeley/gi;
  s/\bKepi\b/Keppie/gi;
  s/\bKowe/Cow/gi;
  s/\bLogrey\b/Low-Gray/gi;
  s/\bL\'/El-/gi;
  s/\bIllapes\b/Illa-pess/gi;
  s/\bMalachy\b/Mella-ky/gi;
  s/\bMei\b/May/gi;
  s/\bLwun\b/Lun/gi;
  s/\bMerlot\b/Merlow/gi;
  s/\bMaeda\b/Mayda/gi;
  s/\bNtula\b/Entulla/gi;
  s/\bObu\b/Oboo/gi;
  s/\bOlli\b/Ollie/gi;
  s/\bOphelia\b/Ofeeleeuh/gi;
  s/\bLeath\b/Leeth/gi;
  s/\bGauthier\b/Gow-tee-a/gi;
  s/\bSherburne\b/Share-burn/gi;
  s/\b(Plate)(shifter)/$1-$2/gi;
  s/\bR[e3][^a-z]ach\b/Re-Ack/gi;
  s/\bRa-?Ven\b/Raven/gi;
  s/\bRhythmist\b/Rith-mist/gi;
  s/\bRob-ot\b/Robot/gi;
  s/\bsake\b/sa-kay/gi;
  s/\bGhael\b/Gale/gi;
  s/\bSifu\b/See-foo/gi;
  s/\b(Silence)(fiction)/$1-$2/gi;
  s/\bSolaris\b/Sole-eris/gi;
  s/\bInfinti\b/infinity/gi;
  s/\b(sound):/$1-/gi;
  s/\bGerrard\b/Jerard/gi;
  s/\b(Tee)(money)/$1-$2/gi;
  s/\bTenashus\b/Tenayshus/gi;
  s/\bDuvante\b/Doo-vantay/gi;
  s/\bTruby\b/Trooby/gi;
  s/\b(up)(rock)/$1-$2/gi;
  s/\bUriel\b/You-Real/gi;
  s/\bVajra\b/Vajruh/gi;
  s/dokta\b/-doctor/gi;
  s/\b(zipper)(spy)/$1-$2/gi;
  s/\bZach\b/Zack/gi;
  s/\bPauli\b/Polly/gi;
  s/\bBio-?Jeff\b/Byo Jeff/gi;
  s/\bHysterisis\b/Historeesis/gi;
  s/\bRoneous\b/Roney-us/gi;
  s/\bdominatrix\b/Dominaytrix/gi;
  s/\bEBM\b/E-B-M/gi;
  s/\bJwebb\b/Jay-Web/gi;
  s/\b(Grave)(dig)/$1-$2/gi;
  s/\bUnd\b/and/gi;
  s/(Spectacular)!\s*Spectacular!/$1-$1/gi;
  s/\$(\d+ *spoo+ky)/$1/gi;
  s/\bPharoah\b/Farrow/gi;
  s/\bDerby\b/Durby/gi;
  s/\b(lethal)(mix)\b/$1-$2/gi;
  s/\bJayvi\b/J-V/gi;
  s/\bKwai\b/Cuh-why/gi;
  s/\bGershoni\b/Gurshoni/gi;
  s/\bDaniella\b/Dan-yella/gi;
  s/\bColeman\b/Collmen/gi;
  s/\bDe Muerte\b/Dee Mware-Tay/gi;
  s/\bMacho\b/Moch-O/gi;
  s/\bSasquatcho\b/Sass-kwotch-O/gi;
  s/\bGigante\b/Gigantay/gi;
  s/\bSh(ei|ie)k\b/Sheek/gi;
  s/\bPhysique\b/Fuh-Zeek/gi;
  s/\bRasputiny\b/Rasputeeny/gi;
  s/\bEl Pollo/El Poyo/gi;
  s/\bVerdalet\b/Verdalay/gi;
  s/\bAriela\b/R-E-Ella/gi;
  s/\b(Morgen)(stern)\b/$1-$2/gi;
  s/\bLica Sto\b/Leeka Stow/gi;
  s/\bDiablo\b/Dee-ablo/gi;
  s/\bBlag\b/Blagg/gi;
  s/\brock[^a-z]+(n|and)[^a-z]+roll\b/rock-and-roll/gi;
  s/\b(Triple)(wide)/$1-$2/gi;
  s/\bIzzy/Issy/gi;
  # NOTE(review): every ordinal suffix is rewritten to "th" ("2nd" -> "2th");
  # presumably the synth only needs *an* ordinal marker -- confirm.
  s/\b(\d+)ND\b/$1th/gi;
  s/\b(\d+)RD\b/$1th/gi;
  s/\b(\d+)TH\b/$1th/gi;
  s/Dirtyhertz/dirty-hurts/gi;
  s/(Chub)(ettes)/$1-$2/gi;
  s/\bMidori\b/Medori/gi;
  s/(re)(schedule)/$1-$2/gi;
  s/W\.A\.S\.P\.?/Wasp/gi;
  s/P\.?A\.W\.S\.?/Paws/gi;
  s/\bKoi\b/Coy/gi;
  s/\bDickies\b/Dickees/gi;
  s/\bNYC\b/New York/gi;
  s/\b(Witch)(doctor)/$1-$2/gi;
  s/\btiki\b/teekee/gi;
  s/\bLeethalmix\b/Lethal Mix/gi;
  s/\bFauxnique\b/Foe-Neek/gi;
  s/\bQ[- ]*Bert\b/Q-Bert/gi;
  s/\b(death)(rock)\b/$1-$2/gi;
  s/\b(love)(slap)\b/$1-$2/gi;
  s/\b(pay)(back)\b/$1-$2/gi;
  s/\b(synth)(pop)\b/$1-$2/gi;
  s/\balt\b/olt/gi;
  s/\bDecay\b/D-K/gi;
  s/\bToph\b/Tofe/gi;
  s/\bNetik\b/Nettick/gi;
  s/\bFarina\b/Freena/gi;
  s/\bMicronaut\b/Micro-not/gi;
  s/"noise\.to\.signal\.05"/"Noise To Signal Oh Five"/gi;
  s/\bSolevibe\b/Soul-Vibe/gi;
  s/\bRelm\b/Realm/gi;
  s/\bDaniela\b/Dan-Yella/gi;
  s/\b2x4\b/two-by-four/gi;
  s/\b3x5\b/three-by-five/gi;
  s/\bgo[ -]?gos\b/go-go's/gi;
  s/\bopio\b/Oh-Pee-Oh/gi;
  s/D\.U\.S\.T\.?/Dust/gi;
  s/\bJunkies\b/Junky's/gi;
  s/\bKZSU\b/K-Zee-S-U/gi;
  s/\bEpoxies\b/E-poxy's/gi;
  s/\bGroovie\b/Groovy/gi;
  s/\bGhoulies\b/Goo-leez/gi;
  s/\b4onefunk\b/Four-One-Funk/gi;
  s/\bJae\b/Jay/gi;
  s/\b(hammer)(fall)\b/$1-$2/gi;
  s/\b(ed)(guy)\b/$1-$2/gi;
  s/\brox+\b/Rocks/gi;
  s/Dekonstrukt/Deconstruct/gi;
  s/\bdjs\b/DJs/g;
  s/\bDore\b/Door/g;
  s/\bcutest\b/qutest/gi;
  s/\bcorsetry\b/corsetree/gi;
  s/\bPresidente\b/Presidenty/gi;
  s/sigh chedelic/psychedelic/gi;
  s/\bDonaldson\b/Donldson/gi;
  s/-vs-/versus/gi;
  s/accoustic/acoostic/gi;
  s/Records/records/g;
  s/\bButoh\b/Bootoh/gi;
  s/\bCaliban\b/Callyban/gi;
  s/2na/tuna/gi;
  s/\bFortier\b/Forty-a/gi;
  s/Combichrist/Combi-Christ/gi;
  s/:CODEtv/Code TV/gi;
  s/\bCosmosis\b/Cosmosiss/gi;
  s/\bBardot\b/Bardo/gi;
  s/D-Soul-IV-Reel/D-Soul-For-Reel/gi;
  s/\bKUSF\b/KUSF/gi;
  s/\bDenki\b/Denky/gi;
  s/Depeche/Depesh/gi;
  s/\belementsf\b/Element SF/gi;
  s/\bDuvdev\b/Dove-Deff/gi;
  s/\bECTV\b/ECTV/gi;
  s/\bFerrato\b/Furrotto/gi;
  s/\bemcee/MC/gi;
  s/\bEnochian/Enokian/gi;
  s/Strategi-K/Strategic/gi;
  s/Eyephunk/I-Funk/gi;
  s/@/ at /gi;
  s/\bPerfect\b/Purfect/gi;
  s/\.([^\s])/ $1/gi;     # lose dot in "foo .and more"
  s/ - //gi;              # lose long hyphens
  s/\bFrau\b/Frow/gi;
  s/\bFreebsd\b/Free B-S-D/gi;
  s/Gabe Real/Gabe Reel/gi;
  s/Genitorturers/Jenny-Torturers/gi;
  s/\bManufactura\b/Manufacturra/gi;
  s/\bGoethe\b/Gurta/gi;
  s/\bHatiras\b/Hah-Teerus/gi;
  s/\bybr\b/Y-B-R/gi;
  s/&/and/gi;
  s/\bRocc\b/Rock/gi;
  s/\bCino\b/Seeno/gi;
  s/\bSpesh\b/Spessh/gi;
  s/\bJito\b/G-Toe/gi;
  s/\bJocasta\b/Jacasta/gi;
  s/\bQuix\b/Quicks/gi;
  s/\bi-Z\b/Izz/gi;
  s/\bdi-Z\b/Dizz/gi;
  s/Kaskade/Cascade/gi;
  s/\bPhaler\b/Feller/gi;
  s/\bBobien\b/Bobby-N/gi;
  s/\blive\b/lyve/gi;
  s/\bkrs\b/K-R-S/gi;
  s/\bKTVU\b/K-T-V-U/gi;
  s/Lifesavas/Life-Savers/gi;
  s/Live105/Live One Oh Five/gi;
  s/\bRhymtyme\b/Rhyme-Time/gi;
  s/Geometrist/Geometryst/gi;
  s/\bMelyss\b/Meliss/gi;
  s/Baloff/Bayloff/gi;
  s/\bTello\b/Tell-o/gi;
  s/\bMykl\b/Michael/gi;
  s/Mystchief/Mischief/gi;
  s/Nursies/Nur-sees/gi;
  s/Neuromotor/Neuro-Motor/gi;
  s/Jaine/Jane/gi;
  s/Pathogen/Pathugen/gi;
  s/\bXiao\b/Zow/gi;
  s/\bVIP\b/V-I-P/gi;
  s/Politix/Politics/gi;
  s/\bMCs\b/MCs/gi;
  s/\bPraxis\b/Pracksis/gi;
  s/Raison D.etre/Rayzen Dettra/gi;
  s/\bAmeet\b/A-Meet/gi;
  s/\bMancias\b/Manchez/gi;
  s/\bSaifir\b/Say-Fear/gi;
  s/\bSamira\b/Su-Meera/gi;
  s/\bIndymedia/Indy-Media/gi;
  s/\bSariah/Su-Riah/gi;
  s/\bLatelle/Ladell/gi;
  s/chupa\s*cabras?/Choopa Cobras/gi;
  s/\bSety\b/Setty/gi;
  s/Sigh chic/Psychic/gi;
  s/\bSOS\b/S-O-S/gi;
  s/\bBuffet\b/Buffae/gi;
  s/\bSpinderella/Spinderalla/gi;
  s/\bKPFA\b/KPFA/gi;
  s/\bStiletta\b/Stilletta/gi;
  s/\bDevil-Ettes/Devillettes/gi;
  s/E-poxy/Epoxie/gi;
  s/\bIdjut\b/Idiot/gi;
  s/\bPhenomenauts\b/Phenomenots/gi;
  s/\bMc-Guiness\b/McGuiness/gi;
  s/\bStakeaart\b/Stake-Art/gi;
  s/\bTofe\b/Toffe/gi;
  s/\bTrashina\b/Trash-eena/gi;
  s/\bTreavor\b/Trevor/gi;
  s/\bThreat\b/Thrett/gi;
  s/\bSkool\b/School/gi;
  s/\bTycho\b/Tyco/gi;
  s/\bUFO\!?/U-F-O/gi;
  s/\bNefertiti\b/Nefer-teetee/gi;
  s/\bXLR8R\b/Accelerator/gi;
  s/\bsfstation\b/S-F Station/gi;
  s/\bXia\b/Zow/gi;
  s/\bZeph\b/Zeff/gi;
  s/\bZombifiers\b/Zombie-fyers/gi;
  s/\bKAOS\b/Chaos/gi;
  s/\bfaux\b/foe/gi;
  s/\bpresents\b/prezents/gi;
  s/\bindie\b/Indy/gi;
  s/\bYO\b/Yo/g;

  ###########################################################################

  s/:(\s)/!$1/g;             # colons -> exclam
  s/\.[.:;]*([.:;])/$1/g;    # punctuation after period -> just that punct.
  s/;(\s)/. $1/g;            # semicolons -> full stop
  s/\b([AP]-M) *$/$1./gm;    # full stop after times at end-of-line.

  # NOTE(review): several substitutions from here on replace text with
  # itself as written (this one, the comma rule, and the "Thank you" /
  # "Transmission ends" rules).  Presumably they once inserted markup that
  # has been lost from this copy; they are kept so the hooks stay visible.
  s/&/&/g;

  # Spell out angle brackets so the tag-stripping pass at the end cannot
  # swallow ordinary text that happens to sit between a "<" and a ">".
  # NOTE(review): this copy of the file had a single `s//after/g` here -- an
  # empty pattern, which makes Perl reuse whatever regex last matched (or
  # match at every position if none has).  The "before" half is
  # reconstructed; confirm the wording against the upstream script.
  s/</before/g;
  s/>/after/g;

# s/(\s[A-Z]\.\n)/$1\n/gsi;  # extra \n if line ends in single letter.
  s/\n\n+/\n/gsi;            # strip blank lines

  s@\n\s*(All events)( are)@\n$1,$2@gs;
  s@\b(Thank you for calling!)@$1@gsi;
  s@\b(Transmission ends\.)@$1@gsi;

  $body = $_;

  my @lines = split(/\n/, $body);
  foreach (@lines) {
    if (m/^[^\s<]/si) {    # date line
      $_ = "\n$_";         # ...gets a blank line in front of it.
    } elsif (m/^ /) {
      # small pause at each comma.
      s@,@,@gsi;
    }

    # unindent all lines.
    s/^ +//gm;
    s/ +$//gm;
  }

  $body = join ("\n", @lines);
  $body = ("\n" . "\n" . "\n" . $body . "\n\n");

  # for Mac, screw all that Sable stuff.
  $body =~ s/<[^<>]+>//gs;
  $body =~ s/^\s+//gs;
  $body =~ s/\s+$/\n/gs;

  return $body;
}


# sable_convert_file (INFILE, OUTFILE)
#
# Reads all of INFILE, runs it through sable_convert(), and writes the
# result to OUTFILE (overwriting it).  Both files are handled as raw bytes.
# Any open or close failure is reported through error(), which exits.
#
sub sable_convert_file {
  my ($infile, $outfile) = @_;

  open (my $in, '<', $infile) || error ("$infile: $!");
  my $body = join ('', <$in>);
  close $in;

  $body = sable_convert ($body);

  #### no rename_or_delete ...

  open (my $out, '>', $outfile) || error ("$outfile: $!");
  print $out $body;
  # Buffered write errors only show up at close time, so check it.
  close ($out) || error ("$outfile: $!");
}


# error (MESSAGE)
#
# Prints "progname: MESSAGE" to stderr and exits with status 1.
#
sub error {
  my ($err) = @_;
  print STDERR "$progname: $err\n";
  exit 1;
}

# usage ()
#
# Prints the command-line synopsis to stderr and exits with status 1.
#
sub usage {
  print STDERR "usage: $progname [--verbose] infile outfile\n";
  exit 1;
}

# main ()
#
# Parses @ARGV ("--verbose" / "-v..." flags plus exactly two file names) and
# converts the first file into the second.  Unknown options, extra
# arguments, or a missing file name all end in usage().
#
sub main {
  my ($infile, $outfile);
  while ($#ARGV >= 0) {
    my $arg = shift @ARGV;
    if ($arg eq "--verbose") { $verbose++; }
    elsif ($arg =~ m/^-v+$/) { $verbose += length($arg)-1; }
    elsif ($arg =~ m/^-./) { usage(); }
    elsif (!defined($infile)) { $infile = $arg; }
    elsif (!defined($outfile)) { $outfile = $arg; }
    else { usage(); }
  }

  # Both names are required; without this check an undefined file name
  # would be handed straight to open().
  usage() unless (defined($infile) && defined($outfile));

  sable_convert_file ($infile, $outfile);
}

# Only act as a program when run directly; when the file is loaded with
# require/do (e.g. from a test) just define the subs.
unless (caller) {
  main();
  exit 0;
}

1;