#!/usr/bin/perl -w # Copyright © 2018-2021 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # This is an extremely simple WebSocket server for DNA Lounge internal apps, # including the webcast chat and the music videos request system. # # # *************************************************************************** # # PLEASE NOTE! # # As far as the public webcast video chat system goes, this code obviously # makes *no* attempt at authentication or validation, and does only the # tiniest amount of input verification. Without much effort, you can # probably find a way to shit on other people's fun. # # Just know that nobody is impressed by that. So please don't. # # Play nice. # # *************************************************************************** # # # # Details: # # - Any messages you send are sent to every other participant who connected # with the same path part. (Messages are not echoed back to yourself). # # - On the chat channel, the last hour's worth of messages are echoed # back at connect time, to populate the log with a bit of history. # # - For non-public channels, connections are denied unless the https # set-up presents a valid DNA_AUTH cookie (which means, JavaScript can # only connect to this server if the invoking browser has already logged # in). Exceptions include: # # - The client IP is localhost, or the server IP. # - The client IP matches a per-channel whitelist. # # - You say "ping" I say "pong". # # - To restrict to same-origin, use: --origin yourdomain.com # This means that connections will be rejected when initiated from # JavaScript that was not served from your domain, to avoid cross-site # hijacking and exfiltration of your authentication cookies. # # # There's SSL support, but it was a bad idea and you shouldn't use it: # # It turns out that the IO::Socket::SSL module is not suitable for use in # servers, because as it tries to transparently hide SSL and pretend that # it is just a plain old socket, it does blocking reads at times when # non-blocking behavior is expected. That means that if a client # disconnects at just the wrong time, the WebSocket server will randomly # hang and stop servicing any connections at all. Hooray! # # A better approach is to run the unencrypted websocket server on an # internal, private IP, and let Apache handle the crypto while proxying # for it: # # LoadModule proxy_module modules/mod_proxy.so # LoadModule proxy_wstunnel_module modules/mod_proxy_wstunnel.so # # # ProxyPass / "ws://localhost:[PRIVATE-PORT]/" disablereuse=on # ProxyPassReverse / "ws://localhost:[PRIVATE-PORT]/" disablereuse=on # SSLEngine on # SSLCertificateKeyFile [...etc...] # # # This is also better practice because Apache is far less likely to # contain an exploit that will leak your private keys than is some # crumbling edifice of Perl libraries glommed together from various # questionable sources. # # BUT! Each open connection will consume an Apache preform process and # will blow through your ServerLimit faster than you expected. # If you expect to have hundreds of websock clients staying connected, # proxying SSL with "haproxy" works better than Apache. # # frontend websock_ssl # option http-server-close # bind 0.0.0.0:NNNN ssl crt XXX.pem # backend websock_backend # server websock-local 127.0.0.1:MMMM # # Also note that browsers don't like self-signed certs in websockets. # # Created: 6-Sep-2018. require 5; use diagnostics; use strict; use POSIX; use Socket; use IO::Socket::SSL; use Net::WebSocket::Server; use Sys::Hostname; # Not yours BEGIN { push @INC, "/var/www/dnalounge/utils/"; } use dna_auth; use open ":encoding(utf8)"; my $progname = $0; $progname =~ s@.*/@@g; $progname =~ s/\.pl$//s; my ($version) = ('$Revision: 1.32 $' =~ m/\s(\d[.\d]+)\s/s); my $verbose = 0; my $debug_p = 0; my %channel_info = ( '^/jwztv' => { #whitelist => '^(50\.0\.18\.32|75\.101\.62\.19)$', whitelist => '.', }, '^/irc' => { connect => \&irc_connect_hook, text => \&irc_text_hook, whitelist => '.', blacklist => '^(63\.80\.184\.76)$', }, ); my $irc_log_file_base = '/var/log/dnairc'; my $irc_metadata_file = '/home/archive/metadata.txt'; my $irc_metadata2_file = '/home/archive/metadata2.txt'; my $server = undef; # If we don't call shutdown() upon signals and other abnormal exits, then # the kernel keeps listening on our port number for up to 30 seconds, and # we can't re-launch until that times out. How very. # # But I guess sometimes that happens anyway? WTF. # END { LOG ("END") if ($verbose); $server->shutdown() if $server; } sub signal_cleanup($) { my ($s) = @_; LOG ("SIG$s"); $server->shutdown() if $server; $server = undef; exit (2); # This causes END{} to run. } $SIG{TERM} = \&signal_cleanup; # kill $SIG{INT} = \&signal_cleanup; # shell ^C $SIG{QUIT} = \&signal_cleanup; # shell ^| $SIG{KILL} = \&signal_cleanup; # nope $SIG{ABRT} = \&signal_cleanup; $SIG{HUP} = \&signal_cleanup; sub url_unquote($) { my ($url) = @_; $url =~ s/[+]/ /g; $url =~ s/%([a-z0-9]{2})/chr(hex($1))/ige; return $url; } sub channel_info($$) { my ($channel, $field) = @_; foreach my $k (sort keys %channel_info) { if ($channel =~ m/$k/s) { return $channel_info{$k}->{$field}; } } return undef; } sub launch_server($$;$$$) { my ($sslp, $port, $key, $crt, $origin) = @_; LOG ("listening on port $port") if ($verbose); $IO::Socket::SSL::DEBUG = $verbose-1 if ($verbose); $SSL_ERROR = ''; error ("origin should be a domain name: $origin") if (defined($origin) && !($origin =~ m/^[-_a-z\d]+(\.[-_a-z\d]+)+$/s)); my $S = ($sslp ? IO::Socket::SSL->new (Listen => 5, LocalPort => $port, SSL_cert_file => $crt, SSL_key_file => $key, ReuseAddr => 1) : IO::Socket::INET->new (Listen => 5, LocalPort => $port, ReuseAddr => 1)); error ("socket failed: $! $SSL_ERROR") unless $S; # Sometimes the accept() down inside WebSocket/Server.pm blocks inside # of IO::Socket::SSL if the client hangs up during SSL setup, as happens # (I think) when a phone briefly wakes up, starts to reconnect, and then # sleeps again before SSL is done. I had hoped that this would set that # accept() to non-blocking, but it does not: # $S->blocking(0); # Try to avoid "Address already in use" when restarting. # Since END calls shutdown, this shouldn't be necessary. # Also, this doesn't help. The fucking kernel is still marking our # port as "in use" for 30+ seconds after we have cleanly exited, # kernel 2.6.32 Centos 6.2. How very. # # Need to do this in ->new instead, maybe? # # setsockopt ($S, SOL_SOCKET, SO_REUSEADDR, 1) || # LOG ("SO_REUSEADDR failed: $!"); # # Update: Apparently the fix to this was to set ReuseAddr above. my $server_ip = inet_ntoa ((gethostbyname (hostname()))[4]); $server = Net::WebSocket::Server->new (listen => $S, on_connect => sub { my ($serv, $conn) = @_; $conn->on ( handshake => sub { my ($conn, $handshake) = @_; my $client_ip = $conn->ip(); my $client_port = $conn->port(); # Save the path part of the URL into $conn so that we can tell # which connections are a part of the same "room". my $res = $handshake->req->resource_name || ''; $conn->{resource_name} = $res; # If this was proxied by Apache, find the original source IP # for access control. if (($client_ip eq $server_ip || $client_ip eq '127.0.0.1') && $handshake->req && $handshake->req->fields) { my $ip2 = $handshake->req->fields->{'x-forwarded-for'}; if ($ip2) { LOG ("$res: proxy: $client_ip -> $ip2") if ($verbose); $client_ip = $ip2; } } # For simplicity $client_ip = '127.0.0.1' if ($client_ip eq $server_ip); # Save the IP and port for later diagnostics, since disconnect() # doesn't receive a connection object that still has an IP. my $cid = "$client_ip:$client_port$res"; $conn->{cid} = $cid; $conn->{client_ip} = $client_ip; # Make sure the JS code is hosted from our domain. if ($origin) { my $ourl = $handshake->res->origin || ''; my ($ohost) = ($ourl =~ m@^[^:/]+://([^:/]+)@s); if ($ohost && $ohost =~ m/\b\Q$origin\E$/si) { LOG ("$cid: origin matches: $origin, $ourl") if ($verbose); } else { LOG ("$cid: origin mismatch: $origin, $ourl"); $conn->disconnect(); return; } } my $auth_required_p = !$debug_p; if ($client_ip eq '127.0.0.1') { LOG ("$cid: localhost $client_ip") if ($verbose); $auth_required_p = 0; } else { my $wre = channel_info ($res, 'whitelist'); my $bre = channel_info ($res, 'blacklist'); if ($wre && $client_ip =~ m/$wre/s) { LOG ("$cid: whitelist $res $client_ip") if ($verbose); $auth_required_p = 0; } if ($bre && $client_ip =~ m/$bre/s) { LOG ("$cid: blacklist $res $client_ip") if ($verbose); $auth_required_p = 1; } } my $cookie = undef; if ($handshake->req && $handshake->req->cookies && ref($handshake->req->cookies) eq 'Protocol::WebSocket::Cookie' && ref($handshake->req->cookies->pairs)) { foreach my $pair (@{$handshake->req->cookies->pairs}) { if ($pair->[0] eq 'DNA_AUTH') { $cookie = url_unquote ($pair->[1]); last; } } } if (!$cookie && $auth_required_p) { LOG ("$cid: handshake: no DNA cookie"); $conn->disconnect(); return; } if ($cookie && !dna_auth::dna_auth_validate_login_cookie ($cookie)) { LOG ("$cid: handshake: invalid DNA cookie" . ($debug_p ? ', DEBUG' : '')); if ($auth_required_p) { $conn->disconnect(); return; } } LOG ("$cid: handshake: authorized") if ($verbose); }, ready => sub { my ($conn) = @_; LOG ($conn->{cid} . ": ready") if ($verbose); my $res = $conn->{resource_name}; my $hook = channel_info ($res, 'connect'); $hook->($conn) if $hook; }, utf8 => sub { my ($conn, $msg) = @_; my $res = $conn->{resource_name}; # I tried a few things with Encode::decode to ensure that $msg # only contains valid UTF-8 data, but I couldn't figure it out. # If it doesn't (and how does that even happen??) we get an # error message from down inside send_utf8(). LOG ($conn->{cid} . ": utf8: $msg") if ($verbose); my $hook = channel_info ($res, 'text'); $msg = $hook->($conn, $msg) if $hook; if (!defined($msg) || $msg eq '') { # Silently reject - hook logs it. return; } if ($msg =~ m/^ping\s*$/s) { $conn->send_utf8 ("pong\n"); return; } foreach my $conn2 ($conn->server->connections) { if ($conn ne $conn2 && $res eq $conn2->{resource_name}) { $conn2->send_utf8 ($msg); } } }, binary => sub { my ($conn, $msg) = @_; my $res = $conn->{resource_name}; LOG ($conn->{cid} . ": binary: " . length($msg) . " bytes") if ($verbose); foreach my $conn2 ($conn->server->connections) { if ($conn ne $conn2 && $res eq $conn2->{resource_name}) { $conn2->send_binary ($msg); } } }, pong => sub { my ($conn) = @_; LOG ($conn->{cid} . ": pong") if ($verbose); }, disconnect => sub { my ($conn) = @_; my $res = $conn->{resource_name}; my $hook = channel_info ($res, 'disconnect') if $res; $hook->($conn) if $hook; if ($verbose) { my $cid = $conn->{cid}; my $err = $conn->{handshake} && $conn->{handshake}->{error}; LOG (($cid ? "$cid: " : "") . "disconnect" . ($err ? ": $err" : "")); } }, ), }, on_tick => sub { my ($serv) = @_; LOG ("tick") if ($verbose); }, on_shutdown => sub { my ($serv) = @_; LOG ("shutdown") if ($verbose); # Try closing each client explicitly. But only once. # Maybe this fixes the "Address already in use" problem? # if (! $serv->{shutting_down_p}) { $serv->{shutting_down_p} = 1; foreach my $conn ($serv->connections) { LOG ($conn->{cid} . ": shutdown disconnect") if ($verbose); $conn->disconnect (1001); $conn->{listen}->close() if ($conn->{listen}); } } }, ); eval { $server->start(); }; LOG ("error: $@") if ($@); exit (1); # run END and call shutdown() } ######################################################################## # # A few extra things for the /irc/ channels. # ######################################################################## # Log all IRC conversations. Each channel gets its own log file. # sub irc_logfile($) { my ($conn) = @_; my $res = $conn->{resource_name}; $res = lc($res); $res =~ s@^/irc/?@@s; $res =~ s/[^a-z\d]//s; # Sanitize return "$irc_log_file_base-$res.log"; } # Returns the new text. Blank to reject. # sub irc_text_hook($$) { my ($conn, $msg) = @_; my $client_ip = $conn->{client_ip} || '?'; my ($time, $user, $txt) = ($msg =~ m/^(\d+)\t([^\t]+)\t(.*)$/s); if (!$txt || $txt !~ m/[^\s]/s || !$user || length($user) > 25 || $user !~ m/^[a-z\d][^\s]+$/i || length($txt) > 600 || # This doesn't work anyway, but how about you try not to be a jerk? Thx. $txt =~ m/< \s* (IMG | SVG | IFRAME | BR | H\d | BODY | STYLE | META )\b | \b onload \s* = | \b eval \s* \( /six || # Ctrl chars (spammer from an IRC bridge used ^C etc as MIRC color codes) $txt =~ m/[\001-\010\013\014\016-\037]/s ) { if ($client_ip ne '127.0.0.1') { # No filter from localhost LOG ("$client_ip: IRC: reject: $msg"); return ""; } } my $file = irc_logfile ($conn); my $log = $msg; $log =~ s/\n/ /gs; $log = "$client_ip\t$log\n"; if ($msg =~ m/^title\d*\t/s) { LOG ("$file: not logging $msg") if ($debug_p); } elsif (open (my $out, '>>:utf8', $file)) { print $out $log; close $out; } else { LOG ("$file: $!"); } LOG ("$client_ip: IRC: $user: $txt") if ($user); return $msg; } # When a client connects to an IRC channel, play back the last hour's worth # of traffic on the channel. # sub irc_connect_hook($) { my ($conn) = @_; my $file = irc_logfile ($conn); my ($metadata, $metadata2); # Read the current metadata to generate fake "title" and "title2" lines. # Otherwise the user would have to wait until the next time the track # changed, which could be minutes from now. if (open (my $in, '<:utf8', $irc_metadata_file)) { $metadata = <$in>; $metadata =~ s/\s+$//s if ($metadata); close $in; } if (open (my $in, '<:utf8', $irc_metadata2_file)) { $metadata2 = <$in>; $metadata2 =~ s/\s+$//s if ($metadata2); close $in; } # Read :raw in case we seek() into the middle of a UTF-8 sequence. if (open (my $in, '<:raw', $file)) { my $size = (stat($in))[7]; my $seek = 1024 * 100; if ($size > $seek) { seek ($in, $size - $seek, 0); # || error ("seeking $file: $!"); } # Replay any comments within the last hour, or the last 20 comments, # whichever is larger. # my $now = time(); my $start = $now - (60 * 60); my $min_lines = 20; my $max_lines = 200; my @lines = (); while (<$in>) { my $line = $_; my ($ip, $line2, $time, $user, $txt) = ($line =~ m/^ (?: ([\d.]+) \t )? ( (\d+) \t ([^\t]+) \t (.*) ) $/sx); next unless $txt; push @lines, [$time, $line2]; # Remove oldest line if we've hit minimum, and it's earlier than $start. shift @lines if (@lines > $min_lines && $lines[0]->[0] < $start); } push @lines, [0, "title\t$metadata\n"] if ($metadata); push @lines, [0, "title2\t$metadata2\n"] if ($metadata2); @lines = @lines[-$max_lines .. -1] if (@lines > $max_lines); foreach my $L (@lines) { my $line = $L->[1]; $line =~ s/\n$//s; utf8::decode ($line); # Pack UTF-8 into wide chars. $conn->send_utf8 ($line); } my $count = scalar(@lines); LOG ($conn->{cid} . ": replayed $count old messages") if ($verbose && $count); close $in; } } ######################################################################## sub LOG($) { my ($s) = @_; # Redundant with systemd, which sends stderr to syslog. print STDERR "$progname: " . strftime("%H:%M:%S", localtime) . ": $s\n" if ($debug_p); system ('logger', '-t', $progname, $s); } sub error($) { my ($err) = @_; LOG ("ERROR: $err"); exit 1; } sub usage() { print STDERR "usage: $progname [--verbose] [--port P]" . " [--origin DOMAIN]" . # " [--ssl] [--certs PRIVKEY FULLCHAIN]" . "\n"; exit 1; } sub main() { binmode (STDERR, ':utf8'); my $port = undef; my ($sslp, $crt, $key, $origin); while (@ARGV) { $_ = shift @ARGV; if (m/^--?verbose$/) { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^--?debug$/) { $debug_p++; } elsif (m/^--?port$/) { $port = 0 + shift @ARGV; } elsif (m/^--?ssl$/) { $sslp = 1; } elsif (m/^--?certs?$/) { $key = shift @ARGV; $crt = shift @ARGV; } elsif (m/^--?origin$/) { $origin = shift @ARGV; } elsif (m/^-./) { usage; } else { usage; } } usage unless ($port && $port > 80); launch_server ($sslp, $port, $key, $crt, $origin); } main(); exit 0;