From 8bc3500a3e10d4b8812c9d41d1374c2772ec95c9 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Wed, 23 Feb 2011 18:36:37 -0500 Subject: [PATCH] Initial commit --- WTFPL-2 | 13 ++ shortenurl.pl | 328 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 341 insertions(+) create mode 100644 WTFPL-2 create mode 100644 shortenurl.pl diff --git a/WTFPL-2 b/WTFPL-2 new file mode 100644 index 0000000..d23841b --- /dev/null +++ b/WTFPL-2 @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar + 22 rue de Plaisance, 75014 Paris, France + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/shortenurl.pl b/shortenurl.pl new file mode 100644 index 0000000..648a7ae --- /dev/null +++ b/shortenurl.pl @@ -0,0 +1,328 @@ +# Copyright © 2011 Nick Bowler +# +# License WTFPL2: Do What The Fuck You Want To Public License, version 2. +# This is free software: you are free to do what the fuck you want to. +# There is NO WARRANTY, to the extent permitted by law. +# +# ShortenURL implements an HTTP URL shortener for Irssi. It consists of two +# parts: an HTTP server to handle the short URLs, and a message filter which +# replaces long URLs with short ones. Message filtering occurs after logging, +# so the log files should contain original URLs (which is good because the +# shortened URLs cease to be valid once the script is unloaded). +# +# In IRC discussions, people often use URLs as words in a sentence. For +# instance, +# +# < luser> Check out my awesome webpage at +# http://example.org/I_like_very_long_urls_for_no_reason! +# +# If we matched the longest possible valid URIs, we would shorten the full URL +# including the trailing exclamation point. But in this case, it is possible +# that the exclamation point was not intended to be interpreted as part of the +# URL. Since ShortenURL can't possibly decide whether or not trailing +# punctuation is part of the sentence structure or the URL, it employs a simple +# rule: any punctuation immediately following a URL is not considered part of +# the URL for shortening. To handle the situation where the punctuation is +# part of the URL, the HTTP server permits arbitrary punctuation to follow any +# shortened URL. Any such trailing punctuation will be appended to the long +# URL. +# +# ShortenURL has a number of options: +# +# shortenurl_url_addr (default: "::1") +# The address that ShortenURL will use for the short URLs that it +# generates. This is useful if you run Irssi on a remote machine. +# +# shortenurl_bind_addr (default: "") +# The bind address and/or port of the ShortenURL HTTP server. +# +# shortenurl_level (default: msgs public) +# The message levels for which ShortenURL will translate URLs. +# +# shortenurl_redirect (default: ON) +# If enabled, HTTP responses will return a 301 Moved Permanently +# status which will normally cause browsers to load the full URL +# automatically. If disabled, you will be presented with an HTML +# document that links to the full URL instead. Regardless of this +# setting, it is possible to visit the redirect page by appending +# ?safe to the short URL. +# +# shortenurl_threshold (default: 0) +# The minimum length (in bytes) that a URL needs to be to be considered +# for shortening. No matter what this is set to, ShortenURL will never +# replace a URL unless the replacement is actually shorter. + +use strict; +use feature "state"; +use vars qw($VERSION %IRSSI); + +use IO::Socket::INET6; +use Socket qw(:crlf); +use Errno qw(:POSIX); +use Encode; + +use MIME::Base64 qw(encode_base64url); +use Digest::SHA qw(sha224); +use Regexp::Common qw(URI); +use XML::Generator; +use HTTP::Parser; + +$VERSION = '0.8'; +%IRSSI = ( + authors => 'Nick Bowler', + contact => 'nbowler@draconx.ca', + name => 'ShortenURL', + description => 'Automatically shortens long URLs.', + license => 'WTFPL2', +); + +Irssi::settings_add_str ("shortenurl", "shortenurl_url_addr", "::1"); +Irssi::settings_add_str ("shortenurl", "shortenurl_bind_addr", ""); +Irssi::settings_add_level("shortenurl", "shortenurl_level", "msgs public"); +Irssi::settings_add_bool ("shortenurl", "shortenurl_redirect", 1); +Irssi::settings_add_int ("shortenurl", "shortenurl_threshold", 0); + +my $listenport = 0; +my %urihash; + +sub conn_close { + my ($conn) = @_; + + Irssi::timeout_remove($conn->{timetag}); + Irssi::input_remove($conn->{datatag}); + close $conn->{sock}; +} + +sub xhtml_generator { + my $xhtml_w3c = '"-//W3C//DTD XHTML 1.1//EN"'; + my $xhtml_dtd = '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'; + + return XML::Generator->new( + pretty => 2, + conformance => 'strict', + encoding => 'utf-8', + namespace => ["http://www.w3.org/1999/xhtml"], + dtd => ["html", "PUBLIC", $xhtml_w3c, $xhtml_dtd], + ); +} + +sub error_page { + my ($resp) = @_; + + my $g = xhtml_generator; + my $doc = $g->html( + $g->head($g->title($resp->status_line)), + $g->body( + $g->h1($resp->status_line) + ) + ); + + return $g->xmldecl . "$doc\n"; +} + +sub error_response { + my ($resp, $code, $method) = @_; + + $resp->code($code); + $resp->content(Encode::encode_utf8(error_page($resp))); + $resp->header("Content-Type" => "application/xhtml+xml; charset=utf-8"); + $resp->header("Content-Length" => length $resp->content); + + if (defined $method && $method eq "HEAD") { + $resp->content(undef); + } + + return $resp->as_string($CRLF); +} + +sub redir_page { + my ($loc) = @_; + + my $g = xhtml_generator; + my $doc = $g->html( + $g->head($g->title("ShortenURL Redirector")), + $g->body( + $g->h1("ShortenURL Redirector"), + $g->p($g->a({href=>$loc}, $loc)) + ) + ); + + return $g->xmldecl . "$doc\n"; +} + +sub redir_response { + my ($resp, $loc, $method) = @_; + + $resp->content(Encode::encode_utf8(redir_page($loc))); + $resp->header("Content-Type" => "application/xhtml+xml; charset=utf-8"); + $resp->header("Content-Length" => length $resp->content); + + if ($method eq "HEAD") { + $resp->content(undef); + } + + return $resp->as_string($CRLF); +} + +sub conn_process { + my ($conn) = @_; + my ($digest, $base, $tail); + my $do_redirect = Irssi::settings_get_bool("shortenurl_redirect"); + + my $resp = HTTP::Response->new(200); + my $req = $conn->{parser}->object; + + $resp->header(Server => "ShortenURL/$VERSION"); + + # Respond using the same protocol as the request. + # (The X-Http-Version header is added by HTTP::Parser) + $resp->protocol("HTTP/" . $req->header("X-Http-Version")); + + # Check the request type + unless ($req->method eq "GET" || $req->method eq "HEAD") { + $conn->{output} = error_response($resp, 501); + return; + } + + # Check the URI format + unless ($req->uri->path =~ m/^\/([-_[:alnum:]]+)([?!;,.]*)$/) { + $conn->{output} = error_response($resp, 404, $req->method); + return; + } + + ($digest, $tail) = ($1, $2); + unless (defined $urihash{$digest}) { + $conn->{output} = error_response($resp, 404, $req->method); + return; + } + + if ($req->uri->query =~ m/safe/i) { + $do_redirect = 0; + } + + $base = $urihash{$digest}; + if ($do_redirect) { + $resp->code(301); + $resp->header("Location" => "$base$tail"); + } + $conn->{output} = redir_response($resp, "$base$tail", $req->method); +} + +sub conn_write { + my ($conn) = @_; + my $rc; + + unless (length $conn->{output} > 0) { + conn_close($conn); + return; + } + + $rc = $conn->{sock}->send($conn->{output}, 0); + if (!defined $rc) { + print CLIENTERROR "shortenurl write failure: $!"; + conn_close($conn); + return; + } + + $conn->{output} = substr($conn->{output}, $rc); +} + +sub conn_read { + my ($conn) = @_; + my $buf; + + unless (defined $conn->{sock}->recv($buf, 1024, MSG_DONTWAIT)) { + unless ($!{EWOULDBLOCK}) { + print CLIENTERROR "shortenurl read failure: $!"; + conn_close($conn); + } + + return; + } + + unless (length $buf > 0) { + # Remote closed the connection, clean up. + conn_close($conn); + return; + } + + unless (defined $conn->{parser}) { + $conn->{parser} = HTTP::Parser->new( request => 1 ); + } + + # HTTP::Parser likes to die gratuitously, so trap that. + eval { + $_ = $conn->{parser}->add($buf); + + # Since we don't support any requests with content, + # we can treat the 0 and -3 returns identically. + return unless ($_ == 0 || $_ == -3); + + conn_process($conn); + Irssi::input_remove($conn->{datatag}); + $conn->{datatag} = Irssi::input_add(fileno($conn->{sock}), + INPUT_WRITE, "conn_write", $conn); + } or do { + conn_close($conn); + } +} + +sub server_new_conn { + my ($listensock) = @_; + my %conn; + + $conn{sock} = $listensock->accept(); + $conn{timetag} = Irssi::timeout_add_once(5000, "conn_close", \%conn); + $conn{datatag} = Irssi::input_add(fileno($conn{sock}), INPUT_READ, + "conn_read", \%conn); +} + +sub server_start { + my $sock = new IO::Socket::INET6( + Proto => 'tcp', + Listen => 10, + LocalAddr => Irssi::settings_get_str("shortenurl_bind_addr"), + ) or die "Failed to create listen socket: $!."; + + $listenport = $sock->sockport(); + print CLIENTNOTICE "ShortenURL listening on port $listenport"; + Irssi::input_add(fileno($sock), INPUT_READ, "server_new_conn", $sock); +} +server_start(); + +sub process_uri { + my ($uri) = @_; + my ($base, $tail, $digest, $newuri); + my $threshold = Irssi::settings_get_int("shortenurl_threshold"); + my $host = Irssi::settings_get_str("shortenurl_url_addr"); + + return $uri unless (length $uri > $threshold); + + $uri =~ m/^(.*?)([?;!,.]*)$/; + ($base, $tail) = ($1, $2); + $digest = substr(encode_base64url(sha224($base)), 0, 12); + + $host = "[$host]" if ($host =~ /:/); + $newuri = "http://$host:$listenport/$digest$tail"; + return $uri if (length $newuri >= length $uri); + + $urihash{$digest} = $base; + return $newuri; +} + +sub filter_text { + my ($dest, $text, $stripped) = @_; + state $handling = 0; + + # Avoid accidentally throwing Irssi into an infinite loop. + return if ($handling); + $handling = 1; + + if (Irssi::settings_get_level("shortenurl_level") & $dest->{level}) { + $text =~ s/($RE{URI}{HTTP})/process_uri($1)/ge; + Irssi::signal_continue($dest, $text, $stripped); + } + + $handling = 0; +} +Irssi::signal_add("print text", "filter_text"); -- 2.43.0