1 # Copyright © 2011 Nick Bowler
3 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
4 # This is free software: you are free to do what the fuck you want to.
5 # There is NO WARRANTY, to the extent permitted by law.
7 # ShortenURL implements an HTTP URL shortener for Irssi. It consists of two
8 # parts: an HTTP server to handle the short URLs, and a message filter which
9 # replaces long URLs with short ones. Message filtering occurs after logging,
10 # so the log files should contain original URLs (which is good because the
11 # shortened URLs cease to be valid once the script is unloaded).
13 # In IRC discussions, people often use URLs as words in a sentence. For
16 # < luser> Check out my awesome webpage at
17 # http://example.org/I_like_very_long_urls_for_no_reason!
19 # If we matched the longest possible valid URIs, we would shorten the full URL
20 # including the trailing exclamation point. But in this case, it is possible
21 # that the exclamation point was not intended to be interpreted as part of the
22 # URL. Since ShortenURL can't possibly decide whether or not trailing
23 # punctuation is part of the sentence structure or the URL, it employs a simple
24 # rule: any punctuation immediately following a URL is not considered part of
25 # the URL for shortening. To handle the situation where the punctuation is
26 # part of the URL, the HTTP server permits arbitrary punctuation to follow any
27 # shortened URL. Any such trailing punctuation will be appended to the long
30 # ShortenURL has a number of options:
32 # shortenurl_url_addr (default: "::1")
33 # The address that ShortenURL will use for the short URLs that it
34 # generates. This is useful if you run Irssi on a remote machine.
36 # shortenurl_bind_addr (default: "")
37 # The bind address and/or port of the ShortenURL HTTP server.
39 # shortenurl_level (default: msgs public)
40 # The message levels for which ShortenURL will translate URLs.
42 # shortenurl_redirect (default: ON)
43 # If enabled, HTTP responses will return a 301 Moved Permanently
44 # status which will normally cause browsers to load the full URL
45 # automatically. If disabled, you will be presented with an HTML
46 # document that links to the full URL instead. Regardless of this
47 # setting, it is possible to visit the redirect page by appending
48 # ?safe to the short URL.
50 # shortenurl_threshold (default: 0)
51 # The minimum length (in bytes) that a URL needs to be to be considered
52 # for shortening. No matter what this is set to, ShortenURL will never
53 # replace a URL unless the replacement is actually shorter.
57 use vars qw($VERSION %IRSSI);
59 use IO::Socket::INET6;
64 use MIME::Base64 qw(encode_base64url);
65 use Digest::SHA qw(sha224);
66 use Regexp::Common qw(URI);
72 authors => 'Nick Bowler',
73 contact => 'nbowler@draconx.ca',
75 description => 'Automatically shortens long URLs.',
79 Irssi::settings_add_str ("shortenurl", "shortenurl_url_addr", "::1");
80 Irssi::settings_add_str ("shortenurl", "shortenurl_bind_addr", "");
81 Irssi::settings_add_level("shortenurl", "shortenurl_level", "msgs public");
82 Irssi::settings_add_bool ("shortenurl", "shortenurl_redirect", 1);
83 Irssi::settings_add_int ("shortenurl", "shortenurl_threshold", 0);
91 Irssi::timeout_remove($conn->{timetag});
92 Irssi::input_remove($conn->{datatag});
97 my $xhtml_w3c = '"-//W3C//DTD XHTML 1.1//EN"';
98 my $xhtml_dtd = '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"';
100 return XML::Generator->new(
102 conformance => 'strict',
104 namespace => ["http://www.w3.org/1999/xhtml"],
105 dtd => ["html", "PUBLIC", $xhtml_w3c, $xhtml_dtd],
112 my $g = xhtml_generator;
114 $g->head($g->title($resp->status_line)),
116 $g->h1($resp->status_line)
120 return $g->xmldecl . "$doc\n";
124 my ($resp, $code, $method) = @_;
127 $resp->content(Encode::encode_utf8(error_page($resp)));
128 $resp->header("Content-Type" => "application/xhtml+xml; charset=utf-8");
129 $resp->header("Content-Length" => length $resp->content);
131 if (defined $method && $method eq "HEAD") {
132 $resp->content(undef);
135 return $resp->as_string($CRLF);
141 my $g = xhtml_generator;
143 $g->head($g->title("ShortenURL Redirector")),
145 $g->h1("ShortenURL Redirector"),
146 $g->p($g->a({href=>$loc}, $loc))
150 return $g->xmldecl . "$doc\n";
154 my ($resp, $loc, $method) = @_;
156 $resp->content(Encode::encode_utf8(redir_page($loc)));
157 $resp->header("Content-Type" => "application/xhtml+xml; charset=utf-8");
158 $resp->header("Content-Length" => length $resp->content);
160 if ($method eq "HEAD") {
161 $resp->content(undef);
164 return $resp->as_string($CRLF);
169 my ($digest, $base, $tail);
170 my $do_redirect = Irssi::settings_get_bool("shortenurl_redirect");
172 my $resp = HTTP::Response->new(200);
173 my $req = $conn->{parser}->object;
175 $resp->header(Server => "ShortenURL/$VERSION");
177 # Respond using the same protocol as the request.
178 # (The X-Http-Version header is added by HTTP::Parser)
179 $resp->protocol("HTTP/" . $req->header("X-Http-Version"));
181 # Check the request type
182 unless ($req->method eq "GET" || $req->method eq "HEAD") {
183 $conn->{output} = error_response($resp, 501);
187 # Check the URI format
188 unless ($req->uri->path =~ m/^\/([-_[:alnum:]]+)([?!;,.]*)$/) {
189 $conn->{output} = error_response($resp, 404, $req->method);
193 ($digest, $tail) = ($1, $2);
194 unless (defined $urihash{$digest}) {
195 $conn->{output} = error_response($resp, 404, $req->method);
199 if ($req->uri->query =~ m/safe/i) {
203 $base = $urihash{$digest};
206 $resp->header("Location" => "$base$tail");
208 $conn->{output} = redir_response($resp, "$base$tail", $req->method);
215 unless (length $conn->{output} > 0) {
220 $rc = $conn->{sock}->send($conn->{output}, 0);
222 print CLIENTERROR "shortenurl write failure: $!";
227 $conn->{output} = substr($conn->{output}, $rc);
234 unless (defined $conn->{sock}->recv($buf, 1024, MSG_DONTWAIT)) {
235 unless ($!{EWOULDBLOCK}) {
236 print CLIENTERROR "shortenurl read failure: $!";
243 unless (length $buf > 0) {
244 # Remote closed the connection, clean up.
249 unless (defined $conn->{parser}) {
250 $conn->{parser} = HTTP::Parser->new( request => 1 );
253 # HTTP::Parser likes to die gratuitously, so trap that.
255 $_ = $conn->{parser}->add($buf);
257 # Since we don't support any requests with content,
258 # we can treat the 0 and -3 returns identically.
259 return unless ($_ == 0 || $_ == -3);
262 Irssi::input_remove($conn->{datatag});
263 $conn->{datatag} = Irssi::input_add(fileno($conn->{sock}),
264 INPUT_WRITE, "conn_write", $conn);
270 sub server_new_conn {
271 my ($listensock) = @_;
274 $conn{sock} = $listensock->accept();
275 $conn{timetag} = Irssi::timeout_add_once(5000, "conn_close", \%conn);
276 $conn{datatag} = Irssi::input_add(fileno($conn{sock}), INPUT_READ,
277 "conn_read", \%conn);
281 my $sock = new IO::Socket::INET6(
284 LocalAddr => Irssi::settings_get_str("shortenurl_bind_addr"),
285 ) or die "Failed to create listen socket: $!.";
287 $listenport = $sock->sockport();
288 print CLIENTNOTICE "ShortenURL listening on port $listenport";
289 Irssi::input_add(fileno($sock), INPUT_READ, "server_new_conn", $sock);
295 my ($base, $tail, $digest, $newuri);
296 my $threshold = Irssi::settings_get_int("shortenurl_threshold");
297 my $host = Irssi::settings_get_str("shortenurl_url_addr");
299 return $uri unless (length $uri > $threshold);
301 $uri =~ m/^(.*?)([?;!,.]*)$/;
302 ($base, $tail) = ($1, $2);
303 $digest = substr(encode_base64url(sha224($base)), 0, 12);
305 $host = "[$host]" if ($host =~ /:/);
306 $newuri = "http://$host:$listenport/$digest$tail";
307 return $uri if (length $newuri >= length $uri);
309 $urihash{$digest} = $base;
314 my ($dest, $text, $stripped) = @_;
317 # Avoid accidentally throwing Irssi into an infinite loop.
318 return if ($handling);
321 if (Irssi::settings_get_level("shortenurl_level") & $dest->{level}) {
322 $text =~ s/($RE{URI}{HTTP})/process_uri($1)/ge;
323 Irssi::signal_continue($dest, $text, $stripped);
328 Irssi::signal_add("print text", "filter_text");