From 7427782c0713a6d3dfe18009904b9a0195ed14a2 Mon Sep 17 00:00:00 2001 From: dakkar Date: Mon, 4 Apr 2011 23:09:03 +0100 Subject: simple shortener uses ranges of Unicode chars, stores stuff in a SQLite db TODO: * auth * CGI it --- test.pl | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 test.pl diff --git a/test.pl b/test.pl new file mode 100644 index 0000000..514dba2 --- /dev/null +++ b/test.pl @@ -0,0 +1,117 @@ +#!/usr/bin/perl +use strict; +use warnings; +use DBI; +use open ':std',':utf8'; +use Encode; +use 5.12.0; + +my $db=DBI->connect('dbi:SQLite:dbname=urls.db','','',{ + PrintError => 0, + RaiseError => 1, + AutoCommit => 1, + sqlite_unicode => 1, +}); + +my $dbversion=eval { + $db->selectall_arrayref('select version from metainfo',{Slice=>{}}) + ->[0]{version}; +}; +if (!defined $dbversion) { + $db->do(q{create table metainfo(version integer not null)}); + $db->do(q{create table url(url varchar not null unique,shorted varchar not null unique)}); + $db->do(q{insert into metainfo(version) values(1)}); +} + +{ +# these could be multiple ranges +my @ranges=(0x4e00,0x9fcb); +my $base=0; +for(my $i=0;$i<@ranges;$i+=2) { + $base+=$ranges[$i+1]-$ranges[$i]; +} +sub num2chars { + my ($number) = @_; + + my @ret=(); + while ($number >= $base) { + unshift @ret,$number % $base; + $number=int($number/$base); + } + unshift @ret,$number; + + for my $v (@ret) { + for(my $i=0;$i<@ranges;$i+=2) { + $v+=$ranges[$i]; + last + if $v<$ranges[$i+1]; + $v-=$ranges[$i+1]; + } + } + return @ret; +} +} + +sub shorten { + my ($url) = @_; + + my $ret=$db->selectall_arrayref(q{select shorted from url where url=?}, + {Slice=>{}}, + $url); + if (@$ret) { + return $ret->[0]{shorted}; + } + + $ret=$db->selectall_arrayref(q{select count(*) as howmany from url}, + {Slice=>{}}); + my $number = $ret->[0]{howmany}; + my @codepoints=num2chars($number); + my $short=pack 'U0U*',@codepoints; + $db->do(q{insert into url(url,shorted) values (?,?)}, + {}, + $url,$short); + return $short; +} + +sub lengthen { + my ($short) = @_; + my $ret=$db->selectall_arrayref(q{select url from url where shorted=?}, + {Slice=>{}}, + $short); + if (@$ret) { + return $ret->[0]{url}; + } + return 404; +} + +sub urlencode { + my ($str) = @_; + + my @bytes = unpack 'U0C*',$str; + return join '',map {sprintf '%%%02X',$_} @bytes; +} + +sub urldecode { + my ($str) = @_; + + my @pieces = ($str =~ m{%(..)}g); + my @bytes = map { oct("0x$_") } @pieces; + return pack 'U0C*',@bytes; +} + +if (@ARGV>1) { + my $str=$ARGV[1]; + if ($str =~ m{^%}) { + $str=urldecode($str); + } + else { + $str=decode('utf8',$str); + } + say lengthen($str); +} +else { + my $url=$ARGV[0]; + my $str=shorten($url); + say $str; + say urlencode($str); +} -- cgit v1.2.3