#!/usr/bin/perl -w use strict; use URI; use XMLRPC::Transport::HTTP; my $server = XMLRPC::Transport::HTTP::CGI -> dispatch_to( 'BlogCensus::getGenerator', 'BlogCensus::getLanguage', 'BlogCensus::getIncomingLinkCount', 'BlogCensus::getOutgoingLinkCount', 'BlogCensus::isBlog', 'BlogCensus::crawledSince', ) -> handle ; sub handler { $server->handler( @_ ); } package BlogCensus; use SOAP::Lite; use CrawlDB; sub getLanguage { my ( $self, $url ) = @_; my $id = check_id( $url ); my $lang = single_result( "select language from metadata where id = '$id'"); die "No language information available" unless defined $lang; return $lang; } sub isBlog { my ( $self, $url ) = @_; my $id = check_id( $url ); return 0 unless $id; return 0 unless single_result( "select count(*) from blogs where id = '$id'"); return 1; } sub crawledSince { my ( $self, $stamp ) = @_; die "Not a valid datestamp" unless $stamp > 0; die "Fishy number" unless $stamp == int( $stamp ); my $result = do_sql( "select url, unix_timestamp(crawled) as crawled from blogs, urls where blogs.id = urls.id and crawled > from_unixtime( $stamp ) order by crawled desc;"); my @list; while ( my $x = $result->fetchrow_arrayref() ){ push @list, [ $x->[0], $x->[1] ]; } return \@list; } sub getGenerator{ my ( $self, $url ) = @_; my $id = check_id( $url ); my $gen = single_result( "select flavor from metadata where id = '$id'"); return $gen; } sub getIncomingLinkCount{ my ( $self, $url ) = @_; my $id = check_id( $url ); my $count = single_result( "select count(*) from links where sink = $id" ); return $count; } sub getOutgoingLinkCount{ my ( $self, $url ) = @_; my $id = check_id( $url ); my $count = single_result( "select count(*) from links where source = $id" ); return $count; } sub check_id { my $url = shift; $url = normalize( $url ); die "Improperly formed URL" unless $url =~ /^http:/; my $id = get_id( $url ) || die "This site does not appear in our URL list"; die "This site is not listed as a weblog" unless single_result("select id from blogs where id = $id"); #get_status( $id ) eq 'c'; return $id; } sub normalize { my $url = shift; my $c = URI->new( $url ); my $canon = $c->canonical(); return $canon->as_string; } sub die_bad_blog { die "This site is not in our blog list"; }