#!/usr/bin/perl -w -I/opt/eprints3/perl_lib

######################################################################
#
#  __LICENSE__
#
######################################################################

=pod

=head1 NAME

B<google_scholar> - import citation stats from Google Scholar

=head1 SYNOPSIS

B<google_scholar> I<repository_id> [B<options>]

=head1 DESCRIPTION

This command will read citation statistics for all peer-reviewed articles in
EPrints and update the database.

Requires a field defined on eprint:

	{
		'name' => 'gscholar',
		'type' => 'compound',
	#   'volatile' => 1,
		'fields' => [
			{ 'sub_name' => 'cluster', 'type' => 'text', },
			{ 'sub_name' => 'impact', 'type' => 'int', },
			{ 'sub_name' => 'datestamp', 'type' => 'time', },
		],
	},


=head1 ARGUMENTS

=over 8

=item I<repository_id> 

The ID of the EPrint repository to use.

=back

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.

=item B<--verbose>

Explain in detail what is going on.
May be repeated for greater effect.

Shows why a plugin is disabled.

=item B<--version>

Output version information and exit.

=back   

=cut


use Getopt::Long;
use Pod::Usage;
use strict;

use EPrints;
use GScholar;

# Command-line switches. Each variable below is the target of one (or more)
# GetOptions entries, except $purge which no option sets.
my $version = 0;
my $verbose = 0;
my $quiet = 0;
my $purge = 1;   # not read anywhere in the visible script
my $help = 0;
my $man = 0;
my $single = 0;  # accepted on the command line but not consulted below
my $unset = 0;   # when set, eprints that already have a value are skipped

GetOptions( 
	'help|?' => \$help,
	'man' => \$man,
	'version' => \$version,
	'verbose+' => \$verbose,
	'silent' => \$quiet,
	'quiet' => \$quiet,
	'single' => \$single,
	'unset' => \$unset,
) || pod2usage( 2 );
# Report this script's own name; the previous "export_xml" was left over
# from the script this one was derived from.
EPrints::Utils::cmd_version( "google_scholar" ) if $version;
pod2usage( 1 ) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
# The repository id is a mandatory positional argument.
pod2usage( 2 ) if( scalar @ARGV < 1 );

# Noise level: 1 by default, 0 with --quiet/--silent, and 1 plus the number
# of --verbose flags otherwise. --verbose wins when both are given because it
# is applied last.
my $noise = 1;
$noise = 0 if( $quiet );
$noise = 1+$verbose if( $verbose );

# Set STDOUT to auto flush (without needing a \n)
$| = 1;

my $repoid = shift @ARGV;

# Open a session on the requested repository, passing the noise level
# computed from --quiet/--verbose. Bail out if no session is returned.
my $session = EPrints::Session->new( 1, $repoid, $noise );
exit( 1 ) unless defined $session;

my $dataset = $session->dataset( "archive" );

# Name of the compound field (see the POD above) that stores the citation data.
my $field_name = "gscholar";

# The field must be configured on the eprint dataset. Fail with a clear
# message instead of dying on an undefined-value dereference when it is not.
my $field = $dataset->get_field( $field_name );
if( !defined $field )
{
	print STDERR "Field '$field_name' is not defined on the eprint dataset; see the DESCRIPTION section of this script's manual.\n";
	$session->terminate();
	exit( 1 );
}

# Decides below whether values are stored through the eprint object
# (volatile field) or written with a direct SQL UPDATE (non-volatile field).
my $is_volatile = $field->{ "volatile" };

# Select the records to update: type "article" with refereed "TRUE".
my $searchexp = EPrints::Search->new(
	session => $session,
	dataset => $dataset,
);
$searchexp->add_field( $dataset->get_field( "type" ), "article" );
$searchexp->add_field( $dataset->get_field( "refereed" ), "TRUE" );

my $list = $searchexp->perform_search;

# Callback applied to every eprint in the search result.
#
# Arguments (as passed by $list->map): $session, $dataset, $eprint, $ctx.
# $ctx is accepted but not used.
#
# Fetches citation data via GScholar::get_cites and stores it, stamped with
# the current time, in the $field_name compound field. Nothing is written
# when get_cites returns a false value.
my $fn = sub {
	my( $session, $dataset, $eprint, $ctx ) = @_;

	# With --unset, leave alone any eprint that already carries a value.
	return if $unset && $eprint->exists_and_set( $field_name );

	# Progress output is suppressed by --quiet, as the manual promises.
	print "Updating ".$dataset->confid.".".$eprint->get_id."\n" if $noise;

	my $cite_data = GScholar::get_cites( $session, $eprint );
	return unless $cite_data;

	$cite_data->{datestamp} = EPrints::Time::get_iso_timestamp();

	if( $is_volatile )
	{
		# Volatile field: store through the normal data object API.
		$eprint->set_value( $field_name, $cite_data );
		$eprint->commit;
		return;
	}

	# Non-volatile field: write the columns directly with SQL instead of
	# committing the eprint (presumably to avoid the side effects of a full
	# commit -- confirm).

	# Break the timestamp into the per-part columns
	# (<field>_datestamp_year, ..._month, ...) used in the UPDATE below.
	my @date = split /[:\-TZ]/, delete $cite_data->{datestamp};
	for my $part (qw( year month day hour minute second ))
	{
		$cite_data->{"datestamp_$part"} = shift @date;
	}

	# The values originate from an external service, so never paste them
	# into the statement raw: let the database handle quote every value and
	# identifier. Keys are sorted only to make the generated SQL stable.
	my $db = $session->get_database;
	my @assignments = map {
		$db->quote_identifier( $field_name."_".$_ )."=".$db->quote_value( $cite_data->{$_} )
	} sort keys %$cite_data;

	my $sql = "UPDATE ".$db->quote_identifier( $dataset->get_sql_table_name ).
		" SET ".join( ", ", @assignments ).
		" WHERE ".$db->quote_identifier( "eprintid" )."=".$eprint->get_id;
	$db->do( $sql );
};

# Run the callback over every eprint in the search result.
$list->map( $fn );

# Drop the result list first, then the search expression, before the
# session is shut down.
undef $_ for ( $list, $searchexp );

$session->terminate;
exit;
