#!/usr/local/bin/perl -w
#
# Created: 06/18/04 09:04:06 EDT by Andy Harrison
#
# USAGE
#
#     gmailarchiver.pl [-f "imapfolder"] \
#       [--dump [--send] [-o outputfile ] [-u user -p passwd] \
#           [-e emailaddress] [--smtp smtp_server_name]]
#
#       [--count [-f IMAPFolder][-u user -p passwd]]
#       
#       [--mbox mbox_file_name][--send][-u user -p passwd] \
#           [-e e-mailaddress] [--smtp smtp_server_name ]
#
#       [--url mailman_mbox_file_url [-e e-mailaddress] \
#           [--smtp smtp_server_name][--send]]
#
#       [--mailman mailman_listinfo_url [-e e-mailaddress] \
#           [--smtp smtp_server_name][--send]]
#
# SEE ALSO
#
#   perldoc gmailarchiver.pl
#
#
# $Id: gmailarchiver.pl,v 1.6 2004/07/22 14:52:58 aharriso Exp aharriso $

use strict;

no warnings 'once';
$|++;

use Mail::Mailer;
use Mail::IMAPClient;
use File::Slurp "read_file";
use Getopt::Long qw/:config auto_help auto_version/;
use List::Util qw/reduce/;

use vars 
    qw/
        $opt_c $opt_d $opt_e $opt_f 
        $opt_h $opt_l $opt_m $opt_n 
        $opt_o $opt_p $opt_port $opt_s 
        $opt_smtp $opt_u $opt_v $opt_subject
        $opt_delete $opt_until $opt_mbox
        $opt_url $opt_mailman

        $host $port $imap $folder
    /;

local $main::VERSION = 
    '$Id: gmailarchiver.pl,v 1.6 2004/07/22 14:52:58 aharriso Exp aharriso $';

GetOptions(
    'c|count!'        => \$opt_c,        # count of messages
    'delete!'         => \$opt_delete,   # delete messages
    'dump!'           => \$opt_d,        # download messages
    'e|email=s'       => \$opt_e,        # e-mail address
    'f|folder=s'      => \$opt_f,        # folder to select
    'h|host=s'        => \$opt_h,        # hostname
    'help'            => sub { help() }, # display help
    'l|list!'         => \$opt_l,        # list folders
    'mbox=s'          => \$opt_mbox,     # mboxfile name
    'm|msg|message=i' => \$opt_m,        # dump individual message
    'mailman=s'       => \$opt_mailman,  # URL of mailman listinfo page
    'n|numbers!'      => \$opt_n,        # number of messages in folder
    'o|outputfile=s'  => \$opt_o,        # output file name
    'p|password=s'    => \$opt_p,        # imap password
    'port=i'          => \$opt_port,     # server port
    's|send!'         => \$opt_s,        # send message to e-mail 
                                         # address after downloading
    'smtp=s'          => \$opt_smtp,     # smtp server to use to send the 
                                         # outgoing archived messages
    'subject=s'       => \$opt_subject,  # Subject prefix
    'until=i'         => \$opt_until,    # delete until this message number
    'u|user=s'        => \$opt_u,        # imap username
    'url=s'           => \$opt_url,      # Archive URL
    'v|verbose'       => \$opt_v,        # print some details
);

eval {  
    require Data::Dumper; 
    $Data::Dumper::Indent = 3; 
} if $opt_v;

if ( $opt_c or $opt_d or $opt_n or $opt_l or $opt_delete ) { 
    # bare minimum options to bother connecting

    $host = $opt_h ? $opt_h : 'localhost';
    $port = $opt_port ? $opt_port : '143';

    print "::host::-", $host, "-:: ::port::-", $port, "-::\n" if $opt_v;

    # Connect to the IMAP server
    #
    $imap = Mail::IMAPClient->new(
        Server   => $host,
        Port     => $port,
        User     => $opt_u,
        Password => $opt_p,
    ) or die "$opt_u unable to connect to imap $host:$port. \n\nError: $@";


    $folder = $opt_f ? $opt_f : "INBOX";
    $imap->select( $folder ) or die "Couldn't select folder: $@\n";

} 

if ( $opt_d ) {

    my $filename = $opt_o ? $opt_o : "/tmp/imapdump.txt";
    print "filename : ", $opt_o, "\n" if $opt_o and $opt_v;
    if ( $opt_o ) { 
        $imap->message_to_file( $filename, ( $opt_m or $imap->messages ) ) 
            or die "error::$@, $!::\n";
    }

    my @message_list;
    if ( $opt_until ) {
        for ( $imap->messages ) {
            push @message_list, $_ if $_ <= $opt_until and $_ => $opt_m;
        }
    } else {
        
        @message_list = $opt_m ? $opt_m : $imap->messages;

    }
           
    for ( @message_list ) {

        if ( $opt_s and $opt_e ) {

            send_message ( 
                {
                    'To'       => $opt_e,
                    'From'     => $imap->get_header( $_, "From" ),
                    'Reply-To' => $opt_e,        # in case of bounces
                    'Subject'  => $opt_subject ? 
                                  $opt_subject . " " .  
                                  $imap->get_header( $_, "Subject" ) : 
                                  $imap->get_header( $_, "Subject" ),
                    'body'     => $imap->body_string( $_ ),
                } 
            );

        } elsif ( $opt_e and $opt_o and ! $opt_s ) {

            send_message ( 
                {
                    'To'      => $opt_e,
                    'From'    => "imap-to-gmail script",
                    'Subject' => "archive of $folder",
                    'body'    => reduce { $a . $b } read_file( $filename ),
                } 
            );

        } else {

            print $imap->bodypart_string( $_, 0 );
            print $imap->body_string( $_ );

        }
    }

    print "\n\n";


} elsif ( $opt_c ) {
    
    print "\n$folder contains ",$imap->message_count, " messages.\n\n";

} elsif ( $opt_n ) {

    print "Message numbers:\n\n";
    print $_, " " for $imap->messages;
    print "\n";

} elsif ( $opt_l ) {

    print $imap->list;

} elsif ( $opt_delete and $opt_m ) {

    if ( $opt_until ) {

        my @deletions;
        for ( $imap->messages ) {
            push @deletions, $_ if $_ <= $opt_until and $_ => $opt_m;
        }
        $imap->delete_message( \@deletions ) or
            die "Could not delete messages: $@\n";

    } else {

        $imap->delete_message( $opt_m ) or 
            die "Could not delete messages: $@\n";

    }

    $imap->expunge( $opt_f ) or die "Could not expunge: $@\n";


} elsif ( $opt_mbox and $opt_s and $opt_e and $opt_smtp ) {

    mbox_parse( $opt_mbox, $opt_e );

} elsif ( $opt_mailman ) {

    print "--mailman--$opt_mailman\n";
    grab_archives( $opt_mailman, "all" );

} elsif ( $opt_url ) {

    print "--url--$opt_url\n" if $opt_v;
    grab_archives( $opt_url, undef );

} else { 

    help();

}

$imap->logout or warn "Couldn't logout: $@\n" if $imap;

sub help {

    use Pod::Usage;
    pod2usage( -verbose => 2 );

}

sub grab_archives {

    print "::_::", Dumper( @_ ), "::\n" if $opt_v;
    my ( $url, $option ) = @_;

    print "::url::->", $url, "<-::\ngrab option::$option::\n" if $opt_v;

    die "not a valid mailman archive url: $!\n" 
        if ! $option eq "all" and $url =~ m/txt.gz/;

    die "Please install WWW::Mechanize Module: $@\n" unless
        eval { require WWW::Mechanize; };

    my $mech = WWW::Mechanize->new();  
    
    if ( $option eq "all" ) {
        $mech->get( $url ) or
            die "Unable to fetch: $url, $!\n";

        $mech->follow_link( text_regex => qr/Archives/ );

        my @archives_obj = 
            $mech->find_all_links( url_regex => qr/\.txt\.gz$/ );

        for ( @archives_obj ) {

            my $url = $_->url;

            my $fetched_filename = fetch_archive( $url, $mech );

            mbox_parse( $fetched_filename, $opt_e );
        }
    } else {

        print "step1** ::", $url, ":: **\n" if $opt_v;
        my $fetched_filename = fetch_archive( $url, $mech );
        mbox_parse( $fetched_filename, $opt_e );

    }

}

sub fetch_archive {

    my ( $url, $mech ) = @_;
    my $destination_file;
    my $gz_file;

    print Dumper( $url ) if $opt_v;

    if ( $url =~ m/^http:/ ) {
        die "Unable to load URI module: $@\n" unless
            eval { require URI; };
        my $link = URI->new( $url ); 
        my $path = $link->path;  
        my @filename = $link->path_segments( $link->path ); 
        print ":filename:", 
              Dumper( $filename[-1] ), 
              "\n::" if $opt_v;
        $gz_file = $filename[-1];

    } else {

        $gz_file = $url;

    }

    $destination_file = $gz_file;
    $destination_file =~ s/\.gz$//;

    $mech->get( $url, ":content_file" => $gz_file ) or 
        warn "Unable to fetch: $url, $!\n";

    print "::urlgunzip::", Dumper( $url ), "::\n" if $opt_v;

    gunzip( $gz_file, $destination_file ) and 
        unlink $url || die "Unable to gunzip: ", $url, " $!\n";

    return $destination_file or die "error fetching archive: $!\n";

}

sub mbox_parse {

    my $mbox_file   = shift;
    my $email       = shift;

    print "--file-->\n", Dumper( $mbox_file ), Dumper( $email ), 
          "<----\n" if $opt_v;
    die "Please install Mail::MboxParser Module: $@\n" unless
        eval { require Mail::MboxParser; };

    my $parseropts = {
        enable_cache    => 0,
        enable_grep     => 1
    };
    my $mb = Mail::MboxParser->new( $mbox_file,
                                     decode     => 'ALL',
                                     parseropts => $parseropts ) or 
        die "Problem reading mbox file: $@, $!\n";

    my $msg_counter;
    if ( $opt_m ) {
        for ( $msg_counter = 1 ; $msg_counter <= $opt_m ; $msg_counter++ ) {
            # Allows message range specification
            $mb->next_message;
        } 
    }

    while ( my $msg = $mb->next_message ) {

        send_message( 
            {
                'To'        =>  $email,
                'From'      =>  $msg->header->{ from },
                'Subject'   =>  $opt_subject ?
                                $opt_subject . " " .
                                $msg->header->{ subject } :
                                $msg->header->{ subject },
                'Date'      =>  $msg->header->{ date } ,
                'body'      =>  $msg->body->as_string,
            }
        );

        last if $opt_until and $msg_counter++ > $opt_until;
    }

}

sub send_message {

# leaving this line commented so I can quickly switch to test mode.
#    my $mailer = new Mail::Mailer 'testfile'
    my $mailer = new Mail::Mailer 'smtp', Server => $opt_smtp 
        if $opt_e and $opt_s and $opt_smtp || 
            die "Specify a valid smtp server with --smtp:  $@\n";

    my $message_body = $_[0]->{ 'body' };
    delete $_[0]->{ 'body' } if $message_body;

    print "----->\n", Dumper( $_[0]->{'Subject'} ), "<------\n" if $opt_v;
    $mailer->open( $_[0] );

    print ".";
    print "message body\n", ">" x 20, "\n", 
      $message_body,
      "\n", "<" x 20, "\nend message body\n" if $opt_v;

    print $mailer $message_body;
    $mailer->close;


}

# Lifted from CPAN.pm
# CPAN::Tarzip::gunzip
#
sub gunzip {
    
    die "Unable to load Compress::Zlib module: $@\n" unless
        eval { require Compress::Zlib; };

    die "Unable to load FileHandle module: $@\n" unless
        eval { require FileHandle; };

    my( $read, $write ) = @_;
    my($buffer,$fhw);

    $fhw = FileHandle->new(">$write")
	    or die("Could not open >$write: $!");

    my $gz = Compress::Zlib::gzopen($read, "rb")
	    or die("Cannot gzopen $read: $!\n");

    $fhw->print($buffer) while $gz->gzread($buffer) > 0 ;

    die("Error reading from $read: $!\n")
	    if $gz->gzerror != Compress::Zlib::Z_STREAM_END();

    $gz->gzclose() ;
    $fhw->close;

}

#
# $Log: gmailarchiver.pl,v $
# Revision 1.6  2004/07/22 14:52:58  aharriso
# code cleanup
# script name change to gmailarchiver
# CPAN friendly POD
#
# Revision 1.5  2004/07/21 15:05:29  aharriso
# added mbox support, mailman handling
#
# Revision 1.4  2004/07/20 13:14:53  aharriso
# added features to prefix a subject and delete messages
#
# Revision 1.3  2004/07/16 12:45:46  aharriso
# fixed the elsif $opt_d bug
#
# Revision 1.2  2004/07/02 12:01:09  aharriso
# minor changes
#
# Revision 1.1  2004/07/02 04:12:29  aharriso
# Initial revision
#
#

__END__

=head1 NAME

gmailarchiver.pl - Archive your IMAP Mail

=head1 SCRIPT CATEGORIES

Mail

=head1 README

I created this script for the purpose of moving some 
of my IMAP mail to my gmail.com account.

I've also added mbox support along with mailman mbox
archive support.

=head1 OSNAMES

any

=head1 PREREQUISITES

C<Mail::Mailer>

C<Mail::IMAPClient>

C<File::Slurp>

=head1 COREQUISITES

C<Mail::MboxParser> - for mbox format file handling

C<WWW::Mechanize> - to fetch Mailman (mbox format) archives

=head1 SYNOPSIS

=head2 ARGS

=over 15

=item B<-c> B<--count>

count of messages

=item B<--dump>

download messages

=item B<--delete> B<-m> I<message_id>

delete message matching I<message_id>.  You may also specify
B<--until> I<message_id> and it will delete all messages from the 
B<-m> I<message_id> to the B<--until> I<message_id>.

=item B<-e> B<--email> I<address>

destination e-mail address

=item B<-f> B<--folder> I<foldername>

folder to select

=item B<-h> B<--host> I<hostname>

IMAP server you want to access, default [C<localhost>]

=item B<-l> B<--list>

list folders

=item B<-m> I<id>

specify individual message id.  You may also use the B<--until> 
param to specify a range, as described for B<--delete>.

=item B<--mailman> I<url>

Specify the list-info url of the page from where you want to grab the
mailman mbox archives.

(I<http://lists.bestpractical.com/cgi-bin/mailman/listinfo/rt-users>)

=item B<--mbox> I<filename>

parse an mbox format file for sending to the specified address

=item B<-n> B<--numbers>

show message id numbers in folder

=item B<-o> B<--outputfile> C<filename>>

output file name

=item B<-p> B<--password> I<password>

IMAP password

=item B<--port> I<port_number>

Connect to IMAP server using specified port.  Default [C<143>]

=item B<-s> B<--send>

send messages to specified e-mail address after downloading

=item B<--smtp> I<smtp_server>

name of smtp server that will be used to send the 
outgoing archived messages.

=item B<--subject> I<subject>

To assist with filtering, you may specify a Subject prefix.

=item B<-u> B<--user> I<username>

IMAP username

=item B<--url> I<url>

Specify the full path to a mailman mbox format archive.

(I<http://lists.bestpractical.com/pipermail/rt-users/2004-July.txt.gz>)

=item B<-v> B<--verbose>

print some details

=back

=head2 EXAMPLES

=over 15

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<--dump>
C<-o> I<outputfile> C<-f> I<INBOX.List1>

Dump all messages in I<INBOX.List1> to the single specified file

(Optionally, e-mail the file by adding C<-e> I<address>)

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<-f> 
I<INBOX.Lists.FreeBSD.Questions> C<--dump> C<-m> I<1> C<--until> 
I<1000> C<-e> I<destination_address> C<--smtp> I<smtp_server_name>
C<--subject> I<'[FreeBSD-Questions Archive]'> C<--send>

Send the first 1000 messages of your freebsd-questions mailling list
folder with filterable subject prefix [FreeBSD-Questions Archive].

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<--dump> C<-e> 
I<foo@example.com> C<--smtp> I<smtp_server_name> C<--send> C<--subject> 
I<subject_prefix>

Dump all messages and e-mail forward them individually

(Optionally, you can still specify C<-o> to also output to a file

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<-c> C<-f> 
I<INBOX.List1>

Count messages in IMAP folder I<INBOX.List1>

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<-l>

List all IMAP folders

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<-n>

List message id numbers in folder I<INBOX.List1>

=item C<gmailarchiver.pl> C<-u> I<username> C<-p> I<password> C<--dump> C<-m> 
I<10> C<-f> I<INBOX.List1>

Dump message with id number I<10> from folder I<INBOX.List1>

=item C<gmailarchiver.pl> C<--mailman> 
I<http://lists.bestpractical.com/cgi-bin/mailman/listinfo/rt-users>
C<-e> I<destination_address> C<--smtp> I<smtp_server_name>
C<--subject> I<'[RT-Users Web Archive]'> C<--send>

Send all messages from the RT-Users mailman mailing list archive
to the specified e-mail address, prefixing each subject with 
the filterable string '[RT-Users Web Archive]'

=item C<gmailarchiver.pl> C<--url> 
I<http://lists.bestpractical.com/pipermail/rt-users/2004-July.txt.gz>
C<-e> I<destination_address> C<--smtp> I<smtp_server_name>
C<--subject> I<'[RT-Users Web Archive]'> C<--send>

Send all messages from the RT-Users mailman mailing list archive for
July 2004 to the specified e-mail address, prefixing each subject with
the filterable string '[RT-Users Web Archive]'

=back

=head1 ACKNOWLEDGEMENTS

built using Mail::IMAPClient by C<DJKERNEN@cpan.org>

lifted the gunzip routine from CPAN.pm written by
Andreas Koenig E<lt>andreas.koenig@anima.deE<gt>

=head1 SEE ALSO

L<Mail::Webmail::Gmail|http://search.cpan.org/~mincus/Mail-Webmail-Gmail-0.07/>

=head1 AUTHOR

Andy Harrison

 { 
   domain   => "gmail", 
   tld      => "com", 
   username => "aharrison" 
 }

=cut


