#!/usr/bin/env perl # +-======-+ # Copyright (c) 2003-2007 United States Government as represented by # the Admistrator of the National Aeronautics and Space Administration. # All Rights Reserved. # # THIS OPEN SOURCE AGREEMENT ("AGREEMENT") DEFINES THE RIGHTS OF USE, # REPRODUCTION, DISTRIBUTION, MODIFICATION AND REDISTRIBUTION OF CERTAIN # COMPUTER SOFTWARE ORIGINALLY RELEASED BY THE UNITED STATES GOVERNMENT AS # REPRESENTED BY THE GOVERNMENT AGENCY LISTED BELOW ("GOVERNMENT AGENCY"). # THE UNITED STATES GOVERNMENT, AS REPRESENTED BY GOVERNMENT AGENCY, IS AN # INTENDED THIRD-PARTY BENEFICIARY OF ALL SUBSEQUENT DISTRIBUTIONS OR # REDISTRIBUTIONS OF THE SUBJECT SOFTWARE. ANYONE WHO USES, REPRODUCES, # DISTRIBUTES, MODIFIES OR REDISTRIBUTES THE SUBJECT SOFTWARE, AS DEFINED # HEREIN, OR ANY PART THEREOF, IS, BY THAT ACTION, ACCEPTING IN FULL THE # RESPONSIBILITIES AND OBLIGATIONS CONTAINED IN THIS AGREEMENT. # # Government Agency: National Aeronautics and Space Administration # Government Agency Original Software Designation: GSC-15354-1 # Government Agency Original Software Title: GEOS-5 GCM Modeling Software # User Registration Requested. Please Visit http://opensource.gsfc.nasa.gov # Government Agency Point of Contact for Original Software: # Dale Hithon, SRA Assistant, (301) 286-2691 # # +-======-+ # # acquire is a utility for retrieving data files from mass storage # with caching. # # !REVISION HISTORY: # # 02May2000 da Silva Initial code, man page. # 06Jun2000 Owens First prototype # 29Jun2000 Owens Error trapping and logging # 20Sep2000 da Silva Several mods. # 29Sep2000 Owens Added wildcard token support # 11Oct2000 da Silva Fixed bug in get_remote_list(): needed # "undef return_list" in order to avoid # current files to be refetched during # future fetch. 
# 09Mar2001 Owens      added -L to 'ls -l' commands, fixed bug in two digit
#                      year, cleaned up -strict operation
# 20Jun2001 Owens      linked functions to Remote_utils.pm
# 12Jul2001 Owens      fixed bug in Look Ahead
# 21Aug2001 Owens      added support for user@machine in rc file
# 10Sep2001 Owens      added logic to clean spool directory
# 14Aug2002 E. Yeh     rename the oldName to standardName
# 17Sep2002 E. Yeh     set the default spool directory
# 25Jul2003 Owens      Mods to support MODIS cloud assimilation
# 20Jan2004 Owens      Added check for DO_DMGET env variable
#                      Fixed bug found in -li option (P. Norris)
#           Lucchesi   Changed default dmget location
# 02Feb2005 Owens      Modified Error handling to catch missing files
#                      when running in ignore_obs mode (PR 1269)
# 01Mar2005 Todling    On halem, use /gmao/das for spool area
# 30Jun2006 Owens      Added support for command line file template
# 31Aug2006 Owens      Added ignore_0 option to ignore zero-length files
# 11Oct2007 Owens      Fix rename bug when file names are the same
# 18Apr2008 Owens      Changed error_log messages to report unique error code
#------------------------------------------------------------------

use FindBin;             # find location of the this script
use lib "$FindBin::Bin"; # make perl libraries available
use Err_Log;             # enable error logging
use Env;                 # make env vars readily available
use File::Basename;      # for basename(), dirname()
use File::Copy;          # for copy() and move()
use Getopt::Long;        # command line options
use Remote_utils;        # rget and rdmget
use Manipulate_time;     # tick() and token_resolve()

# Initialize the beast: defaults, command line options, writable
# directories, and the file name template list (@file_list).
# --------------------------------------------------------------
initialize();

# Build the file lists for the requested time window.
#   @current_files : userId+machine+path+oldName (date tokens resolved)
#   @name_std      : "oldName stdName" pairs     (date tokens resolved)
# ---------------------------------------------------------------------
@current_dtg = make_dtg($nymd.":".$nhms, $increment, $time_steps, 0);
($p_1, $p_2) = make_listSTD(\@current_dtg, \@file_list, \@name_STD);
@current_files = @$p_1;
@name_std      = @$p_2;

# Retrieve current files from mass storage (foreground)
# -----------------------------------------------------
$fetch_err = fetch_remote(\@current_files, $user_dir, 0);
if ($fetch_err) {
    if ($verbose) {
        print "acquire: >>> $ERROR <<< \nfetch_remote(strict=$strict), fetch_err = $fetch_err\n";
    }
    # In strict mode a remote failure is fatal; otherwise it is ignored
    # here and the per-file checks below decide what is really missing.
    if ($strict) {
        exit ($fetch_err);
    }
    if ($verbose) {
        print "acquire: >>> $ERROR <<< fetch_remote() exception is ignored\n";
    }
}

# fetch_local() returns the list of files still missing, or the
# single-element list (0) when everything is in place.
@missing_files = fetch_local(\@current_files, \%REMSIZE);

# BUGFIX: these tests used $missing_files[1] (the SECOND element), so a
# single missing file was never retried and never failed -strict.
# $missing_files[0] is 0 (false) on success and the first missing file
# name (true) on failure.
while (($retry_attempts <= $retry) && ($retry) && ($missing_files[0])) {
    fetch_remote(\@missing_files, $user_dir, 0);
    @missing_files = fetch_local(\@current_files, \%REMSIZE);
    $retry_attempts++;
    if ($verbose) {
        print "Retry # $retry_attempts to retrieve @missing_files.\n";
    }
    if ($error_logging) {
        err_log (4, "acquire", "${nymd}", "${expid}", "99.2",
                 {'err_desc' => "$0:retry $retry_attempts to retrieve @missing_files.",
                  'log_name' => "$error_log"});
    }
}

# Rename oldName files to their standardName (skipped when stdName is ".")
#-------------------------------------------------------------------------
$exit_code = rname($p_1, $p_2);

# Error checking: in strict mode report missing files via the exit code.
# (rname() always returns 0, so overwriting $exit_code here is harmless.)
#-----------------------------------------------------------------------
if (($missing_files[0]) && ($strict)) {
    print "MISSING FILES:@missing_files\n";
    $exit_code = $#missing_files + 1;
} else {
    $exit_code = 0;
}

# Retrieve future files into the spool directory (background fork)
# ----------------------------------------------------------------
if ($look_ahead) {
    if ($look_inc) {
        # -li hhmmss: interval to skip before the look-ahead window starts
        my $hh_inc  = int($look_inc / 10000);
        my $mm_inc  = ($look_inc % 10000) / 100;
        my $sec_inc = (60 * $mm_inc) + (3600 * $hh_inc);
        ($next_nymd, $next_nhms) = tick($nymd, $nhms, $sec_inc);
    }
    @future_dtg   = make_dtg($next_nymd.":".$next_nhms, $increment, $look_ahead, 1);
    @future_files = make_list(\@future_dtg, \@file_list);
    fetch_remote(\@future_files, $spool_dir, 1);
}

exit ($exit_code);

#......................................................................
#
# initialize: sets default options, acquires the command line options,
# verifies the specified directories for writing and checks for any
# incompatible option selections.
# initialize: parse command line options, apply environment overrides,
# verify/create the user and spool directories, and load @file_list
# either from the rc file or from the -template argument.
# Sets many package globals used throughout the script.
sub initialize {
    my ($status);

    # Set default options
    $exit_code      = 0;
    $retry_attempts = 0;
    $error_logging  = 0;
    $opt_cp       = 0;
    $opt_d        = "./";
    $opt_dmc      = "/usr/bin/dmget";
    $opt_e        = 0;
    $opt_f        = 0;
    $opt_h        = 0;
    $opt_lfn      = 0;
    $opt_la       = 0;
    $opt_li       = 0;
    $opt_nd       = 0;
    $opt_p        = 0;
    $opt_rc       = "acquire.rc";
    $opt_rcfg     = "DEFAULT";
    $opt_rt       = 1;
    $opt_strict   = 0;
    $opt_ignore_0 = 0;
    $opt_ssh      = "deprecated";
    $opt_template = 0;
    $opt_v        = 0;
    $opt_s        = "/tmp/$USER/spool/";

    # Command line options (classic Getopt::Long mode: values land in $opt_*)
    # ----------------------------------------------------------------------
    my $rc = GetOptions( "cp", "d=s", "dmc=s", "e=s", "f", "h", "lfn",
                         "la=i", "li=i", "nd", "p", "rc=s", "rcfg=s",
                         "rt=i", "s=s", "template=s", "ssh", "v",
                         "strict", "ignore_0");
    usage() if ( $opt_h || $#ARGV < 3 || $ARGV[0] eq 'help' || !$rc );

    $user_dir  = $opt_d if ($opt_d);
    $preserve  = $opt_p;
    $copy      = $opt_cp;
    $dmget_cmd = $opt_dmc;
    $dmget_cmd = $ENV{'FVDMGET'} if ( exists ($ENV{'FVDMGET'}));

    $error_log = $opt_e;
    if (( exists ($ENV{'ERROR_LOG_NAME'})) || ( $opt_e) ){
        $error_logging = 1;
        ($error_log = $ENV{'ERROR_LOG_NAME'}) =~ s/-L // if ( exists ($ENV{'ERROR_LOG_NAME'}));
        $error_log = $opt_e if ( $opt_e );
        # BUGFIX: "-e 999" is the documented way to disable logging; the
        # original used a numeric == against the string option value.
        $error_logging = 0 if ( $opt_e eq "999" );
        $expid = "x";
        $expid = $ENV{'EXPID'} if ( exists ($ENV{'EXPID'}));
    }

    $force      = $opt_f;
    $look_ahead = $opt_la;
    $look_inc   = $opt_li;
    $lfn        = $opt_lfn;

    # dmget is on by default; -nd or DO_DMGET=0 turns it off
    if ( ($opt_nd) || ( (exists ($ENV{'DO_DMGET'})) && ($ENV{'DO_DMGET'} == 0 ) ) ){
        $do_dmget = 0;
    }else{
        $do_dmget = 1;
    }

    $rcfile     = $opt_rc;
    $retry      = $opt_rt;
    $run_config = $opt_rcfg;
    $verbose    = $opt_v;
    $strict     = $opt_strict;
    $ignore_0   = $opt_ignore_0;
    $ignore_0   = $ENV{'IGNORE_0'} if ( exists ($ENV{'IGNORE_0'}));

    # Spool directory with fallbacks if the requested one is not writable.
    # BUGFIX: $spool_dir was assigned BEFORE the fallback logic, so a
    # fallback $opt_s was never actually used.  Assign after.
    if ( verify_dir($opt_s) == 1 ) {  # failure
        $opt_s = "/tmp/$USER/spool/";
        if ( verify_dir($opt_s) == 1 ) {
            $opt_s = "/tmp/$USER/spool";
        }
    }
    $spool_dir = $opt_s;

    $tmplt      = $opt_template;
    $nymd       = $ARGV[0];
    $nhms       = $ARGV[1];
    $increment  = $ARGV[2];
    $time_steps = $ARGV[3];

    # BUGFIX: "%REMSIZE={}" assigned a hash REFERENCE in list context,
    # creating a bogus one-key hash.  An empty hash is "()".
    %REMSIZE = ();

    $ERROR = "WARNING";
    if ($strict) { $ERROR = "ERROR" };

    # check status of directories
    $status = verify_dir($user_dir);
    if ($status == 1) { exit (1); };  # failure
    $status = verify_dir($spool_dir);
    if ($status == 1) { exit (1); };  # failure

    # get list of files from rc file (or the single -template argument)
    print "opt_template = $opt_template\n" if ($verbose);
    if ( $opt_template eq "0" ) {
        @file_list = read_rc( "$rcfile" );
    }else{
        # BUGFIX: "@file_list = push(@file_list, $tmplt)" overwrote the
        # list with push's return value (the element count), discarding
        # the template file name entirely.
        push @file_list, $tmplt;
    }

    # initialize listed files (cache used by get_remote_list)
    @listed_files = '';

    # check for conflicting option settings
    if ($lfn && !$opt_p) {
        $lfn = 0;
        print "(acquire) WARNING: -lfn option requires -p option -- long file names will not be used.\n";
        if ($error_logging) {
            err_log (4, "acquire", "${nymd}", "${expid}", "99.3",
                     {'err_desc' => "$0:WARNING: -p option incompatible with -lfn option -- long file names will not be used.",
                      'log_name' => "$error_log" });
        }
    }
}

#..........................................................................
#
# fetch_remote:
#    Checks for files in the local spool directory and fetches them from
#    the remote location if they are not spooled locally.
#
# Args:    $files       - ref to list of user@machine:/path/name entries
#          $destination - directory to fetch into when not preserving
#          $bg          - true for background (look-ahead) fetches
# Returns: count of files whose remote size could not be determined.
# Globals: %REMSIZE (remote size cache), $force, $preserve, $spool_dir,
#          $user_dir, $verbose, $run_config, $error_logging.
sub fetch_remote {
    my ( $files, $destination, $bg ) = @_;

    # Reset the per-call machine/user/file batching state.
    # BUGFIX: @dmf_user_list was never cleared, so on the second and later
    # calls (retries, look-ahead) stale users misaligned with machines.
    undef @dmf_machine_list;
    undef @dmf_user_list;
    undef @dmf_file_list;
    my $fetch_needed = 0;
    my $fetch_error  = 0;
    my @mss_files    = ();

    foreach my $file ( @$files ){
        $size_remote = 0;
        if ($verbose){ print "CHECKING: $file \n"; }
        $size_remote = $REMSIZE{$file} if (exists($REMSIZE{$file}));
        if ( ! $size_remote ) {
            # Not cached (or cached as 0): ask the remote side for the size
            @size = rflist($file, { 'size' => "1", 'run_config' => "$run_config" });
            $size_remote = $size[0];
            $REMSIZE{"$file"} = $size_remote;
            print "REMSIZE{$file} = $REMSIZE{$file}\n" if ($verbose);
        }
        if ($size_remote != -1) {
            if ( $force ) {
                # -f: pretend nothing exists locally so we always refetch
                $size_user  = -1;
                $size_spool = -1;
            }else{
                $size_spool = get_local_size($file, $spool_dir);
                $size_user  = get_local_size($file, $user_dir);
            }
            if (( $size_user eq $size_remote ) && ( !$bg )) {
                if ($verbose){ print "FOUND IN USER DIRECTORY: $file \n"; }
            }else{
                if ( $size_spool ne $size_remote ) {
                    # Needs a fetch: batch it per remote machine so dmget
                    # can be issued once per host.
                    my ($remote_user, $remote_machine, $remote_file) = splitrfile($file);
                    my $dmf_count = 0;
                    my $stored    = 0;
                    if (! @dmf_machine_list){
                        $fetch_needed = 1;
                        push (@dmf_user_list, $remote_user);
                        push (@dmf_machine_list, ${remote_machine});
                        push (@{$dmf_file_list[0]}, $remote_file);
                    }else{
                        foreach $machine (@dmf_machine_list) {
                            if ( ($machine eq ${remote_machine}) && (! $stored) ){
                                push (@{$dmf_file_list[$dmf_count]}, $remote_file);
                                $stored = 1;
                                $dmf_count ++;
                            }else{
                                $dmf_count ++;
                            }
                        }
                        if (! $stored ) {
                            push (@dmf_user_list, $remote_user);
                            push (@dmf_machine_list, "${remote_machine}");
                            push (@{$dmf_file_list[$dmf_count]}, $remote_file);
                        }
                    }
                    if ($verbose){
                        print "- REMOTE FETCH REQUIRED: $file\n";
                        print "- LOCAL  SIZE: $size_spool \n" unless ( $size_spool == -1 );
                        print "- REMOTE SIZE: $size_remote \n";
                    }
                    # A wrong-size spool copy would shadow the new fetch
                    remove_local($verbose, $file, $spool_dir);
                }else{
                    if ($verbose){ print "FOUND IN SPOOL DIRECTORY: $file \n"; }
                }
            }
        }else{
            if ($verbose) {
                print "acquire.fetch_remote: >>> $ERROR <<< PROBLEM DETERMINING REMOTE SIZE, $file\n";
            }
            $fetch_error ++;
            push (@mss_files, $file);
        }
    } # end foreach

    if ($fetch_needed) {
        if ( $preserve ) {
            $dest_dir = $spool_dir;    # fetch to spool dir for preservation
        }else{
            $dest_dir = $destination;  # fetch to wherever calling sub desires
        }
        if ( $bg ){
            if ($verbose){ print "Doing background fetch.\n"; }
            dmfetch_bg(\@dmf_file_list, \@dmf_machine_list, \@dmf_user_list, $dest_dir);
        }else{
            if ($verbose){ print "Doing foreground fetch.\n"; }
            dmfetch_fg(\@dmf_file_list, \@dmf_machine_list, \@dmf_user_list, $dest_dir);
        }
    }else{
        if ($verbose && $fetch_error) {
            print "ERROR - MISSING REMOTE FILES!\n";
        }else{
            print "NO FETCH NEEDED -- all files found locally\n";
        }
    }

    if ($fetch_error && $error_logging) {
        if ($verbose) {
            print "acquire.fetch_remote: >>> $ERROR <<< fetch_error = $fetch_error\n";
        }
        $log_rc = err_log (5, "acquire", "${nymd}", "${expid}", "99",
                           {'err_desc' => "MISSING: @mss_files",
                            'log_name' => "$error_log" });
        if ($verbose && $log_rc) {
            print "acquire.fetch_remote: >>> $ERROR <<< err_log(), log_rc = $log_rc\n";
        }
    }
    return ($fetch_error);
}
#............................................................................
#
# dmfetch_fg:
#    Acquires dmf_file_list files from each machine in dmf_machine_list
#    in the foreground: one rdmget (tape staging) per machine, then one
#    rget per file.
#
# Args: parallel array refs of per-machine file lists, machines, users,
#       plus the destination directory.
sub dmfetch_fg {
    my ( $dmf_file_list, $dmf_machine_list, $dmf_user_list, $destination ) = @_;
    my $count = 0;

    if ($do_dmget) {
        foreach $dmf_file_ref (@$dmf_file_list) {
            print " DOING DMGET OF REMOTE FILES on @$dmf_machine_list[$count] \n";
            rdmget(@$dmf_user_list[$count], @$dmf_machine_list[$count], $dmf_file_ref,
                   { 'verbose'    => "$verbose",
                     'dmget_bin'  => "$dmget_cmd",
                     'run_config' => "$run_config" } );
            $count++;
        }
        $count = 0;
    }else{
        if ( $verbose ) { print "NOT DOING DMGET OF REMOTE FILES \n"; }
    }

    foreach $dmf_file_ref (@$dmf_file_list) {
        $remote_machine = @$dmf_machine_list[$count];
        $remote_user    = @$dmf_user_list[$count];
        foreach $file_name ( @${dmf_file_ref} ){
            chomp($file_name);
            # BUGFIX: the original -lfn branch overwrote $file_name with its
            # long form (corrupting the remote source path given to rget),
            # appended to $destination cumulatively across files, and used
            # the never-assigned $long_file_name.  Build a per-file dest.
            # NOTE(review): this "machine%longname" spool naming may not
            # match fetch_local()'s long_fn(user@machine:path) lookup --
            # confirm before relying on -lfn (it already requires -p).
            my $dest = $destination;
            if ($lfn) {
                my $long_file_name = long_fn($file_name);
                $dest = $destination . "/" . $remote_machine . "%" . $long_file_name;
            }
            $rget_retcode = rget( "${remote_user}\@$remote_machine:${file_name}", "$dest",
                                  {'debug'      => "$verbose",
                                   'run_config' => "$run_config" });
        }
        $count ++;
    }
}

#.............................................................................
#
# dmfetch_bg:
#    Forks; the parent exits immediately while the child redirects its
#    output to $destination/logfile, issues a background dmget on each
#    remote archive machine, and acquires the dmf_file_list files.
#    NOTE(review): the parent calls exit(0), ending the whole script --
#    this is only safe because main() calls this via fetch_remote() as
#    its final act (look-ahead); confirm before reusing elsewhere.
sub dmfetch_bg {
    my ( $dmf_file_list, $dmf_machine_list, $dmf_user_list, $destination ) = @_;
    my $count = 0;
  FORK: {
        if ( $pid = fork() ) {          # parent here
            exit(0);                    # parent is all done
        }elsif ( defined $pid) {        # child here
            open (SAVEOUT, ">&STDOUT");    # save stdout
            open (SAVEERR, ">&STDERR");    # save stderr
            open (STDOUT, ">$destination/logfile") or die "can't redirect stdout";
            open (STDERR, ">&STDOUT")              or die "can't redirect stderr";
            select STDERR; $| = 1;      # make it unbuffered
            select STDOUT; $| = 1;      # make it unbuffered

            if ($do_dmget) {
                foreach $dmf_file_ref (@$dmf_file_list) {
                    # BUGFIX: the original indexed with the bareword
                    # "count" (always element 0) instead of $count.
                    $rdmget_retcode = rdmget(@$dmf_user_list[$count], @$dmf_machine_list[$count], $dmf_file_ref,
                                             { 'verbose'    => "$verbose",
                                               'dmget_bin'  => "$dmget_cmd",
                                               'run_config' => "$run_config" } );
                    print "ERROR in DMGET on @$dmf_machine_list[$count]\n" if ( ! $rdmget_retcode );
                    $count++;
                }
                $count = 0;
            }

            foreach $dmf_file_ref (@$dmf_file_list) {
                $remote_machine = @$dmf_machine_list[$count];
                $remote_user    = @$dmf_user_list[$count];
                foreach $file_name ( @${dmf_file_ref} ){
                    # Same -lfn per-file destination fix as dmfetch_fg.
                    my $dest = $destination;
                    if ($lfn) {
                        my $long_file_name = long_fn($file_name);
                        $dest = $destination . "/" . $remote_machine . "%" . $long_file_name;
                    }
                    $rget_retcode = rget( "${remote_user}\@$remote_machine:${file_name}", "$dest",
                                          { 'debug'      => "$verbose",
                                            'run_config' => "$run_config" } );
                }
                $count ++;
            }

            close STDOUT;
            close STDERR;
            open (STDOUT, ">&SAVEOUT");  # restore stdout
            open (STDERR, ">&SAVEERR");  # restore stderr
            exit(0);                     # fork is finished
        }elsif ( $! =~ /No more processes/) {
            # recoverable fork error: wait and retry
            sleep 5;
            redo FORK;
        }else{
            die "Can't fork: $!\n";
        }
    } # end FORK
}

#......................................................................
#
# fetch_local:
#    Verifies the user file size compared to the remote file size.
#    If file sizes are different, checks the spool directory.  If the
#    file sizes in the spool directory matches those on the remote
#    system, then (move|symlink|copy) the file to the user directory.
#
# fetch_local: compare each wanted file's size in the user directory with
# the cached remote size; when they differ, promote a correctly-sized
# spool copy into the user directory (move, or symlink/copy under -p).
# Args:    list ref of user@machine:/path files, then a ref to %REMSIZE.
# Returns: list of files still wrong/missing, or (0) when all are fine.
sub fetch_local{
    my %REMSIZE = %{ pop( @_ ) };
    my ( $files ) = @_;
    my $file_failure;
    undef @failed_files;
    print "VERIFYING LOCAL COPIES\n" if ($verbose);
    $file_failure = 0;
    foreach my $file ( @$files ){
        my $size_user   = get_local_size( $file, $user_dir );
        my $size_remote = $REMSIZE{$file};
        print "\$size_user = $size_user \$REMSIZE{$file} = $REMSIZE{$file} ignore_0 = $ignore_0 \n" if ($verbose);
        # A file is acceptable if sizes match, or if it is zero-length
        # remotely and -ignore_0 was requested.
        unless ( ( $size_user == $REMSIZE{"$file"} ) ||
                 ( ( $REMSIZE{"$file"} == 0 ) && ( $ignore_0 ) ) ){
            my $size_spool = get_local_size($file, $spool_dir );
            print "\$size_spool = $size_spool \$size_remote = $size_remote\n" if ($verbose);
            if ( $size_spool == $size_remote ) {
                my $base_file = basename($file);
                my $dest_file = ${user_dir}."/".${base_file};
                if ( $lfn ) {
                    $long_file = long_fn($file);
                    $src_file  = ${spool_dir}."/".${long_file};
                }else{
                    $src_file  = ${spool_dir}."/".${base_file};
                }
                # If preserving files on spool...
                # -------------------------------
                if ( $preserve ) {
                    if ($copy) {
                        print "COPYING: $src_file to $dest_file\n" if ($verbose);
                        my $rc = copy ( $src_file, $dest_file );
                        if (! $rc ) {
                            print "ERROR: copy of $src_file to $dest_file failed!\n";
                            print "Return status is $!\n";
                            if ($error_logging) {
                                err_log (5, "acquire", "${nymd}", "${expid}", "99.4",
                                         {'err_desc' => "$0:ERROR copy of $src_file to $dest_file failed!",
                                          'log_name' => "$error_log" } );
                            }
                        }
                    }else{
                        print "LINKING: $src_file to $dest_file\n" if ( $verbose );
                        symlink ( $src_file, $dest_file );
                    }
                # Otherwise simply move the file to user directory
                # ------------------------------------------------
                }else{
                    if ($verbose){ print "MOVING: $src_file to $dest_file\n"; }
                    $rc = move($src_file, $dest_file);
                    if ( ! $rc ) {
                        print "ERROR: failed when moving $src_file to $dest_file!\n";
                        print "       Return status is $!\n";
                        if ($error_logging) {
                            err_log (5, "acquire", "${nymd}", "sub fetch_local", "99.5",
                                     {'err_desc' => "$0:ERROR moving of $src_file to $dest_file failed!",
                                      'log_name' => "$error_log" } );
                        }
                        $exit_code ++;
                    }
                }
            }else{
                print "File size ERROR: $file is wrong size. Will try again.\n" if ( $retry );
                push (@failed_files, $file);
                print "failed files = @failed_files\n" if ($verbose);
                $file_failure ++;
            }
        } # end unless
    } # end foreach
    if ($file_failure) {
        return(@failed_files);
    } else {
        return(0);
    }
}

#......................................................................
#
# remove_local:
#    Deletes a local spool copy that needs to be (re)fetched.
#
sub remove_local {
    my ( $verbose, $local, $spool ) = @_;
    undef $local_file;
    if ( $lfn ){
        $local_file = long_fn($local);
    } else {
        $local_file = basename($local);
    }
    $local_file = join "/", ${spool}, ${local_file};
    if ( -e $local_file ) {
        if ($verbose) { print "DELETING INCORRECT SIZE LOCAL FILE\n"; }
        # NOTE(review): <$local_file> is a glob, presumably so wildcard
        # (%c/%n) names expand; it will misbehave on paths with spaces.
        $rc = unlink<${local_file}>;
        if ( !($rc) && ($verbose) ) {
            print "ERROR DELETING LOCAL FILE\n";
        }
    }
    return;
}

#......................................................................
#
# get_local_size:
#    Returns size in bytes of the named file under $spool, or -1 if the
#    size can not be determined.  Note: a zero-length file also yields
#    -1 because of the "|| -1" (relevant to the -ignore_0 logic).
#
sub get_local_size {
    my ( $local, $spool ) = @_;
    undef @local_size;
    undef $local_file;
    if ( $lfn ){
        $local_file = long_fn($local);
    } else {
        $local_file = basename($local);
    }
    $local_file = join "/", ${spool}, ${local_file};
    $local_size = ( -s $local_file );
    return ( $local_size || -1 );
}

#.......................................................................
#
# verify_dir:
#    Check write status of a directory, creating it if needed.
#    Returns 0 on success, 1 on failure (and logs in strict setups).
#    NOTE(review): $dir is interpolated into a shell command; callers
#    pass only option/env-derived paths, but avoid untrusted input.
#
sub verify_dir {
    my ( $dir ) = @_;
    if ( ( -d $dir ) && ( -w $dir ) ) {
        if ($verbose) { print " $dir exists.\n"; }
    } else {
        if ($verbose) { print "Creating $dir.\n"; }
        system ("mkdir -p $dir");
        system ("/bin/chmod 755 $dir");
    }
    if (( ! -d $dir ) || ( ! -w $dir ) ) {
        if ($error_logging) {
            err_log (5, "acquire", "${nymd}", "${expid}", "99.1",
                     {'err_desc' => "$0:FATAL ERROR - can not write to $dir.",
                      'log_name' => "$error_log" } );
        }
        print "FATAL ERROR Could not write to $dir.\n";
        return(1);  # failure
    } else {
        if ($verbose) { print "Using $dir. \n"; }
    }
    return(0);  # success
}

#......................................................................
#
# get_remote_list:
#    Uses the remote listing facility to obtain the files matching the
#    wildcards in a tokenized file name.  Caches each remote_file query
#    in @listed_files so the same pattern is only listed once.
#
sub get_remote_list {
    my ( $remote ) = @_;
    my $file_name   = '';
    my $full_name   = '';
    my @remote_list = '';
    my ($remote_user, $remote_machine, $remote_file ) = splitrfile($remote);
    my $listed = 0;
    undef @return_list;
    foreach $test (@listed_files){
        if ( $test eq $remote_file ){ $listed = 1; }
    }
    if (! $listed ){
        print "GET REMOTE LIST for $remote_file\n";
        @remote_list = rflist( "${remote}" );
        $error_code  = $?;
        if (! $error_code ) {
            chomp(@remote_list);
            foreach $file_name (@remote_list) {
                $full_name = "$remote_user"."@"."$remote_machine".":"."$file_name";
                push @return_list, $full_name;
                push @listed_files, $remote_file;
            }
            return (@return_list);
        }else{
            print "NO REMOTE MATCHES\n" if $verbose;
            return;
        }
    }else{
        return;
    }
}

#......................................................................
#
# read_rc:
#    Gets the list of tokenized file names from the specified rc file.
#    Each non-comment line is "machine:path/oldname" optionally followed
#    by "=> stdname"; stdnames go to the global @name_STD (or ".").
#
sub read_rc {
    my ( $rcfile ) = @_;
    my (@arr, $oldname, $stdname, $line, $count);
    @name_STD = ();  # standard name array ( = . if undefined)
    unless ( open( RCFILE, '<', $rcfile ) ) {
        print "(acquire) ERROR: Configuration file '", $rcfile, "' can not be opened.\n";
        exit (1);
    }
    @rc_file_list = ();
    $count = 0;
    # BUGFIX: the filehandle read had been lost from the while condition
    # ("while ( )" does not compile); restore the <RCFILE> read.
    while ( <RCFILE> ) {
        chomp;
        if ((! /^#/ ) && (/.{1,}/)) {  # if not a comment and not a blank line
            $_ =~ tr/\t/ /s;           # tabs -> single spaces
            $_ =~ s/ //g;              # remove all white spaces
            @arr = split ("=>", $_);
            if (scalar(@arr) <= 1) {   # input acq file doesn't have stdname
                $stdname = ".";
                $oldname = $arr[0];
            } else {
                $stdname = $arr[1];    # stdname (with % and no path)
                $oldname = $arr[0];    # in machine+path+oldname (%) format
            }
            chomp( $line = $oldname );              # remove any return characters
            ($line = $line) =~ s/^ *(.*) *$/$1/;    # compress any leading, trailing
            ($line = $line) =~ s/\s+/ /g;           # and multiple embedded spaces
            $rc_file_list[$count] = $line;
            $name_STD[$count]     = $stdname;
            $count ++;
        }
    }
    close( RCFILE );
    return (@rc_file_list);
}

#......................................................................
#
# make_dtg_list:
#    Makes a list of all possible file names for a particular date/time
#    group, expanding %c/%n wildcards via a remote listing.
#
sub make_dtg_list {
    my ($nymd, $nhms, $list ) = @_;
    undef @resolved_files;
    my $file;
    foreach $token_file (@$list){
        $file = token_resolve($token_file, $nymd, $nhms);
        if ( ($token_file =~ /%c/) || ($token_file =~ /%n/) ){
            push @resolved_files, get_remote_list( $file );
        }else{
            push @resolved_files, $file;
        }
    }
    return (@resolved_files);
}

#......................................................................
# will mv oldname to the stdname after oldName file copied.
# oldlist is userId+machine+path+oldName.
# stdlist is oldName stdName pair.
# rname: after the files have been copied, rename each oldName file in
# the current directory to its standardName from the "old std" pairs.
# Args:    $oldlist - ref to userId+machine+path+oldName entries
#          $stdlist - ref to "oldName stdName" pair strings
# Returns: always 0 (renames are best-effort).
sub rname {
    my ($oldlist, $stdlist) = @_;
    my ($rcd, $oldname, $stdname, $old, $m, $n, $k, $l);
    $m = $l = 0;

    # go through the oldlist
    for ($n = 0; $n < scalar(@$oldlist); $n++) {
        $k = rindex($$oldlist[$n], "/");
        $oldname = substr($$oldlist[$n], $k+1);
        # go through the stdlist for the matching pair
        if (-e $oldname) {  # if oldname exists locally
            $l++;
            foreach $rcd (@$stdlist) {
                # BUGFIX: quote the file name -- dots and other regex
                # metacharacters in $oldname previously matched loosely.
                if (($rcd =~ /\Q$oldname\E/) && (-e $oldname) ) {  # found
                    ($old, $stdname) = split(/ /, $rcd);
                    # skip rm and ln if stdname is undefined (".", "", ...)
                    if (length($stdname) <= 3) { last; }  # found but undefined
                    # Keep the .gz suffix consistent between the pair
                    if (($oldname =~ /\.gz$/) && !($stdname =~ /\.gz$/) ) {
                        $stdname .= '.gz';
                    }
                    if (!($oldname =~ /\.gz$/) && ($stdname =~ /\.gz$/) ) {
                        # BUGFIX: split(".gz",...) treated "." as a regex
                        # wildcard and could split at any "Xgz"; strip the
                        # literal trailing suffix instead.
                        $stdname =~ s/\.gz$//;
                    }
                    if ("$stdname" ne "$oldname") {
                        if (-e $stdname) { unlink $stdname };
                        rename $oldname, $stdname;
                    }
                    $m++;
                    last;
                }
            } # end of foreach $rcd
        } # end of if(-e $oldname)
    }
    if ($verbose) {
        print( "Rename = $m and actual files = $l\n");  # m = l if every file found
    }
    return(0);
}

#......................................................................
#
# make_dtg_listSTD:
#    Returns the "oldName stdName" pair for one file/date combination.
#    $file - old format of fileName (with ymd, no machine, no path)
#    $std  - standard format (in % format, no machine, no path)
#    Note: needs to be updated if a new std pattern is added.
sub make_dtg_listSTD {
    my ($nymd, $nhms, $file, $std) = @_;
    my ($name_std, $sub1, @ar);

    # Special pattern (updated for new std pattern):
    #-----------------------------------------------
    # ssmi_wentz_tpw pattern: .f??. for std .f%c%c.
    if ( $std =~ /.f%c%c./ ) {
        $file =~ /.f(\d{2})./;
        $sub1 = $1;
        $std  =~ s/%c%c/$sub1/g;
    }
    # tovs_l1c pattern: *.n?. for std .$1_l1c.$2.
    if ($std =~ /.\$1_l1c.\$2./ ) {
        @ar = split(/\./, $file);
        $std =~ s/\$1/$ar[0]/;  # first field (instrument name) for $1
        $std =~ s/\$2/$ar[1]/;  # character (sat) from 2nd field
    }
    $std = token_resolve( $std, $nymd, $nhms );

    # set up return pair "oldName stdName"
    #-------------------------------------
    $name_std = $file . " " . $std;
    return ($name_std);
}

#......................................................................
#
# make_dtg:
#    Writes an array of all "yyyymmdd:hhmmss" pairs for a given starting
#    point, interval and step count.  Also saves the first date/time
#    AFTER the window in the globals $next_nymd/$next_nhms (look-ahead).
#
sub make_dtg {
    my ( $dtg_pair, $increment, $time_step, $l_ahead ) = @_;
    my ($nymd, $nhms) = split /:/, $dtg_pair;
    undef @dtg_array;
    my $counter     = 0;
    my $hh          = int($increment/10000);
    my $mm          = ( $increment % 10000 ) / 100;
    my $num_seconds = (60 * $mm) + (3600 * $hh);
    my @new_dtg     = ( $nymd, $nhms );
    my $new_pair    = $new_dtg[0].":".$new_dtg[1];
    print "CURRENT DATE LIST: " if ( $verbose && !$l_ahead );
    print "FUTURE DATE LIST: "  if ( $verbose && $l_ahead );
    while ( $counter < $time_step ){
        push @dtg_array, $new_pair;
        $nymd = $new_dtg[0];
        $nhms = $new_dtg[1];
        print "$nymd/$nhms " if ( $verbose );
        @new_dtg  = (tick( $nymd, $nhms, $num_seconds ) );
        $new_pair = $new_dtg[0].":".$new_dtg[1];
        $counter ++;
    }
    print "\n" if ( $verbose );
    # Save this for look-ahead feature
    $next_nymd = $new_dtg[0];
    $next_nhms = $new_dtg[1];
    return (@dtg_array);
}

#.......................................................................
#
# sort_unique:
#    Sorts an array (by ref) in ascii order and removes any duplicates.
#
sub sort_unique {
    my ( $raw_files ) = @_;
    undef %saw;
    my @unique = grep(!$saw{$_}++, @$raw_files);
    my @sorted = sort { $a cmp $b } @unique;
    return (@sorted);
}

#.......................................................................
#
# make_list:
#    Returns a sorted unique list of all possible file & date/time
#    combinations from the tokenized file names and dtg pairs.
#
sub make_list {
    my ( $local_dtg, $file_list ) = @_;
    undef @raw_list;
    foreach my $dtg ( @$local_dtg ){
        my @fields      = split /:/, $dtg;
        my @dtg_segment = make_dtg_list( $fields[0], $fields[1], \@$file_list );
        @raw_list = ( @raw_list, @dtg_segment );
    }
    my @sorted_list = sort_unique(\@raw_list);
    chomp (@sorted_list);
    return (@sorted_list);
}

#.......................................................................
#
# make_listSTD:
#    Returns a sorted unique list of all possible file & date/time
#    combinations from given lists of tokenized file names and dtg
#    pairs, and an unsorted not-unique list of "oldName stdName" (ymd)
#    pairs (no userId, no machine, no path).
#    Note: the second returned array may be longer (more records) than
#    the first one, which is a sorted unique list.
#    Note: @name_std is deliberately a package global -- main() reads it
#    through the returned reference.
sub make_listSTD {
    my ( $local_dtg, $file_list, $std_list ) = @_;
    undef @raw_list;
    my ($name, @std, $name_std_pair, $m, $n, $m2, $x, @z);
    $m2 = $n = 0;
    foreach my $dtg ( @$local_dtg ){           # dtg in yyyymmdd:hhmmss format
        my @fields = split /:/, $dtg;          # yyyymmdd and hhmmss
        for ($m = 0; $m < scalar(@$file_list); $m++) {
            # original filename: $z[0] is machine+path+filename (% form)
            #   dtg_segment is userID+machine+path+name (ymd)
            # standard filename: $$std_list is stdName (% form, no machine/path)
            #   name_std[$m2] is "oldName stdName" (ymd) pair (no machine/path)
            $z[0] = $$file_list[$m];           # machine+path+filename(%)
            my @dtg_segment = make_dtg_list( $fields[0], $fields[1], \@z );
            for ($n = 0; $n < scalar(@dtg_segment); $n++) {
                $k    = rindex($dtg_segment[$n], "/");
                $name = substr($dtg_segment[$n], $k+1);
                $name_std[$m2] = make_dtg_listSTD($fields[0], $fields[1], $name, $$std_list[$m]);
                $m2++;
            }
            @raw_list = ( @raw_list, @dtg_segment );
        }
    }
    my @sorted_list = sort_unique(\@raw_list);
    chomp (@sorted_list);
    return (\@sorted_list, \@name_std);
}

#.......................................................................
#
# long_fn:
#    Concatenates remote file paths and names substituting "_" for "/"
#    and "%" for ":" to create unique long file names for local files.
#
sub long_fn {
    my ( $file ) = @_;
    $file =~ s?/?_?g;  # "/" -> "_"
    $file =~ s/:/%/g;  # ":" -> "%"
    return ($file);
}

#.......................................................................
#
# usage:
#    Prints the man page and exits with status 1.
#
sub usage {
   print <<"EOF";

NAME
     acquire - Retrieves files from mass storage with look-ahead capability

SYNOPSIS

     acquire [...options...]  bymd bhms ihms nstep

DESCRIPTION

     Acquire is a general purpose utility for retrieving files from
     mass storage with a look-ahead spooling capability. The full path
     names of the files to be retrieved are specified in a resource
     file (see RESOURCE FILE below) by means of GrADS-like templates.
     For example, a resource file could specify a file name of the form:

        gatun:/silo3/dao_ops/conv+tovs/ods/Y%y4/M%m2/r4ods.t%y4%m2%d2_%h2z

     The following parameters are required on input:

     bymd     beginning year-month-day, e.g., 19980101
     bhms     beginning hour-min-sec, e.g., 120000
     ihms     time step increment in hour-min-sec format
     nstep    number of timesteps for time looping.

     For example,

          acquire 19980107 120000 060000 6

     will generate the followings pairs of date/time

                  nymd     nhms
                --------   ------
                19980107   120000
                19980107   180000
                19980108   000000
                19980108   060000
                19980108   120000
                19980108   180000

     For each one of these dates/time, acquire will expand the
     GrADS-like template and retrieve the corresponding files from mass
     storage. For the examples above, the files to be retrieved are:

        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980107_12z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980107_18z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980108_00z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980108_06z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980108_12z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980108_18z

     Upon transfer from mass storage, these files are made available at
     the user local directory (see "-d" option).

     The look-ahead capability allows the user to retrieve files for
     future use. For example,

          acquire -la 4 19980107 120000 060000 6

     will generate 4 additional date/time pairs:

                  nymd     nhms
                --------   ------
                19980109   000000
                19980109   060000
                19980109   120000
                19980109   180000

     and transfer the files

        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980109_00z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980109_06z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980109_12z
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980109_18z

     to a spool directory. Next time the user issues an "acquire" to
     retrieve these files, they will already be available locally.
     For this "look-ahead" transfer acquire forks itself, performing
     this task in the background.

     Acquire is also smart enough to avoid retrieving the same file
     more than once. If in the example above we replace the file name
     template with

        gatun:/silo3/dao_ops/conv+tovs/ods/Y%y4/M%m2/r4ods.t%y4%m2%d2

     only the files

        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980107
        gatun:/silo3/dao_ops/conv+tovs/ods/Y1998/M01/r4ods.t19980108

     would be retrieved.

OPTIONS

     -cp            when files are to be preserved in spool directory
                    (see -p) a copy is made to the user destination
                    directory (the default is to make a symlink)
     -d path        destination (local) directory (default: ./)
     -dmc           dmget command to use on remote system, default
                    is /usr/bin/dmget
     -e logfile     name of error log file. (default: no logging enabled)
     -f             forces remote copy whether the file exists locally or not
     -h             prints this page
     -lfn           use long file names in spool directory to prevent
                    file name conflicts
     -la lstep      Look ahead lstep time steps, that is, future data
                    can be pre-fetched and kept in spool directory
                    (default: lstep=0)
     -li hhmmss     interval to add to initial date and time before
                    looking ahead, useful if there are data you want to
                    skip over. (default: 000000)
     -nd            remote file system is not DMF managed, do not use dmget
     -ignore_0      ignore zero length data files.
     -p             preserve files in spool directory, simply making a
                    symlink() or copy to the local directory (see -cp);
                    this is useful if the file is expected to be reused.
                    In such cases one should monitor disk usage in the
                    spool directory.
     -rc fname      resource file name (default: acquire.rc)
     -ru rem_user   user name on remote machine if different than
                    local machine (default:\$USER)
     -rt num_retry  number of time to attempt transfers (default: 1)
     -s path        spool directory (default: /scratch1/\$USER/spool)
     -strict        returns non-zero exit code if not all files
                    specified in the rc file are acquired
     -v             verbose mode (default is real quiet)

RESOURCE FILES

     Acquire resource files consist of comment lines starting with
     '#' or a remote file name templates of the form

           rhost:path

     This is better explained with an example:

        helios1:/u/joiner/tovs/l1b/%y4%m2/l1c_new/hirs.nj.hdf.t%y4%m2%d2

     In this example, the remote host (rhost) is "helios1". This file
     name "template" is a GrADS like pattern for matching remote file
     names (that is, files to be retrieved). Supported tokens are:

          %y4       year,  e.g., 1997
          %y2       year,  e.g., 97
          %m2       month, e.g., 12
          %d2       day,   e.g., 31
          %h2       hour,  e.g., 18
          %c        matches any character [?]
          %n        matches any single digit [0-9]

     In addition to GrADS-like tokens, any defined environment variable
     can be used in the remote file name template. For example,

        \${MHOST}:/u/joiner/tovs/l1b/%y4%m2/l1c_new/hirs.nj.hdf.t%y4%m2%d2

     would take the remote host name from the environment variable
     \$MHOST.

     In addition, a remote user can be specified using user\@machine

        dao_user\@gatun:/silo3/dao_ops/conv+tovs/ods/Y%y4/M%m2/r4ods.t%y4%m2%d2

ADDITIONAL FILES

     Acquire requires the libraries found in:

        Remote_utils.pm
        Extract_config.pm
        Err_Log.pm

     and it uses the remote shell specified in:

        Run_Config

SEE ALSO

    pesto - (P)ut (E)xperiments in Mass (Sto)rage

AUTHORS

     Tommy Owens (towens\@dao.gsfc.nasa.gov)
     Arlindo da Silva (dasilva\@dao.gsfc.nasa.gov)
     Rob Lucchesi (rob\@dao.gsfc.nasa.gov)

EOF

exit(1)

}