#!/usr/bin/env perl # +-======-+ # Copyright (c) 2003-2007 United States Government as represented by # the Admistrator of the National Aeronautics and Space Administration. # All Rights Reserved. # # THIS OPEN SOURCE AGREEMENT ("AGREEMENT") DEFINES THE RIGHTS OF USE, # REPRODUCTION, DISTRIBUTION, MODIFICATION AND REDISTRIBUTION OF CERTAIN # COMPUTER SOFTWARE ORIGINALLY RELEASED BY THE UNITED STATES GOVERNMENT AS # REPRESENTED BY THE GOVERNMENT AGENCY LISTED BELOW ("GOVERNMENT AGENCY"). # THE UNITED STATES GOVERNMENT, AS REPRESENTED BY GOVERNMENT AGENCY, IS AN # INTENDED THIRD-PARTY BENEFICIARY OF ALL SUBSEQUENT DISTRIBUTIONS OR # REDISTRIBUTIONS OF THE SUBJECT SOFTWARE. ANYONE WHO USES, REPRODUCES, # DISTRIBUTES, MODIFIES OR REDISTRIBUTES THE SUBJECT SOFTWARE, AS DEFINED # HEREIN, OR ANY PART THEREOF, IS, BY THAT ACTION, ACCEPTING IN FULL THE # RESPONSIBILITIES AND OBLIGATIONS CONTAINED IN THIS AGREEMENT. # # Government Agency: National Aeronautics and Space Administration # Government Agency Original Software Designation: GSC-15354-1 # Government Agency Original Software Title: GEOS-5 GCM Modeling Software # User Registration Requested. Please Visit http://opensource.gsfc.nasa.gov # Government Agency Point of Contact for Original Software: # Dale Hithon, SRA Assistant, (301) 286-2691 # # +-======-+ # # Pesto - (P)ut (E)xperiment in Mass [Sto]rage # # !REVISION HISTORY: # # 26Dec1999 da Silva Initial code. # 27Dec1999 da Silva Added "-d" option # 13jan1999 da Silva Changed: pesto.rc -> pesto.arc, etc # 29jan2000 da Silva Changed MHOST --> PESTOROOT # 18Feb2000 da Silva Added "-clean" option # 03May2000 da Silva Added support for %n (number) and %c (character). # 02Nov2000 da Silva Made scp/ssh defaults # 22Aug2001 Owens Added support for processing data only for supplied date # 18Oct2001 Owens Added support for processing data only for supplied synoptic time # 17May2002 Owens Added support for non-zero exit status # 20May2003 Owens port to Perl v5.6.0 # 01Dec2006 Owens parallel transfers for multiple pesto.arc matches #------------------------------------------------------------------------ BEGIN{ $VERSION = "Version 1.0.beta.3 of 18Feb2000"; # Standard modules (local modules defined below) # ---------------- use Env; # make env vars available use Cwd; # make env vars available use FindBin; # so we can find where this script resides use File::Basename; # for basename(), dirname() use File::Path; # for mkpath use File::Copy; # for copy() and move() use Getopt::Long; # extended getopt function my %dirlist; # Default values # --------------- $myname = basename($0); $clean = 0; $date = ''; $die_away = 0; $help1 = 0; $help2 = 0; $dryrun = 0; $local_dir = $workdir = cwd(); $dirmode = 755; $mode = 644; $syntime = ''; $verbose = "0"; $local = "0"; $dolinks = "0"; GetOptions ('arc=s' => \$pestorc, 'clean' => \$clean, 'd=s' => \$local_dir, 'date=i' => \$date, 'fvhome=s' => \$fvhome, 'fvroot=s' => \$fvroot, 'mode=s' => \$mode, 'r=s' => \$pestoroot, 'h'=> \$help1, 'help' => \$help2, 'l' => \$local, 'ln' => \$dolinks, 'n' => \$dryrun, 'syntime=s' => \$syntime, 'verbose' => \$verbose, 'cp' => \$null, 'rcp' => \$null, 'rsh' => \$null, 'srb' => \$srb, 'expid=s' => \$expid ); if ( !defined $expid ) { if ( exists $ENV{'EXPID'} ) { $expid = $ENV{'EXPID'}; }else{ print "($myname) ERROR: No -expid or EXPID is set.\n\n"; $die_away = 1; } } if ( !defined $pestorc ) { if ( exists $ENV{'PESTOARC'} ) { $pestorc = $ENV{'PESTOARC'}; }else{ print "($myname) ERROR: No -arc or PESTOARC is set.\n\n"; $die_away = 1; } } if ( !defined $pestoroot ) { if ( exists $ENV{'PESTOROOT'} ) { $pestoroot = $ENV{'PESTOROOT'}; }else{ print "($myname) ERROR: No -r or PESTOROOT set.\n\n"; $die_away = 1; } } if ( !defined $fvhome ) { if ( exists $ENV{'FVHOME'} ) { $fvhome = $ENV{'FVHOME'}; }else{ $fvhome = "null"; } } if ( !defined $fvroot ) { if ( exists $ENV{'FVROOT'} ) { $fvroot = $ENV{'FVROOT'}; }else{ $fvroot = "NONE"; } } #-Set the path to be searched for required programs. $PROGRAM_PATH = $FindBin::Bin; @SEARCH_PATH = ( "${fvhome}/run", "${PROGRAM_PATH}", "${fvroot}/bin" ); if ( exists $ENV{'PATH'} ) { $ENV{'PATH'} = join( ':', @SEARCH_PATH, $ENV{'PATH'} ); }else{ $ENV{'PATH'} = join( ':', @SEARCH_PATH ); } $ENV{'PATH'} = join( ':', $ENV{'PATH'}, $ENV{'PBS_O_PATH'} ) if ( exists $ENV{'PBS_O_PATH'} ); }#End BEGIN block print "\@SEARCH_PATH = @SEARCH_PATH\n" if ( $verbose ); use lib ( @SEARCH_PATH ); # DAO Perl libraries # -------------- use Remote_utils; # DAO file transfer routines use Manipulate_time; # For token resolve # Prevent the accumulation of zombies if the parent fails to wait. # See www.perldoc.com/perl5.6.1/pod/func/fork.html $SIG{CHLD} = 'IGNORE'; # Record current environment # -------------------------- # system("/sbin/env") if ( $verbose ); if ( "$die_away" || "$help1" || "$help2" ) { warn "($0) errors encountered BEGIN block\n";usage();} if ( $verbose ) { print "\$verbose = $verbose\n"; print "\$pestorc = $pestorc\n"; print "\$clean = $clean\n"; print "\$pestoroot = $pestoroot\n"; print "\$date = $date\n"; print "\$dryrun = $dryrun\n"; print "\$local_dir = $local_dir\n"; print "\$mode = $mode\n"; print "\$syntime = $syntime\n"; print "\$verbose = $verbose\n"; print "\$workdir = $workdir\n"; } #-Change to local directory if different from cwd if ( $local_dir ne $workdir ) { chdir $local_dir or die "($0) FATAL ERROR: Unable to chdir to $local_dir\n"; } #-Check for .no_archiving file if ( -e ".no_archiving" ) { print ".no_archiving file detected - Nothing to do here\n"; exit 0; } #-Look for arc file if (! -e $pestorc ) { $pestorc_test = "$workdir/".basename($pestorc); if (! -e $pestorc_test ) { die "($myname): Unable to locate $pestorc or $pestorc_test\n"; } $pestorc = $pestorc_test; } # Reset PESTOROOT environment variable # ------------------------------------ $ENV{'PESTOROOT'} = $pestoroot; print "PESTOROOT = $ENV{'PESTOROOT'}\n" if ( $verbose ); # Echo input parameters # --------------------- if ( $verbose ){ print << "EOD"; Pesto - (P)ut (E)xperiment in Mass [Sto]rage Resource file name: $pestorc Archiving Destination: $pestoroot Local directory: $local_dir File mode: $mode Resource file contents: .............................................................. EOD } open( RCFILE, $pestorc ) or die "($myname) ERROR: Configuration file '", $pestorc, "' can not be opened.\n"; while ( ){ print $_ if ( $verbose ); chomp; push( @url_list, $_ ) if ((! /^#/ )&&(/.{1,}/)); } close( RCFILE ); print "\n . . . . . END of RC FILE . . . . . \n\n" if ( $verbose ); # # Process list of URLs # -------------------- foreach $url ( @url_list ){ ( $url = $url ) =~ ( s/\${PESTOROOT}/$pestoroot\//g ); ( $ruser, $rhost, $file_path ) = splitrfile ( $url ); print "($myname):Processing $url\n"; $file = basename($file_path); $dir = dirname( $file_path); $regexp = token_resolve($file, $date, $syntime); print "($myname):Regular expression to match = $regexp\n"; @matchfiles = glob("$regexp") or print "($myname):No match\n"; @files = grep (/^${expid}./, @matchfiles); if(($#files == 0)&&((! -l $files[0])||($dolinks))){ print "Single file case $files[0]\n"; &mkdir($verbose, $dir, $dirmode, $files[0], $mode, $myname, $file, $rhost, $ruser); &pastina($verbose, $clean, $dryrun, $dir, $dirmode, $files[0], $mode, $myname, $file, $rhost, $ruser); } if ( $#files > 0 ) { $found = $#files + 1; print "Multiple matches: $found files \n"; $counter = 0; foreach $filename ( @files ){ # check if file is a link if ((! -l $filename )||($dolinks)){ $rc=&mkdir($verbose, $dir, $dirmode, $files[$counter], $mode, $myname, $file, $rhost, $ruser); print "(pesto): mkdir result = $rc\n"; die "error creating directory " if ( !$rc ); # prevent scp blocking by host sleep(1) if (! $local ); sleep(4) if ( ($counter) && ( ($counter % 4) == 0 ) && (! $local ) ); print "Forking $filename\n"; $counter ++; $pid = fork; die "Can't fork!\n" if (!defined $pid); if ($pid == 0) { # child (clone) process &pastina($verbose, $clean, $dryrun, $dir, $dirmode, $filename, $mode, $myname, $file, $rhost, $ruser); exit(0); }#End child }#End link check }#End foreach # -------- Wait for spawned shells to complete print "Parent waiting for children to complete\n"; 1 while (wait() != -1); } if ( -e "${local_dir}/PESTO_FAILURE" ){ warn "pesto failed\n"; print "PESTO_FAILURE\n"; open( LISTING, "<${local_dir}/PESTO_FAILURE" ) or warn "cannot read ${local_dir}/PESTO_FAILURE\n"; while ( ) { $line =$_; print "PESTO_FAILURE: $line\n"; } print "PESTO_FAILURE\n"; close(LISTING); unlink( "${local_dir}/PESTO_FAILURE"); die "${local_dir}/PESTO_FAILURE\n"; } } exit(0); # ----------------- sub mkdir{ my ($verbose,$dir,$dirmode,$filename,$mode,$myname,$file,$rhost,$ruser) = @_; my $pasta_out = `pasta $filename $file`; my ( $y4val, $y2val, $m2val, $m3val, $d2val, $h2val, $sval, $n2val ) = split / /,$pasta_out; $d2val = "01" if ( $d2val eq "dy" ); $dir_date = "$y4val"."$m2val"."$d2val"; $dir_time = 0; $dir_time = "$h2val" if ( "$h2val" ne "hr" ); ( $dir = $dir ) =~ ( s/%s/$sval/g ); if ( $dir =~ /%/ ) { $archive_dir = token_resolve( $dir, $dir_date, $dir_time); }else{ $archive_dir = $dir; } unless ( $dirlist{$archive_dir} ) { print "(pesto): $archive_dir not found - Creating\n"; print "($myname): Doing mkdir of $archive_dir\n" if ( $verbose ); $mkdir_rc=mkdir_remote( ${ruser},${rhost},${archive_dir}, {'debug' => $verbose}); print "($myname): mkdir_rc=${mkdir_rc} Error doing mkdir of $archive_dir\n" if ( ! $mkdir_rc ); $chmod_rc=chmod_remote( ${ruser},${rhost},${archive_dir},${dirmode},{ 'recursive' => 0, 'debug' => $verbose}); print "($myname): chmod_rc=${chmod_rc} Error doing chmod on $archive_dir\n" if ( $chmod_rc ); $dirlist{$archive_dir} = 1; } else{ $mkdir_rc = 1; } return ($mkdir_rc); } sub pastina{ my ($verbose,$clean,$dryrun,$dir,$dirmode,$filename,$mode,$myname,$file,$rhost,$ruser) = @_; local $SIG{CHLD} = 'DEFAULT'; my $pasta_out = `pasta $filename $file`; my ( $y4val, $y2val, $m2val, $m3val, $d2val, $h2val, $sval, $n2val ) = split / /,$pasta_out; $d2val = "01" if ( $d2val eq "dy" ); $dir_date = "$y4val"."$m2val"."$d2val"; $dir_time = 0; $dir_time = "$h2val" if ( "$h2val" ne "hr" ); ( $dir = $dir ) =~ ( s/%s/$sval/g ); if ( $dir =~ /%/ ) { $archive_dir = token_resolve( $dir, $dir_date, $dir_time); }else{ $archive_dir = $dir; } if ( $dryrun ) { print "\n($myname):Will archive ${filename} to ${ruser},${rhost},$archive_dir\n"; }else{ print "Doing rput of ${filename} to ${ruser}\@${rhost}:${archive_dir}\n"; $rput_rc=rput( "${filename}", "${ruser}\@${rhost}:${archive_dir}/${filename}", {'direct' => '1', 'clean' => $clean, 'mode' => $mode, 'debug' => $verbose}); $error = $? >>8; if ( $error || (! $rput_rc) ){ open ( ERROR, "+>> ${local_dir}/PESTO_FAILURE" ) or die "cannot open ${local_dir}/PESTO_FAILURE\n"; print ERROR "FATAL ERROR REMOTE COPY STATUS for ${filename} = $rput_rc\n"; close ( ERROR ); print "($myname):rput error = $error\n"; warn "($myname):rput error = $error\n"; }else{ print "REMOTE COPY STATUS for ${filename} = $rput_rc\n"; $rc=unlink ${filename} if ( $rput_rc && $clean ); } } return(0); } # ----------------- sub usage { print << "EOF"; NAME Pesto - (P)ut (E)xperiment in Mass [Sto]rage SYNOPSIS pesto [-arc fname] [-d -ldir] [-h] [-n] [-r pestoroot] [-v] [-cp prog] [-rcp prog] [-rsh prog] [-m mode] DESCRIPTION Pesto is a general purpose script for archiving experiments on mass storage. It requires the utility "pasta" and a resource file (see RESOURCE FILE below). OPTIONS -arc fname archiving resource file name (default: $pestorc) -clean delete files after transfer -d ldir local directory for files being archived -date date date of files to process -h or -help displays this man page. -n dry-run mode, does not copy anything, just print what it would do -r pestoroot destination for archiving files (default: $pestoroot) -syntime time synoptic time of files to process -v verbose mode; default is real quiet -m mode destination files will have this mode (default: $mode); if "0", it does not change default file mode. RESOURCE FILE Pesto resource files consist of comment lines starting with '#' or a pesto archiving rule of the form: [pestoroot][remotedir]/template This is better explained with an example: \${PESTOROOT}%s/anal/%m3%y4/%s.prog.t%y4%m2%d2 \$PESTOROOT is an enviroment variables which can be redefined with the "-r" command line options; you can also use any other environment variable of your choice. For this particular example we have: pestoroot = \$PESTOROOT remotedir = %s/anal/%m3%y4 template = %s.prog.t%y4%m2%d2 The "template" is a GrADS like pattern for matching local file names (that is, files to be archived). Supported tokens are: %s experiment ID (a string followed by ".") %y4 year, e.g., 1997 %y2 year, e.g., 97 %m2 month, e.g., 12 %d2 day, e.g., 31 %h2 hour, e.g., 18 %n2 minute, e.g., 30 %n a single digit number: 0, 1, ..., 9 %c a single character Notice that there is no "%m3" allowed on input. The "%s" token is not part of the standard GrADS templates and stand for the experiment ID. So, we issue pesto -r dasilva\@gatun.gsfc.nasa.gov: and there is a file named "efake.prog.t19971231" in the local directory, pesto will generate the following archive (remote copy) command: $rcp efake.prog.t19971231 dasilva\@gatun:efake/anal/dec1997 Now, if you enter pesto -r /scratch1/dasilva/ pesto will generate the following archiving command: $cp efake.prog.t19971231 /scratch1/dasilva/efake/anal/dec1997 RESTRICTIONS 1. The local file name/template must contain enough information for deriving the remote directory name. 2. The token "%m3" is not allowed in the "template" but can appear in "remotedir". 3. The experiment id must be followed by a ".": template = %s.prog.t%y4%m2%d2 is OK template = %sprog.t%y4%m2%d2 is NOT ENVIRONMENT The following enviroment variables are recognized: PESTOARC default resource file name PESTOROOT default name for archiving destination VERSION $VERSION AUTHOR Arlindo da Silva (dasilva\@gsfc.nasa.gov), NASA/GSFC Tommy Owens (towens\@gmao.gsfc.nasa.gov), SAIC/GSFC EOF exit 1; }