Commit in java/sandbox/data-cat/src/main/python/hpsdatacat on MAIN
add_location.py+109added 653
add_metadata.py+92added 653
delete.py+24-8652 -> 653
find.py+47-48652 -> 653
register.py+45-81652 -> 653
util.py+110-9652 -> 653
+427-146
2 added + 4 modified, total 6 files
Check in working copy on data catalog scripts.

java/sandbox/data-cat/src/main/python/hpsdatacat
add_location.py added at 653
--- java/sandbox/data-cat/src/main/python/hpsdatacat/add_location.py	                        (rev 0)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/add_location.py	2014-06-02 23:50:37 UTC (rev 653)
@@ -0,0 +1,109 @@
+"""
+Wrapper for 'addLocation' command.
+
+The help from that command is the following:
+
+Usage: datacat addLocation [-options] <dataset name> <logical folder> <file path>
+
+parameters:
+  <dataset name>     Tag-name for the new dataset.
+  <logical folder>   Logical Folder Path under which the dataset lives.
+  <file path>        Location of file to add to Data Catalog.
+
+options:
+  --group <Dataset Group>   Dataset Group under which the dataset lives.
+  --site <Site=SLAC>        Site at which file exists on disk.  Defaults to SLAC if not specified.
+  --version <Version=-1>    Version ID of the dataset (Defaults to the Latest version if not specified.)
+
+Example command:
+
+python ./src/main/python/hpsdatacat/add_location.py -n hps_testrun_001351_recon -f \
+/HPS/testrun2012/data/recon -p /nfs/slac/g/hps3/data/datacat-test/data/hps_testrun_001351_test2.slcio \
+-s JLAB
+
+FIXME: Above command doesn't work right now!  
+  
+""" 
+
+from util import *
+
+__command = 'addLocation'
+
+# lowest level node in directory hierarchy
+group = get_default_group()
+
+# site
+site = get_default_site()
+
+parser = create_base_argparser(__command)
+parser.add_argument('-f', '--folder', help='folder where dataset is located')
+parser.add_argument('-n', '--name', help='dataset name')
+parser.add_argument('-p', '--path', help='physical file location')
+parser.add_argument('-g', '--group', help='dataset group')
+parser.add_argument('-s', '--site', help='dataset site')
+parser.add_argument('-v', '--version', help='dataset version')
+args = vars(parser.parse_args())
+
+connection_string, dry_run, mode = handle_standard_arguments(args)
+
+# connection    
+if connection_string == None:    
+    connection_string = get_ssh_connection_string()    
+    if connection_string == None:
+        raise Exception("Couldn't figure out a connection_string to use!")
+
+# folder arg (required)            
+if args['folder'] != None:
+    folder = args['folder']
+else:
+    raise Exception("The dataset folder is a required argument.")
+
+# dataset name arg (required)
+if args['name'] != None:
+    dataset_name = args['name']
+else:
+    raise Exception("The dataset name is a required argument.")
+
+# physical path arg (required)
+if args['path'] != None:
+    path = args['path']
+else:
+    raise Exception("The physical file path is a required argument.")
+
+# group arg (optional)
+if args['group'] != None:
+    group = args['group']
+
+# site arg (optional)    
+if args['site'] != None:
+    site = args['site']
+    check_valid_site(site)
+
+# version (optional)
+version = None    
+if args['version'] != None:
+    version = args['version']    
+
+# create base command line
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+
+# append group
+if group != None:
+    command_line += ' --group %s' % group
+
+# append site    
+if site != None:
+    command_line += ' --site %s' % site
+    
+# append version    
+if version != None:    
+    command_line += ' --version %s' % version
+    
+# add dataset name, folder and physical path
+command_line += ' %s %s %s' % (dataset_name, folder, path)    
+
+# run the command
+lines, errors, return_value = run_process(command_line)
+
+# print results
+print_result(__command, return_value, errors)
\ No newline at end of file

java/sandbox/data-cat/src/main/python/hpsdatacat
add_metadata.py added at 653
--- java/sandbox/data-cat/src/main/python/hpsdatacat/add_metadata.py	                        (rev 0)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/add_metadata.py	2014-06-02 23:50:37 UTC (rev 653)
@@ -0,0 +1,92 @@
+"""
+Script for setting meta data on an existing dataset or group.
+This script cannot be currently used to set a new value on an 
+existing meta data field.
+"""
+
+import argparse
+
+from util import *
+
+# command this script will use
+__command = 'addMetaData'
+
+# create the argparser
+parser = create_base_argparser(__command)
+parser.add_argument('-f', '--folder', help='folder where dataset lives')
+parser.add_argument('-d', '--dataset', help='target dataset for meta data')
+parser.add_argument('-v', '--version', help='version ID of the dataset (defaults to latest)')
+parser.add_argument('-g', '--group', help='dataset group or group to tag when no dataset specified')
+parser.add_argument('-m', '--metadata', nargs='*', help='a single meta data definition')
+args = vars(parser.parse_args())
+
+# handle standard arguments
+connection_string, dry_run, mode = handle_standard_arguments(args)
+ 
+# connection string
+if connection_string == None:    
+    connection_string = get_ssh_connection_string()    
+    if connection_string == None:
+        raise Exception("Couldn't figure out a connection string to use!")
+ 
+# dataset
+if args['dataset'] != None:
+    dataset = args['dataset']
+else:
+    dataset = None    
+
+# group    
+if args['group'] != None:
+    group = args['group']
+else:
+    group = None    
+
+# dataset and/or group is required    
+if dataset == None and group == None:
+    raise Exception("A dataset or group is required.")
+
+# folder
+if args['folder'] != None:
+    folder = args['folder']
+else:
+    raise Exception("A folder is required.")    
+
+# version    
+if args['version'] != None:
+    version = args['version']
+else:
+    version = None    
+
+# metadata    
+if args['metadata'] == None:
+    raise Exception("At least one meta data definition is required.")            
+metadata = format_metadata(args['metadata'])
+if metadata == None:
+    raise Exception("Bad meta data definition.")    
+
+# create base command line
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+
+# append dataset    
+if dataset != None:
+    command_line += ' --dataset %s' % dataset
+
+# append version (command_line is a plain string, so concatenate; str has no .append)
+if version != None:
+    command_line += ' --version %s' % version
+
+# append group    
+if group != None:
+    command_line += ' --group %s' % group
+
+# append metadata    
+command_line += ' %s' % metadata
+
+# append folder
+command_line += ' %s' % folder
+
+# run the command
+lines, errors, return_value = run_process(command_line)
+
+# print end message
+print_result(__command, return_value, errors)
\ No newline at end of file

java/sandbox/data-cat/src/main/python/hpsdatacat
delete.py 652 -> 653
--- java/sandbox/data-cat/src/main/python/hpsdatacat/delete.py	2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/delete.py	2014-06-02 23:50:37 UTC (rev 653)
@@ -7,29 +7,45 @@
 # import utility stuff from hpsdatacat
 from util import *
 
+__command = 'rm'
+
 # get connection_string string
 connection_string = get_ssh_connection_string()
 
 # get the command to use
-script_cmd = get_datacat_command( 'rm' )
+script_cmd = get_datacat_command(__command)
 
 # site
 site = get_default_site()
 
-parser = argparse.ArgumentParser(description='Register file in the data catalog')
+# command line parser
+parser = create_base_argparser(__command)
 parser.add_argument('-p', '--path', help='path to delete from the data catalog')
 args = vars(parser.parse_args())
 
+# handle the standard arguments
+connection_string, dry_run, mode = handle_standard_arguments(args)
+
+# connection string if not provided by command line
+if connection_string == None:
+    connection_string = get_ssh_connection_string()    
+    if connection_string == None:
+        raise Exception("Couldn't figure out a connection_string to use!")    
+
+# dataset path to delete
 if args['path'] == None:
     raise Exception('Missing path argument.')
 else:
     path = args['path']
+    
+# setup the command line with base options
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
 
-cmd = 'ssh %s %s --force %s' % (connection_string, script_cmd, path)
+# append this command's arguments
+command_line += ' --force %s' % path
 
-print "Executing command ..."
-print cmd
+# run command line
+lines, errors, return_value = run_process(command_line)
 
-lines, errors, return_value = run_process(cmd)
-
-print "return value: %s" % str(return_value) 
+# print the result
+print_result(__command, return_value, errors) 
\ No newline at end of file

java/sandbox/data-cat/src/main/python/hpsdatacat
find.py 652 -> 653
--- java/sandbox/data-cat/src/main/python/hpsdatacat/find.py	2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/find.py	2014-06-02 23:50:37 UTC (rev 653)
@@ -12,12 +12,9 @@
 # import utility stuff from hpsdatacat
 from util import *
 
-# get connection_string string
-connection_string = get_ssh_connection_string()
+# data catalog command to be executed
+__command = 'find'
 
-# get the command to use
-script_cmd = get_datacat_command( 'find' )
-
 # default options for search command
 script_options = '--search-groups --recurse'
 
@@ -27,74 +24,76 @@
 # site
 site = get_default_site()
 
-#def escape_characters(raw_string):
-#    escaped_string = raw_string.replace('"', '\\"').replace(' ', '\\ ')
-#    return escaped_string 
-
-parser = argparse.ArgumentParser(description='Search for files in HPS data catalog')
+# command line parser
+parser = create_base_argparser(__command)
 parser.add_argument('-p', '--path', help='root path for search')
 parser.add_argument('-s', '--site', help='dataset site')
-parser.add_argument('-c', '--connection_string', help='SSH connection_string')
 parser.add_argument('-o', '--output', help='save results to output file')
 parser.add_argument('-q', '--query', help='data query for filtering results')
-
 args = vars(parser.parse_args())
+
+# get standard arguments
+connection_string, dry_run, mode = handle_standard_arguments(args)
     
+# folder for query    
 if args['path'] != None:
     path = args['path']
 else:
     path = default_path
-    
+
+# site    
 if args['site'] != None:
     site = args['site']
     if site != 'SLAC' and site != 'JLAB':
         raise Exception("Unrecognized site argument!")
     
-if args['connection_string'] != None:
-    connection_string = args['connection_string']
-    
+# connection string if not provided by command line
 if connection_string == None:
-    raise Exception("Could not determine connection_string string!")    
-    
-output = None
-if args['output'] != None:
-    output = args['output']
-    if os.path.isfile(output):
-        raise Exception('The output file already exists!')
-    
-query = ''    
+    connection_string = get_ssh_connection_string()    
+    if connection_string == None:
+        raise Exception("Couldn't figure out a connection_string to use!")    
+
+# meta data query            
+query = None
 if args['query'] != None:
-    #print args['query']
     query = '--filter \'%s\'' % args['query']
     query = escape_characters(query)
     print query
-                
-cmd = 'ssh %s %s %s --site %s %s %s' % (connection_string, script_cmd, script_options, site, query, path)
 
-print "Executing query ..."
-print cmd
+# setup the command line with base options
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+               
+# add the standard options for this command                
+command_line += ' %s' % script_options 
 
-if output != None:
+# add site
+command_line += ' --site %s' % site
+
+# add meta data query if provided
+if query != None:
+    command_line += ' %s' % query
+    
+# add path
+command_line += ' %s' % path     
+ 
+# setup the output file if specified
+output = None
+if args['output'] != None:
+    output = args['output']
+    if os.path.isfile(output):
+        raise Exception('The output file already exists!')
     output_file = open(output, 'w')
 
-process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-errors = []
-lines = []
-for line in process.stdout.readlines():
-    lines.append(line)
-    if 'Exception' in line:
-        errors.append(line)        
-        
-retval = process.wait()
+# run the command
+lines, errors, return_value = run_process(command_line)
 
-if (retval == 0 and len(errors) == 0):
+# print or save the output if command was successful
+if (return_value == 0 and len(errors) == 0):
     if output != None:
         for line in lines:
             output_file.write(line)
-        output_file.close()       
-    else:
-        for line in lines:
-            print line,
-else:
-    print 'The find command failed with errors!'
-    print errors
\ No newline at end of file
+        output_file.close()
+        print 'Output saved to file: %s' % output
+
+# print command result
+print_result(__command, return_value, errors, False)
\ No newline at end of file

java/sandbox/data-cat/src/main/python/hpsdatacat
register.py 652 -> 653
--- java/sandbox/data-cat/src/main/python/hpsdatacat/register.py	2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/register.py	2014-06-02 23:50:37 UTC (rev 653)
@@ -8,45 +8,25 @@
 
 import argparse, os.path, subprocess, socket, getpass
 
-# path to script at SLAC
-script_cmd = '~srs/datacat/prod/datacat-hps registerDataset'
+from util import *
 
-# setup default connection_string
-connection_string = None
-domainname = socket.getfqdn()
-username = getpass.getuser()
-
-# FIXME: Make something like this work so JLAB users can connect to a generic account at SLAC.
-#if 'jlab' in domainname:
-#    if 'clashps' in username:
-#        connection_string = [log in to unmask]
-#elif 'slac' in domainname:
-if 'slac' in domainname:
-    connection_string = [log in to unmask] % getpass.getuser()
-
+__command = 'registerDataset'
+    
 # lowest level node in directory hierarchy
-group = 'HPS'
+group = get_default_group()
 
 # site
-site = 'SLAC'
+site = get_default_site()
 
-parser = argparse.ArgumentParser(description='Register file in HPS data catalog')
+parser = create_base_argparser(__command)
 parser.add_argument('-p', '--path', help='destination path in data catalog')
 parser.add_argument('-d', '--dataset', help='input physical dataset')
-parser.add_argument('-m', '--metadata', nargs='*', help='define meta data')
+parser.add_argument('-m', '--metadata', help='define a meta data field value with format key=value', action='append')
 parser.add_argument('-g', '--group', help='dataset group')
 parser.add_argument('-s', '--site', help='dataset site')
-parser.add_argument('-c', '--connection_string', help='SSH connection_string')
-parser.add_argument('-v', '--verbose', help='turn verbose mode on', action='store_true')
-parser.add_argument('-D', '--dry-run', help='dry run only', action='store_true')
 args = vars(parser.parse_args())
 
-verbose = False
-if args['verbose'] != None:
-    verbose = True    
-
-if verbose:
-    print args
+connection_string, dry_run, mode = handle_standard_arguments(args)
     
 if args['path'] != None:
     path = args['path']
@@ -58,66 +38,50 @@
 else:    
     raise Exception('dataset is required!')
 
-ext = os.path.splitext(dataset)[1][1:]
+file_extension = os.path.splitext(dataset)[1][1:]
 
 if args['group'] != None:
     group = args['group']
     
-metadata = ''
+metadata = None
+raw_metadata = args['metadata']
 if args['metadata'] != None:
-    print args['metadata']
-    for var in args['metadata']:
-        equals = var.find('=')
-        if (len(var) < 3 or equals < 0):
-            raise Exception("Bad meta data variable format!")
-        metadata += '--define %s ' % (var)
+    metadata = format_metadata(raw_metadata)
 
 if args['site'] != None:
     site = args['site']
-    if site != 'SLAC' and site != 'JLAB':
-        raise Exception("Unrecognized site!")
-    
-if args['connection_string'] != None:
-    connection_string = args['connection_string']
-    
-if connection_string == None:
-    raise Exception("Couldn't figure out a connection_string to use!")    
-    
-dry_run = False
-if args['dry_run']:
-    dry_run = True
-                
-cmd = 'ssh %s %s --group %s --site %s %s %s %s %s' % (connection_string, script_cmd, group, site, metadata, ext, path, dataset)
+    check_valid_site(site)
 
-if verbose:
-    print "Executing command ..."
-    print cmd 
+# Try to figure out a default connection string if none was supplied.    
+if connection_string == None:    
+    connection_string = get_ssh_connection_string()    
+    if connection_string == None:
+        raise Exception("Couldn't figure out a connection_string to use!")    
 
-if dry_run:
-    print 'Configured for dry run, so command will not be executed.'
-else:
-    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    errors = []
-    for line in process.stdout.readlines():
-        if verbose:
-            print line,
-        if 'Exception' in line:
-            errors.append(line)              
-        
-    retval = process.wait()
-        
-    if len(errors) != 0 or retval != 0:
-        print 'The registration failed with errors!'
-        print errors            
-    else:
-        print 'Added data to catalog ...'
-        print '  dataset: %s' % dataset
-        print '  path: %s' % path
-        print '  group: %s' % group
-        print '  site: %s' % site
-        
-        #print '  metadata: ' % str(args['metadata'])
-    
-    print 'return value: %d' % retval
-    
-print 'Done!'    
\ No newline at end of file
+# create base command line
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+
+# append group and site
+command_line += ' --group %s --site %s' % (group, site)
+
+# append meta data
+if metadata != None:
+    command_line += ' %s' % metadata    
+
+# append file type, path and dataset
+command_line += ' %s %s %s' % (file_extension, path, dataset)
+
+# run the command
+lines, errors, return_value = run_process(command_line)
+
+# print dataset information if command was successful
+if return_value == 0:
+    print 'Added dataset to catalog ...'
+    print '  dataset: %s' % dataset
+    print '  path: %s' % path
+    print '  group: %s' % group
+    print '  site: %s' % site
+    print '  metadata: %s' % str(raw_metadata)
+
+# print command result
+print_result(__command, return_value, errors)
\ No newline at end of file

java/sandbox/data-cat/src/main/python/hpsdatacat
util.py 652 -> 653
--- java/sandbox/data-cat/src/main/python/hpsdatacat/util.py	2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/util.py	2014-06-02 23:50:37 UTC (rev 653)
@@ -1,17 +1,31 @@
-import getpass, socket, subprocess
+"""
+Utility function library for HPS data catalog wrapper scripts.
+"""
 
+# Python lib imports
+import getpass, socket, subprocess, argparse
+
 # location of data catalog script at SLAC
 __datacat_script = '~srs/datacat/prod/datacat-hps'
 
 # commands that have script wrappers for them
-__valid_commands = ( 'rm', 'registerDataset', 'addLocation', 'addMetaData', 'find' )
+__valid_commands = ('rm', 'registerDataset', 'addLocation', 'addMetaData', 'find')
 
+# valid mode settings
+__valid_modes = ('PROD', 'DEV', 'TEST')
+
 # default site for data catalog search
 __default_site = 'SLAC'
 
+# valid sites
+__valid_sites = ('SLAC', 'JLAB')
+
 # default base path for datacatalog search
 __default_search_path = '/HPS'
 
+# default dataset group
+__default_group = 'HPS'
+
 """
 Get the default site.
 """
@@ -23,6 +37,12 @@
 """
 def get_default_search_path():
     return __default_search_path
+
+"""
+Get the default dataset group.
+"""
+def get_default_group():
+    return __default_group
                      
 """
 Simple utility to return the full script command.
@@ -32,7 +52,6 @@
     if command not in __valid_commands:
         raise Exception("Unknown command: " % command)
     return '%s %s' % (__datacat_script, command)
-    
 
 """
 Get an SSH connection_string string for the SLAC or JLAB sites.
@@ -61,21 +80,25 @@
 """
 Run a process in a shell and return the output lines, errors, and return value (in that order). 
 """
-def run_process(cmd, printToScreen=True):
+def run_process(command, printToScreen=True):
     
-    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    if printToScreen:
+        print "Executing command ..."
+        print command
+    
+    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     errors = []
     lines = []
     for line in process.stdout.readlines():
-        if printToScreen == True:
+        if printToScreen:
             print line,            
         if 'Exception' in line:
             errors.append(line)
         lines.append(line)
         
-    retval = process.wait()
+    return_value = process.wait()
     
-    return lines, errors, retval
+    return lines, errors, return_value
 
 """
 Escape characters for the SSH command.
@@ -83,4 +106,82 @@
 """
 def escape_characters(raw_string):
     return raw_string.replace('"', '\\"').replace(' ', '\\ ').replace('&', '\\&')
-    
\ No newline at end of file
+
+"""
+Format meta data for a command from the command line arguments.
+This function will return None if raw_metadata is empty.
+"""
+def format_metadata(raw_metadata):
+    metadata = ''
+    for var in raw_metadata:
+        equals = var.find('=')
+        if (len(var) < 3 or equals < 0):
+            raise Exception("Bad meta data variable format.")
+        metadata += '--define %s ' % var
+    if len(metadata) == 0:
+        metadata = None
+    return metadata
+
+"""
+Create the basic argparser for data catalog commands.
+"""
+def create_base_argparser(command):
+    if command not in __valid_commands:
+        raise Exception("Unknown command: %s" % command)
+    parser = argparse.ArgumentParser(description='Execute the %s command on the data catalog' % command)
+    parser.add_argument('-D', '--dry-run', help='perform dry run only with no database commits', action='store_true')
+    parser.add_argument('-M', '--mode', help='set data source as PROD, DEV, or TEST')
+    parser.add_argument('-c', '--connection_string', help='SSH connection string in form user@host')
+    return parser
+
+"""
+Parse the standard base parser arguments; returns (connection_string, dry_run, mode), with None for unset values.
+"""
+def handle_standard_arguments(args):
+    
+    connection_string = None
+    if args['connection_string'] != None:
+        connection_string = args['connection_string']
+
+    dry_run = False
+    if args['dry_run'] == True:
+        dry_run = True
+        
+    mode = None
+    if args['mode'] != None:
+        mode = args['mode']
+
+    return connection_string, dry_run, mode
+
+"""
+Print the result of running the command.
+"""
+def print_result(command, return_value, errors, printSuccessful=True):
+    if return_value != 0 or len(errors) != 0:
+        print "Command %s returned with errors ..." % command
+        for error in errors:
+            print error
+    else:
+        if printSuccessful:
+            print "Command %s was successful!" % command
+    if (return_value != 0) or printSuccessful:
+        print "return_value: %s" % str(return_value)
+
+"""
+Create the basic SSH command.
+"""
+def create_base_command_line(command, connection_string, dry_run, mode):        
+    command_line = 'ssh %s' % (connection_string)
+    if mode != None:
+        command_line += ' %s' % mode
+    if dry_run:
+        command_line += ' --nocommit'
+    command_line += ' %s' % (get_datacat_command(command))
+    return command_line
+
+"""
+Check if site is valid.
+"""
+def check_valid_site(site):
+    if site not in __valid_sites:
+        raise Exception("Site is not valid: " + site)
\ No newline at end of file
SVNspam 0.1