Print

Print


Commit in java/sandbox/data-cat/src/main/python/hpsdatacat on MAIN
add_location.py        +23 -69    665 -> 666
add_metadata.py        +21 -59    665 -> 666
delete.py              +7 -32     665 -> 666
extract_metadata.py    +5 -2      665 -> 666
find.py                +10 -39    665 -> 666
register.py            +24 -52    665 -> 666
util.py                +24 -29    665 -> 666
Total                  +114 -282
7 modified files
Improve command line parsing and processing of arguments for all scripts.  Tag certain arguments as required so argparse will handle command line syntax.  Other minor improvements.  (All scripts tested with these changes.)

java/sandbox/data-cat/src/main/python/hpsdatacat
add_location.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/add_location.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/add_location.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -1,94 +1,48 @@
 #!/usr/bin/env python
 
 """
-Add an additional location for a dataset using the 'addLocation' command.
-
-Example command:
-
-python ./src/main/python/hpsdatacat/add_location.py -n hps_testrun_001351_recon -f \
-/HPS/testrun2012/data/recon -p /nfs/slac/g/hps3/data/datacat-test/data/hps_testrun_001351_test2.slcio \
--s JLAB
-
-FIXME: Above command doesn't work right now because JLAB isn't a valid site!  
-  
+Add an additional location for a dataset using the 'addLocation' command.  
 """ 
 
 from util import *
 
 __command = 'addLocation'
 
-# lowest level node in directory hierarchy
-group = get_default_group()
-
-# site
-site = get_default_site()
-
+# create the parser
 parser = create_base_argparser(__command)
-parser.add_argument('-f', '--folder', help='folder where dataset is located')
-parser.add_argument('-n', '--name', help='dataset name')
-parser.add_argument('-p', '--path', help='physical file location')
-parser.add_argument('-g', '--group', help='dataset group')
-parser.add_argument('-s', '--site', help='dataset site')
+parser.add_argument('-p', '--logical_path', help='logical_path in data catalog where dataset is located', required=True)
+parser.add_argument('-n', '--name', help='original dataset name with no dataset_name extension', required=True)
+parser.add_argument('-f', '--file', help='new physical file location', required=True)
+parser.add_argument('-g', '--group', help='dataset group', default=get_default_group())
+parser.add_argument('-s', '--site', help='new dataset site', default=get_default_site())
 parser.add_argument('-v', '--version', help='dataset version')
 args = vars(parser.parse_args())
 
-connection_string, dry_run, mode = handle_standard_arguments(args)
-
-# connection    
-if connection_string == None:    
-    connection_string = get_ssh_connection_string()    
-    if connection_string == None:
-        raise Exception("Couldn't figure out a connection_string to use!")
-
-# folder arg (required)            
-if args['folder'] != None:
-    folder = args['folder']
-else:
-    raise Exception("The dataset folder is a required argument.")
-
-# dataset name arg (required)
-if args['name'] != None:
-    dataset_name = args['name']
-else:
-    raise Exception("The dataset name is a required argument.")
-
-# physical path arg (required)
-if args['path'] != None:
-    path = args['path']
-else:
-    raise Exception("The physical file path is a required argument.")
-
-# group arg (optional)
-if args['group'] != None:
-    group = args['group']
-
-# site arg (optional)    
-if args['site'] != None:
-    site = args['site']
-    check_valid_site(site)
-
-# version (optional)
+# process command line arguments
+connection, dry_run, mode = handle_standard_arguments(args)
+if connection == None:    
+    connection = get_ssh_connection_string()    
+    if connection == None:
+        raise Exception("Couldn't figure out a connection to use!")
+logical_path = args['logical_path']
+dataset_name = args['name']
+dataset_name = args['file']
+group = args['group']
+site = args['site']
+check_valid_site(site)
 version = None    
 if args['version'] != None:
     version = args['version']    
 
-# create base command line
-command_line = create_base_command_line(__command, connection_string, dry_run, mode)
-
-# append group
+# build command line
+command_line = create_base_command_line(__command, connection, dry_run, mode)
 if group != None:
     command_line += ' --group %s' % group
-
-# append site    
 if site != None:
     command_line += ' --site %s' % site
-    
-# append version    
 if version != None:    
-    command_line += ' --version %s' % version
-    
-# add dataset name, folder and physical path
-command_line += ' %s %s %s' % (dataset_name, folder, path)    
+    command_line += ' --version %s' % version    
+command_line += ' %s %s %s' % (dataset_name, logical_path, dataset_name)
 
 # run the command
 lines, errors, return_value = run_process(command_line)

java/sandbox/data-cat/src/main/python/hpsdatacat
add_metadata.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/add_metadata.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/add_metadata.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -4,8 +4,6 @@
 Set meta data on an existing dataset or group using the 'addMetaData' command.
 """
 
-import argparse
-
 from util import *
 
 # command this script will use
@@ -13,78 +11,42 @@
 
 # create the argparser
 parser = create_base_argparser(__command)
-parser.add_argument('-f', '--folder', help='folder where local_file lives')
-parser.add_argument('-d', '--local_file', help='target local_file for meta data')
-parser.add_argument('-v', '--version', help='version ID of the local_file (defaults to latest)')
-parser.add_argument('-g', '--group', help='local_file group or group to tag when no local_file specified')
-parser.add_argument('-m', '--metadata', nargs='*', help='a single meta data definition')
+parser.add_argument('-p', '--path', help='logical folder in data catalog', required=True)
+parser.add_argument('-g', '--group', help='dataset group or group to tag when no physical file is specified', required=True)
+parser.add_argument('-d', '--define', nargs='*', help='define one field in key=value format', required=True)
+parser.add_argument('-n', '--name', help='dataset name')
+parser.add_argument('-v', '--version', help='version ID of the dataset (defaults to latest)')
 args = vars(parser.parse_args())
 
 # handle standard arguments
-connection_string, dry_run, mode = handle_standard_arguments(args)
- 
-# connection string
-if connection_string == None:    
-    connection_string = get_ssh_connection_string()    
-    if connection_string == None:
-        raise Exception("Couldn't figure out a connection string to use!")
- 
-# local_file
-if args['local_file'] != None:
-    local_file = args['local_file']
-else:
-    local_file = None    
+connection, dry_run, mode = handle_standard_arguments(args)
+  
+# dataset_name
+dataset_name = args['name']
+group = args['group']    
+if dataset_name == None and group == None:
+    raise Exception("A dataset name or a group is required.")
+logical_path = args['path']
+version = args['version']
 
-# group    
-if args['group'] != None:
-    group = args['group']
-else:
-    group = None    
-
-# local_file and/or group is required    
-if local_file == None and group == None:
-    raise Exception("A local_file or group is required.")
-
-# folder
-if args['folder'] != None:
-    folder = args['folder']
-else:
-    raise Exception("A folder is required.")    
-
-# version    
-if args['version'] != None:
-    version = args['version']
-else:
-    version = None    
-
 # metadata    
-if args['metadata'] == None:
+if args['define'] == None:
     raise Exception("At least one meta data definition is required.")            
-metadata = format_metadata(args['metadata'])
+metadata = format_metadata_definition(args['define'])
 if metadata == None:
     raise Exception("Bad meta data definition.")    
 
-# create base command line
-command_line = create_base_command_line(__command, connection_string, dry_run, mode)
-
-# append local_file    
-if local_file != None:
-    command_line += ' --local_file %s' % local_file
-
-# append version    
+# build command line
+command_line = create_base_command_line(__command, connection, dry_run, mode)    
+if dataset_name != None:
+    command_line += ' --dataset %s' % dataset_name    
 if version != None:
     command_line.append += ' --version %s' % version
-
-# append group    
 if group != None:
     command_line += ' --group %s' % group
-
-# append metadata    
 command_line += ' %s' % metadata
+command_line += ' %s' % logical_path
 
-# append folder
-command_line += ' %s' % folder
-
 # run the command
 lines, errors, return_value = run_process(command_line)
 

java/sandbox/data-cat/src/main/python/hpsdatacat
delete.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/delete.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/delete.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -4,48 +4,23 @@
 Delete a file from the data catalog by using the 'rm' command.
 """
 
-import argparse
-
-# import utility stuff from hpsdatacat
 from util import *
 
 __command = 'rm'
 
-# get connection_string string
-connection_string = get_ssh_connection_string()
-
-# get the command to use
-script_cmd = get_datacat_command(__command)
-
-# site
-site = get_default_site()
-
 # command line parser
 parser = create_base_argparser(__command)
-parser.add_argument('-p', '--path', help='path to delete from the data catalog')
+parser.add_argument('-p', '--path', help='path to delete from the data catalog (dataset or folder)', required=True)
 args = vars(parser.parse_args())
 
-# handle the standard arguments
-connection_string, dry_run, mode = handle_standard_arguments(args)
-
-# connection string if not provided by command line
-if connection_string == None:
-    connection_string = get_ssh_connection_string()    
-    if connection_string == None:
-        raise Exception("Couldn't figure out a connection_string to use!")    
-
-# dataset path to delete
-if args['path'] == None:
-    raise Exception('Missing path argument.')
-else:
-    path = args['path']
+# process command line arguments
+connection, dry_run, mode = handle_standard_arguments(args)
+logical_path = args['path']
     
-# setup the command line with base options
-command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+# build command line
+command_line = create_base_command_line(__command, connection, dry_run, mode)
+command_line += ' --force %s' % logical_path
 
-# append this command's arguments
-command_line += ' --force %s' % path
-
 # run command line
 lines, errors, return_value = run_process(command_line)
 

java/sandbox/data-cat/src/main/python/hpsdatacat
extract_metadata.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/extract_metadata.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/extract_metadata.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -91,8 +91,8 @@
     def extract_metadata(self, file_path):
         self.metadata = {}
         suppress_print()
-        file = ROOT.TFile(file_path)
-        tree = file.Get("HPS_Event")
+        root_file = ROOT.TFile(file_path)
+        tree = root_file.Get("HPS_Event")
         tree.GetEntry(0)
         run_number = tree.GetLeaf("run_number").GetValue(0)
         restore_print()
@@ -128,6 +128,7 @@
             return extractor
     return None
 
+# when run from command line takes a single argument which is a local file path
 if __name__ == '__main__':
 
     if len(sys.argv) < 2:
@@ -141,6 +142,8 @@
     print "Extracted meta data ..."
     print extractor.get_metadata()
     
+    # Uncomment to do tests ...
+    
     # test on LCIO file
     #lcioFile = '/nfs/slac/g/hps3/data/datacat-test/data/hps_testrun_001351.slcio'    
     #print 'testing LcioMetaDataExtractor on %s' % lcioFile    

java/sandbox/data-cat/src/main/python/hpsdatacat
find.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/find.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/find.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -4,7 +4,7 @@
 Find files in the data catalog using the 'find' command.
 """
 
-import argparse, os.path, subprocess, socket, getpass
+import os.path, subprocess, socket, getpass
 from util import *
 
 # data catalog command to be executed
@@ -13,41 +13,20 @@
 # default options for search command
 __script_options = '--search-groups --recurse'
 
-# default path in the data catalog
-default_path = get_default_search_path()
-
-# site
-site = get_default_site()
-
 # command line parser
 parser = create_base_argparser(__command)
-parser.add_argument('-p', '--path', help='root path for search')
-parser.add_argument('-s', '--site', help='dataset site')
+parser.add_argument('-p', '--path', help='root logical_path for search', default=get_default_search_path())
+parser.add_argument('-s', '--site', help='dataset site', default=get_default_site())
 parser.add_argument('-o', '--output', help='save results to output file')
 parser.add_argument('-q', '--query', help='data query for filtering results')
 args = vars(parser.parse_args())
 
 # get standard arguments
-connection_string, dry_run, mode = handle_standard_arguments(args)
+connection, dry_run, mode = handle_standard_arguments(args)    
+logical_path = args['path']
+site = args['site']
+check_valid_site(site)
     
-# folder for query    
-if args['path'] != None:
-    path = args['path']
-else:
-    path = default_path
-
-# site    
-if args['site'] != None:
-    site = args['site']
-    if site != 'SLAC' and site != 'JLAB':
-        raise Exception("Unrecognized site argument!")
-    
-# connection string if not provided by command line
-if connection_string == None:
-    connection_string = get_ssh_connection_string()    
-    if connection_string == None:
-        raise Exception("Couldn't figure out a connection_string to use!")    
-
 # meta data query            
 query = None
 if args['query'] != None:
@@ -55,21 +34,13 @@
     query = escape_characters(query)
     print query
 
-# setup the command line with base options
-command_line = create_base_command_line(__command, connection_string, dry_run, mode)
-               
-# add the standard options for this command                
+# build the command line
+command_line = create_base_command_line(__command, connection, dry_run, mode)
 command_line += ' %s' % __script_options 
-
-# add site
 command_line += ' --site %s' % site
-
-# add meta data query if provided
 if query != None:
     command_line += ' %s' % query
-    
-# add path
-command_line += ' %s' % path     
+command_line += ' %s' % logical_path     
  
 # setup the output file if specified
 output = None

java/sandbox/data-cat/src/main/python/hpsdatacat
register.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/register.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/register.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -4,79 +4,51 @@
 Register new files in the data catalog using the 'registerDataset' command. 
 """
 
-import argparse, os.path, subprocess, socket, getpass
+import os.path, subprocess, socket, getpass
 
 from util import *
 
 __command = 'registerDataset'
     
-# lowest level node in directory hierarchy
-group = get_default_group()
-
-# site
-site = get_default_site()
-
+# create command line parser
 parser = create_base_argparser(__command)
-parser.add_argument('-p', '--path', help='destination folder in the data catalog')
-parser.add_argument('-f', '--file', help='input physical file to register')
-parser.add_argument('-d', '--define', help='define a single meta data field with format key=value', action='append')
-parser.add_argument('-g', '--group', help='group under the path')
-parser.add_argument('-s', '--site', help='site of the new file e.g. SLAC or JLAB')
+parser.add_argument('-p', '--logical_path', help='destination logical logical_path in the data catalog', required=True)
+parser.add_argument('-f', '--file', help='input physical file to register', required=True)
+parser.add_argument('-d', '--define', help='define a field with format key=value', action='append')
+parser.add_argument('-g', '--group', help='group under the logical_path', default=get_default_group())
+parser.add_argument('-s', '--site', help='site of the new file', default=get_default_site())
 args = vars(parser.parse_args())
 
-connection_string, dry_run, mode = handle_standard_arguments(args)
+# process command line arguments
+connection, dry_run, mode = handle_standard_arguments(args)            
+logical_path = args['logical_path']
+dataset_name = args['file']
+file_extension = os.path.splitext(dataset_name)[1][1:]
+group = args['group']
+site = args['site']
+check_valid_site(site)
     
-if args['path'] != None:
-    path = args['path']
-else:
-    raise Exception('The destination path is required!')
-
-if args['file'] != None:
-    local_file = args['file']
-else:    
-    raise Exception('The local file is required!')
-
-file_extension = os.path.splitext(local_file)[1][1:]
-
-if args['group'] != None:
-    group = args['group']
-    
+# build meta data definitions
 metadata = None
 raw_metadata = args['define']
 if args['define'] != None:
-    metadata = format_metadata(raw_metadata)
+    metadata = format_metadata_definition(raw_metadata)
 
-if args['site'] != None:
-    site = args['site']
-    check_valid_site(site)
-
-# Try to figure out a default connection string if none was supplied.    
-if connection_string == None:    
-    connection_string = get_ssh_connection_string()    
-    if connection_string == None:
-        raise Exception("Couldn't figure out a connection_string to use!")    
-
-# create base command line
-command_line = create_base_command_line(__command, connection_string, dry_run, mode)
-
-# append group and site
+# build command line
+command_line = create_base_command_line(__command, connection, dry_run, mode)
 command_line += ' --group %s --site %s' % (group, site)
-
-# append meta data
 if metadata != None:
     command_line += ' %s' % metadata    
+command_line += ' %s %s %s' % (file_extension, logical_path, dataset_name)
 
-# append file type, path and local_file
-command_line += ' %s %s %s' % (file_extension, path, local_file)
-
 # run the command
 lines, errors, return_value = run_process(command_line)
 
-# print local_file information if command was successful
+# print dataset_name information for new dataset
 if return_value == 0:
-    print 'Added local_file to catalog ...'
-    print '  local_file: %s' % local_file
-    print '  path: %s' % path
+    print 'Added dataset to catalog ...'
+    print '  file: %s' % dataset_name
+    print '  logical_path: %s' % logical_path
     print '  group: %s' % group
     print '  site: %s' % site
     print '  metadata: %s' % str(raw_metadata)

java/sandbox/data-cat/src/main/python/hpsdatacat
util.py 665 -> 666
--- java/sandbox/data-cat/src/main/python/hpsdatacat/util.py	2014-06-04 19:56:35 UTC (rev 665)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/util.py	2014-06-04 20:15:51 UTC (rev 666)
@@ -55,15 +55,15 @@
     return '%s %s' % (__datacat_script, command)
 
 """
-Get an SSH connection_string string for the SLAC or JLAB sites.
+Get an SSH connection string for the SLAC or JLAB sites.
 This function will return null if not running at those sites,
 in which case the caller needs to provide their own (usually
 through a command line argument to one of the scripts).
 """
 def get_ssh_connection_string():
 
-    # setup default connection_string
-    connection_string = None
+    # setup default connection
+    connection = None
     domainname = socket.getfqdn()    
 
     if 'slac' in domainname:
@@ -74,9 +74,9 @@
         username = None
         
     if username != None:         
-        connection_string = [log in to unmask] % username
+        connection = [log in to unmask] % username
     
-    return connection_string
+    return connection
 
 """
 Run a process in a shell and return the output lines, errors, and return value (in that order). 
@@ -109,10 +109,11 @@
     return raw_string.replace('"', '\\"').replace(' ', '\\ ').replace('&', '\\&')
 
 """
-Format meta data for an SSH command from the command line arguments.
-This function will return None if raw_metadata is empty.
+Format meta data string for the 'registerDataset' or 'addMetaData' command line arguments 
+from a supplied list of 'key=value' strings.  
+This function will return None if the raw_metadata has length zero.
 """
-def format_metadata(raw_metadata):
+def format_metadata_definition(raw_metadata):
     metadata = ''
     for var in raw_metadata:
         equals = var.find('=')
@@ -124,7 +125,8 @@
     return metadata
 
 """
-Create the basic argparser for data catalog commands.
+Create the basic argparser for data catalog commands which includes handling
+of dry run, mode and connection settings.  These are all optional.
 """
 def create_base_argparser(command):
     if command not in __valid_commands:
@@ -132,28 +134,21 @@
     parser = argparse.ArgumentParser(description='Execute the %s command on the data catalog' % command)
     parser.add_argument('-D', '--dry-run', help='perform dry run only with no database commits', action='store_true')
     parser.add_argument('-M', '--mode', help='set data source as PROD, DEV, or TEST')
-    parser.add_argument('-c', '--connection-string', help='SSH connection string in form user@host')
+    parser.add_argument('-c', '--connection', help='SSH connection string in form user@host', default=get_ssh_connection_string())
     return parser
 
 """
 Parse and return standard arguments from the base parser.
 """
-def handle_standard_arguments(args):
-    
-    connection_string = None
-    if args['connection_string'] != None:
-        connection_string = args['connection_string']
+def handle_standard_arguments(args):    
+    if args['connection'] != None:
+        connection = args['connection']
+    else:
+        raise Exception("Could not figure out SSH connection!")
+    dry_run = args['dry_run']        
+    mode = args['mode']
+    return connection, dry_run, mode
 
-    dry_run = False
-    if args['dry_run'] == True:
-        dry_run = True
-        
-    mode = None
-    if args['mode'] != None:
-        mode = None
-
-    return connection_string, dry_run, mode
-
 """
 Print the results of running a command.
 """
@@ -171,13 +166,13 @@
 """
 Create the basic SSH command from common arguments.
 """
-def create_base_command_line(command, connection_string, dry_run, mode):        
-    command_line = 'ssh %s' % (connection_string)
+def create_base_command_line(command, connection, dry_run, mode):        
+    command_line = 'ssh %s' % (connection)
+    command_line += ' %s' % (get_datacat_command(command))
     if mode != None:
-        command_line += ' %s' % mode
+        command_line += ' --mode %s' % mode
     if dry_run:
         command_line += ' --nocommit'
-    command_line += ' %s' % (get_datacat_command(command))
     return command_line
 
 """
SVNspam 0.1