2 added + 4 modified, total 6 files
java/sandbox/data-cat/src/main/python/hpsdatacat
--- java/sandbox/data-cat/src/main/python/hpsdatacat/add_location.py (rev 0)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/add_location.py 2014-06-02 23:50:37 UTC (rev 653)
@@ -0,0 +1,109 @@
+"""
+Wrapper for 'addLocation' command.
+
+The help from that command is the following:
+
+Usage: datacat addLocation [-options] <dataset name> <logical folder> <file path>
+
+parameters:
+ <dataset name> Tag-name for the new dataset.
+ <logical folder> Logical Folder Path under which the dataset lives.
+ <file path> Location of file to add to Data Catalog.
+
+options:
+ --group <Dataset Group> Dataset Group under which the dataset lives.
+ --site <Site=SLAC> Site at which file exists on disk. Defaults to SLAC if not specified.
+ --version <Version=-1> Version ID of the dataset (Defaults to the Latest version if not specified.)
+
+Example command:
+
+python ./src/main/python/hpsdatacat/add_location.py -n hps_testrun_001351_recon -f \
+/HPS/testrun2012/data/recon -p /nfs/slac/g/hps3/data/datacat-test/data/hps_testrun_001351_test2.slcio \
+-s JLAB
+
+FIXME: Above command doesn't work right now!
+
+"""
+
+from util import *
+
+__command = 'addLocation'
+
+
+def _required(value, message):
+    """Return value unchanged, or raise Exception(message) if it is None."""
+    if value is None:
+        raise Exception(message)
+    return value
+
+
+# defaults for the optional group and site options
+default_group, default_site = get_default_group(), get_default_site()
+
+# standard options plus the ones specific to this command
+parser = create_base_argparser(__command)
+for short_flag, long_flag, description in (
+        ('-f', '--folder', 'folder where dataset is located'),
+        ('-n', '--name', 'dataset name'),
+        ('-p', '--path', 'physical file location'),
+        ('-g', '--group', 'dataset group'),
+        ('-s', '--site', 'dataset site'),
+        ('-v', '--version', 'dataset version')):
+    parser.add_argument(short_flag, long_flag, help=description)
+args = vars(parser.parse_args())
+
+connection_string, dry_run, mode = handle_standard_arguments(args)
+
+# fall back to the default SSH connection when none was given
+if connection_string is None:
+    connection_string = _required(
+        get_ssh_connection_string(),
+        "Couldn't figure out a connection_string to use!")
+
+# required arguments: folder, dataset name and physical path, checked in that order
+folder = _required(args['folder'], "The dataset folder is a required argument.")
+dataset_name = _required(args['name'], "The dataset name is a required argument.")
+path = _required(args['path'], "The physical file path is a required argument.")
+
+# optional arguments: group and site have defaults, version has none
+group = default_group if args['group'] is None else args['group']
+if args['site'] is None:
+    site = default_site
+else:
+    site = args['site']
+    # only a site given on the command line is validated
+    check_valid_site(site)
+version = args['version']
+
+# base command line followed by every option that has a value
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+for option, value in (('--group', group), ('--site', site), ('--version', version)):
+    if value is not None:
+        command_line += ' %s %s' % (option, value)
+
+# positional parameters: dataset name, logical folder and physical path
+command_line += ' %s %s %s' % (dataset_name, folder, path)
+
+# run the command and report the outcome
+lines, errors, return_value = run_process(command_line)
+print_result(__command, return_value, errors)
\ No newline at end of file
java/sandbox/data-cat/src/main/python/hpsdatacat
--- java/sandbox/data-cat/src/main/python/hpsdatacat/add_metadata.py (rev 0)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/add_metadata.py 2014-06-02 23:50:37 UTC (rev 653)
@@ -0,0 +1,92 @@
+"""
+Script for setting meta data on an existing dataset or group.
+This script cannot currently be used to change the value of an
+existing meta data field.
+"""
+
+import argparse
+
+from util import *
+
+# command this script will use
+__command = 'addMetaData'
+
+# create the argparser: standard options plus the ones specific to this command
+parser = create_base_argparser(__command)
+parser.add_argument('-f', '--folder', help='folder where dataset lives')
+parser.add_argument('-d', '--dataset', help='target dataset for meta data')
+parser.add_argument('-v', '--version', help='version ID of the dataset (defaults to latest)')
+parser.add_argument('-g', '--group', help='dataset group or group to tag when no dataset specified')
+parser.add_argument('-m', '--metadata', nargs='*', help='a single meta data definition')
+args = vars(parser.parse_args())
+
+# handle standard arguments
+connection_string, dry_run, mode = handle_standard_arguments(args)
+
+# fall back to the default SSH connection when none was given on the command line
+if connection_string is None:
+    connection_string = get_ssh_connection_string()
+    if connection_string is None:
+        raise Exception("Couldn't figure out a connection string to use!")
+
+# optional arguments; each one is None when it was not supplied
+dataset = args['dataset']
+group = args['group']
+version = args['version']
+
+# dataset and/or group is required
+if dataset is None and group is None:
+    raise Exception("A dataset or group is required.")
+
+# folder (required)
+folder = args['folder']
+if folder is None:
+    raise Exception("A folder is required.")
+
+# metadata (required): turn the definitions into command line options
+if args['metadata'] is None:
+    raise Exception("At least one meta data definition is required.")
+metadata = format_metadata(args['metadata'])
+if metadata is None:
+    # '-m' was given without any definitions
+    raise Exception("Bad meta data definition.")
+
+# create base command line
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+
+# append dataset
+if dataset is not None:
+    command_line += ' --dataset %s' % dataset
+
+# append version; command_line is a plain string, so it is extended with +=
+# exactly like the other options (strings have no 'append' attribute)
+if version is not None:
+    command_line += ' --version %s' % version
+
+# append group
+if group is not None:
+    command_line += ' --group %s' % group
+
+# append metadata
+command_line += ' %s' % metadata
+
+# append folder
+command_line += ' %s' % folder
+
+# run the command
+lines, errors, return_value = run_process(command_line)
+
+# print end message
+print_result(__command, return_value, errors)
\ No newline at end of file
java/sandbox/data-cat/src/main/python/hpsdatacat
--- java/sandbox/data-cat/src/main/python/hpsdatacat/delete.py 2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/delete.py 2014-06-02 23:50:37 UTC (rev 653)
@@ -7,29 +7,45 @@
# import utility stuff from hpsdatacat
from util import *
+__command = 'rm'
+
# get connection_string string
connection_string = get_ssh_connection_string()
# get the command to use
-script_cmd = get_datacat_command( 'rm' )
+script_cmd = get_datacat_command(__command)
# site
site = get_default_site()
-parser = argparse.ArgumentParser(description='Register file in the data catalog')
+# command line parser
+parser = create_base_argparser(__command)
parser.add_argument('-p', '--path', help='path to delete from the data catalog')
args = vars(parser.parse_args())
+# handle the standard arguments
+connection_string, dry_run, mode = handle_standard_arguments(args)
+
+# connection string if not provided by command line
+if connection_string == None:
+ connection_string = get_ssh_connection_string()
+ if connection_string == None:
+ raise Exception("Couldn't figure out a connection_string to use!")
+
+# dataset path to delete
if args['path'] == None:
raise Exception('Missing path argument.')
else:
path = args['path']
+
+# setup the command line with base options
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
-cmd = 'ssh %s %s --force %s' % (connection_string, script_cmd, path)
+# append this command's arguments
+command_line += ' --force %s' % path
-print "Executing command ..."
-print cmd
+# run command line
+lines, errors, return_value = run_process(command_line)
-lines, errors, return_value = run_process(cmd)
-
-print "return value: %s" % str(return_value)
+# print the result
+print_result(__command, return_value, errors)
\ No newline at end of file
java/sandbox/data-cat/src/main/python/hpsdatacat
--- java/sandbox/data-cat/src/main/python/hpsdatacat/find.py 2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/find.py 2014-06-02 23:50:37 UTC (rev 653)
@@ -12,12 +12,9 @@
# import utility stuff from hpsdatacat
from util import *
-# get connection_string string
-connection_string = get_ssh_connection_string()
+# data catalog command to be executed
+__command = 'find'
-# get the command to use
-script_cmd = get_datacat_command( 'find' )
-
# default options for search command
script_options = '--search-groups --recurse'
@@ -27,74 +24,76 @@
# site
site = get_default_site()
-#def escape_characters(raw_string):
-# escaped_string = raw_string.replace('"', '\\"').replace(' ', '\\ ')
-# return escaped_string
-
-parser = argparse.ArgumentParser(description='Search for files in HPS data catalog')
+# command line parser
+parser = create_base_argparser(__command)
parser.add_argument('-p', '--path', help='root path for search')
parser.add_argument('-s', '--site', help='dataset site')
-parser.add_argument('-c', '--connection_string', help='SSH connection_string')
parser.add_argument('-o', '--output', help='save results to output file')
parser.add_argument('-q', '--query', help='data query for filtering results')
-
args = vars(parser.parse_args())
+
+# get standard arguments
+connection_string, dry_run, mode = handle_standard_arguments(args)
+# folder for query
if args['path'] != None:
path = args['path']
else:
path = default_path
-
+
+# site
if args['site'] != None:
site = args['site']
if site != 'SLAC' and site != 'JLAB':
raise Exception("Unrecognized site argument!")
-if args['connection_string'] != None:
- connection_string = args['connection_string']
-
+# connection string if not provided by command line
if connection_string == None:
- raise Exception("Could not determine connection_string string!")
-
-output = None
-if args['output'] != None:
- output = args['output']
- if os.path.isfile(output):
- raise Exception('The output file already exists!')
-
-query = ''
+ connection_string = get_ssh_connection_string()
+ if connection_string == None:
+ raise Exception("Couldn't figure out a connection_string to use!")
+
+# meta data query
+query = None
if args['query'] != None:
- #print args['query']
query = '--filter \'%s\'' % args['query']
query = escape_characters(query)
print query
-
-cmd = 'ssh %s %s %s --site %s %s %s' % (connection_string, script_cmd, script_options, site, query, path)
-print "Executing query ..."
-print cmd
+# setup the command line with base options
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+
+# add the standard options for this command
+command_line += ' %s' % script_options
-if output != None:
+# add site
+command_line += ' --site %s' % site
+
+# add meta data query if provided
+if query != None:
+ command_line += ' %s' % query
+
+# add path
+command_line += ' %s' % path
+
+# setup the output file if specified
+output = None
+if args['output'] != None:
+ output = args['output']
+ if os.path.isfile(output):
+ raise Exception('The output file already exists!')
output_file = open(output, 'w')
-process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-errors = []
-lines = []
-for line in process.stdout.readlines():
- lines.append(line)
- if 'Exception' in line:
- errors.append(line)
-
-retval = process.wait()
+# run the command
+lines, errors, return_value = run_process(command_line)
-if (retval == 0 and len(errors) == 0):
+# print or save the output if command was successful
+if (return_value == 0 and len(errors) == 0):
if output != None:
for line in lines:
output_file.write(line)
- output_file.close()
- else:
- for line in lines:
- print line,
-else:
- print 'The find command failed with errors!'
- print errors
\ No newline at end of file
+ output_file.close()
+ print 'Output saved to file: %s' % output
+
+# print command result
+print_result(__command, return_value, errors, False)
\ No newline at end of file
java/sandbox/data-cat/src/main/python/hpsdatacat
--- java/sandbox/data-cat/src/main/python/hpsdatacat/register.py 2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/register.py 2014-06-02 23:50:37 UTC (rev 653)
@@ -8,45 +8,25 @@
import argparse, os.path, subprocess, socket, getpass
-# path to script at SLAC
-script_cmd = '~srs/datacat/prod/datacat-hps registerDataset'
+from util import *
-# setup default connection_string
-connection_string = None
-domainname = socket.getfqdn()
-username = getpass.getuser()
-
-# FIXME: Make something like this work so JLAB users can connect to a generic account at SLAC.
-#if 'jlab' in domainname:
-# if 'clashps' in username:
-# connection_string = [log in to unmask]
-#elif 'slac' in domainname:
-if 'slac' in domainname:
- connection_string = [log in to unmask] % getpass.getuser()
-
+__command = 'registerDataset'
+
# lowest level node in directory hierarchy
-group = 'HPS'
+group = get_default_group()
# site
-site = 'SLAC'
+site = get_default_site()
-parser = argparse.ArgumentParser(description='Register file in HPS data catalog')
+parser = create_base_argparser(__command)
parser.add_argument('-p', '--path', help='destination path in data catalog')
parser.add_argument('-d', '--dataset', help='input physical dataset')
-parser.add_argument('-m', '--metadata', nargs='*', help='define meta data')
+parser.add_argument('-m', '--metadata', help='define a meta data field value with format key=value', action='append')
parser.add_argument('-g', '--group', help='dataset group')
parser.add_argument('-s', '--site', help='dataset site')
-parser.add_argument('-c', '--connection_string', help='SSH connection_string')
-parser.add_argument('-v', '--verbose', help='turn verbose mode on', action='store_true')
-parser.add_argument('-D', '--dry-run', help='dry run only', action='store_true')
args = vars(parser.parse_args())
-verbose = False
-if args['verbose'] != None:
- verbose = True
-
-if verbose:
- print args
+connection_string, dry_run, mode = handle_standard_arguments(args)
if args['path'] != None:
path = args['path']
@@ -58,66 +38,50 @@
else:
raise Exception('dataset is required!')
-ext = os.path.splitext(dataset)[1][1:]
+file_extension = os.path.splitext(dataset)[1][1:]
if args['group'] != None:
group = args['group']
-metadata = ''
+metadata = None
+raw_metadata = args['metadata']
if args['metadata'] != None:
- print args['metadata']
- for var in args['metadata']:
- equals = var.find('=')
- if (len(var) < 3 or equals < 0):
- raise Exception("Bad meta data variable format!")
- metadata += '--define %s ' % (var)
+ metadata = format_metadata(raw_metadata)
if args['site'] != None:
site = args['site']
- if site != 'SLAC' and site != 'JLAB':
- raise Exception("Unrecognized site!")
-
-if args['connection_string'] != None:
- connection_string = args['connection_string']
-
-if connection_string == None:
- raise Exception("Couldn't figure out a connection_string to use!")
-
-dry_run = False
-if args['dry_run']:
- dry_run = True
-
-cmd = 'ssh %s %s --group %s --site %s %s %s %s %s' % (connection_string, script_cmd, group, site, metadata, ext, path, dataset)
+ check_valid_site(site)
-if verbose:
- print "Executing command ..."
- print cmd
+# Try to figure out a default connection string if none was supplied.
+if connection_string == None:
+ connection_string = get_ssh_connection_string()
+ if connection_string == None:
+ raise Exception("Couldn't figure out a connection_string to use!")
-if dry_run:
- print 'Configured for dry run, so command will not be executed.'
-else:
- process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
- errors = []
- for line in process.stdout.readlines():
- if verbose:
- print line,
- if 'Exception' in line:
- errors.append(line)
-
- retval = process.wait()
-
- if len(errors) != 0 or retval != 0:
- print 'The registration failed with errors!'
- print errors
- else:
- print 'Added data to catalog ...'
- print ' dataset: %s' % dataset
- print ' path: %s' % path
- print ' group: %s' % group
- print ' site: %s' % site
-
- #print ' metadata: ' % str(args['metadata'])
-
- print 'return value: %d' % retval
-
-print 'Done!'
\ No newline at end of file
+# create base command line
+command_line = create_base_command_line(__command, connection_string, dry_run, mode)
+
+# append group and site
+command_line += ' --group %s --site %s' % (group, site)
+
+# append meta data
+if metadata != None:
+ command_line += ' %s' % metadata
+
+# append file type, path and dataset
+command_line += ' %s %s %s' % (file_extension, path, dataset)
+
+# run the command
+lines, errors, return_value = run_process(command_line)
+
+# print dataset information if command was successful
+if return_value == 0:
+ print 'Added dataset to catalog ...'
+ print ' dataset: %s' % dataset
+ print ' path: %s' % path
+ print ' group: %s' % group
+ print ' site: %s' % site
+ print ' metadata: %s' % str(raw_metadata)
+
+# print command result
+print_result(__command, return_value, errors)
\ No newline at end of file
java/sandbox/data-cat/src/main/python/hpsdatacat
--- java/sandbox/data-cat/src/main/python/hpsdatacat/util.py 2014-06-02 20:13:10 UTC (rev 652)
+++ java/sandbox/data-cat/src/main/python/hpsdatacat/util.py 2014-06-02 23:50:37 UTC (rev 653)
@@ -1,17 +1,31 @@
-import getpass, socket, subprocess
+"""
+Utility function library for HPS data catalog wrapper scripts.
+"""
+# Python lib imports
+import getpass, socket, subprocess, argparse
+
# location of data catalog script at SLAC
__datacat_script = '~srs/datacat/prod/datacat-hps'
# commands that have script wrappers for them
-__valid_commands = ( 'rm', 'registerDataset', 'addLocation', 'addMetaData', 'find' )
+__valid_commands = ('rm', 'registerDataset', 'addLocation', 'addMetaData', 'find')
+# valid mode settings
+__valid_modes = ('PROD', 'DEV', 'TEST')
+
# default site for data catalog search
__default_site = 'SLAC'
+# valid sites
+__valid_sites = ('SLAC', 'JLAB')
+
# default base path for datacatalog search
__default_search_path = '/HPS'
+# default dataset group
+__default_group = 'HPS'
+
"""
Get the default site.
"""
@@ -23,6 +37,12 @@
"""
def get_default_search_path():
return __default_search_path
+
+"""
+Get the default dataset group.
+"""
+def get_default_group():
+    """Return the module's default dataset group."""
+    default_group = __default_group
+    return default_group
"""
Simple utility to return the full script command.
@@ -32,7 +52,6 @@
if command not in __valid_commands:
raise Exception("Unknown command: " % command)
return '%s %s' % (__datacat_script, command)
-
"""
Get an SSH connection_string string for the SLAC or JLAB sites.
@@ -61,21 +80,25 @@
"""
Run a process in a shell and return the output lines, errors, and return value (in that order).
"""
-def run_process(cmd, printToScreen=True):
+def run_process(command, printToScreen=True):
- process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ if printToScreen:
+ print "Executing command ..."
+ print command
+
+ process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
errors = []
lines = []
for line in process.stdout.readlines():
- if printToScreen == True:
+ if printToScreen:
print line,
if 'Exception' in line:
errors.append(line)
lines.append(line)
- retval = process.wait()
+ return_value = process.wait()
- return lines, errors, retval
+ return lines, errors, return_value
"""
Escape characters for the SSH command.
@@ -83,4 +106,82 @@
"""
def escape_characters(raw_string):
return raw_string.replace('"', '\\"').replace(' ', '\\ ').replace('&', '\\&')
-
\ No newline at end of file
+
+"""
+Format meta data for a command from the command line arguments.
+This function will return None if raw_metadata is empty.
+"""
+def format_metadata(raw_metadata):
+    """
+    Turn 'key=value' definitions into '--define key=value' command line options.
+
+    raw_metadata is a sequence of definition strings, or None.  Returns one
+    string in which every definition is followed by a single space, or None
+    when raw_metadata is None or empty.  Raises Exception if a definition is
+    shorter than three characters, contains no '=', or starts with '=' (that
+    is, has no key).
+    """
+    if not raw_metadata:
+        return None
+    options = []
+    for var in raw_metadata:
+        # find() gives -1 when '=' is missing and 0 when the key is empty
+        if len(var) < 3 or var.find('=') <= 0:
+            raise Exception("Bad meta data variable format.")
+        options.append('--define %s ' % var)
+    return ''.join(options)
+
+"""
+Create the basic argparser for data catalog commands.
+"""
+def create_base_argparser(command):
+    """
+    Build the argument parser shared by the data catalog wrapper scripts.
+
+    The parser defines the dry run (-D), mode (-M) and SSH connection string
+    (-c) options; callers add their own options to the returned parser.
+    Raises Exception if command is not one of the wrapped commands.
+    """
+    if not (command in __valid_commands):
+        raise Exception("Unknown command: %s" % command)
+    description = 'Execute the %s command on the data catalog' % command
+    base_parser = argparse.ArgumentParser(description=description)
+    base_parser.add_argument(
+        '-D', '--dry-run',
+        action='store_true',
+        help='perform dry run only with no database commits')
+    base_parser.add_argument(
+        '-M', '--mode',
+        help='set data source as PROD, DEV, or TEST')
+    base_parser.add_argument(
+        '-c', '--connection_string',
+        help='SSH connection string in form user@host')
+    return base_parser
+
+"""
+Parse and return standard arguments from the base parser.
+"""
+def handle_standard_arguments(args):
+    """
+    Extract the values of the standard options from the parsed arguments.
+
+    args is the dictionary of parsed arguments and must contain the keys
+    'connection_string', 'dry_run' and 'mode'.
+
+    Returns the tuple (connection_string, dry_run, mode).  connection_string
+    and mode are the argument values as given (None when absent); dry_run is
+    True only if the 'dry_run' value equals True.
+    """
+    connection_string = args['connection_string']
+    dry_run = (args['dry_run'] == True)
+    # Pass the requested mode through.  It used to be reset to None here, so
+    # the mode option never had any effect.
+    mode = args['mode']
+    return connection_string, dry_run, mode
+
+"""
+Print the result of running the command.
+"""
+def print_result(command, return_value, errors, printSuccessful=True):
+    """
+    Report the outcome of a command on standard output.
+
+    A non-zero return_value or a non-empty errors list counts as a failure:
+    a failure notice is printed, followed by each error.  Otherwise a success
+    message is printed when printSuccessful is true.  The return value itself
+    is printed when it is non-zero or when printSuccessful is true.
+    """
+    failed = (return_value != 0) or (len(errors) != 0)
+    if failed:
+        print("Command %s returned with errors ..." % command)
+        for message in errors:
+            print(message)
+    elif printSuccessful:
+        print("Command %s was successful!" % command)
+    if printSuccessful or return_value != 0:
+        print("return_value: %s" % str(return_value))
+
+"""
+Create the basic SSH command.
+"""
+def create_base_command_line(command, connection_string, dry_run, mode):
+    """
+    Build the start of the SSH command line that runs a data catalog command.
+
+    The result has the form
+        ssh <connection_string> <datacat script> [--mode <mode>] [--nocommit] <command>
+    and callers append the command's own options and parameters to it.
+    Raises Exception if command is not one of the wrapped commands.
+    """
+    if command not in __valid_commands:
+        raise Exception("Unknown command: %s" % command)
+    # ssh runs everything after the host as the remote command, so the data
+    # catalog script has to come first and its options have to follow it.
+    command_line = 'ssh %s %s' % (connection_string, __datacat_script)
+    # NOTE(review): assumes the script takes its global options before the
+    # sub-command and spells the mode option '--mode' -- confirm against the
+    # script's own help output.
+    if mode is not None:
+        command_line += ' --mode %s' % mode
+    if dry_run:
+        command_line += ' --nocommit'
+    command_line += ' %s' % command
+    return command_line
+
+"""
+Check if site is valid.
+"""
+def check_valid_site(site):
+    """
+    Raise Exception if site is not one of the valid sites; otherwise do nothing.
+    """
+    if site in __valid_sites:
+        return
+    raise Exception("Site is not valid: " + site)
\ No newline at end of file
SVNspam 0.1