Author: [log in to unmask] Date: Thu Feb 19 13:13:55 2015 New Revision: 2167 Log: update for dqm, future run, slac locations Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py ============================================================================= --- java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py (original) +++ java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py Thu Feb 19 13:13:55 2015 @@ -64,7 +64,7 @@ if isADC==1: # ignore SLOT lines: - if re.search('SLOT',key)!=None: + if key.find('SLOT')>=0: continue # may have SVT FADC later, so specify: @@ -78,7 +78,7 @@ mtd['SSP_HPS_'+__IOSRC[xx[1]]]=1 # special case #2, interpret threshold: - elif isADC and re.search('ALLCH_PED',key)!=None: + elif isADC and key.find('ALLCH_PED')>=0: if prevthresh=='0': mtd['ECALFADC_THRESH']=0 @@ -86,7 +86,7 @@ mtd['ECALFADC_THRESH']=int('%.0f'%(float(prevthresh)-float(xx[0])+1)) # special case #3, save threshold: - elif isADC and re.search('ALLCH_TET',key)!=None: + elif isADC and key.find('ALLCH_TET')>=0: prevthresh=xx[0] # 2-column lines, only one possibility: Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py ============================================================================= --- java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py (original) +++ java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py Thu Feb 19 13:13:55 2015 @@ -2,17 +2,15 @@ import re,os,sys import engrun_util as ERU import engrun_metadata as ERM + +ERU.SetRunPeriod("engrun2014") USAGE='Usage: engrun_register.py filepath [outputfile]' DEBUG=0 #DEBUG=1 -# no sshing, just print commands: SSHREG=0 - -# one ssh for each file to register: -# (do this for automating) #SSHREG=1 if len(sys.argv)!=3 and len(sys.argv)!=2: @@ -28,7 +26,6 @@ else: OUTFILE=open(OUTFILE, 'w') - # DATA CATALOG PATH MUST ALREADY EXIST (MAKE IT MANUALLY) DCPATH=ERU.GetDCPath(FILEPATH) @@ -36,7 +33,7 @@ # THIS WILL NEED CHANGING TO RUN LOCALLY AT SLAC # WE CAN CHOOSE WHETHER TO SSH BASED ON FILEPATH, or HOSTNAME (see Jeremy's util.py) -DCLISTCMD=ERU.__SSH+' '+ERU.__DATACAT+' find '+DCOPTS+' '+ERU.__DCSRCHOPTS+' '+DCPATH +DCLISTCMD=ERU.__SSH+' '+ERU.__DATACAT+' find '+DCOPTS+' '+ERU.__DCSRCHOPTS+' '+'/HPS'#DCPATH DCREGCMD=ERU.__DATACAT+' registerDataset '+DCOPTS @@ -46,6 +43,7 @@ # Get list of files to register: FILELIST=ERU.ListRealFiles(FILEPATH) +NFILES2CATALOG=0; if len(FILELIST)==0: sys.exit('No Files: '+FILEPATH) @@ -74,6 +72,8 @@ if DEBUG>1: sys.stderr.write('Invalid File Number for '+filename+'\n') continue + + NFILES2CATALOG += 1 metadata={} metadata['Run']=runno @@ -149,6 +149,9 @@ print >> OUTFILE, cmd +if NFILES2CATALOG==0: + print 'All Files Already Cataloged' + if DEBUG!=0: ERU.DumpTable() ERU.DumpNames() Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py ============================================================================= --- java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py (original) +++ java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py Thu Feb 19 13:13:55 2015 @@ -6,10 +6,7 @@ __SSH='ssh [log in to unmask] __DATACAT='~srs/datacat/prod/datacat-hps' -__DCSRCHOPTS='--search-groups --show-non-ok-locations' - -__RUNMIN=2000 -__RUNMAX=4000 +__DCSRCHOPTS='--recurse --search-groups --show-non-ok-locations' __ALLRUNS=[] __ALLMETADATA=[] @@ -17,6 +14,23 @@ __RAWFILENAMEFORMAT='hps_%.6d.evio.%d' +__HPSRUNPERIOD=None + +__HPSRUNPERIODS={'simulation':[-9999,100], + 'testrun2012':[100,2000], + 'engrun2014':[2000,4000], + 'engrun2015':[4000,9999]} + +def SetRunPeriod(runperiod): + global __HPSRUNPERIOD + if not __HPSRUNPERIODS.has_key(runperiod): + sys.exit('SetRunPeriod: Missing run period: '+runperiod) + __HPSRUNPERIOD=''.join(runperiod) + +# +# For Reconstructed Files on Tape/Disk, path must match /pass\d+/.* +# Everything before that will be ignored. +# ################################## # CATALOG STRUCTURE (terminal directories are "GROUPS") @@ -42,7 +56,7 @@ return runlist -# THIS NEEDS FIXING FOR OTHER TYPES (e.g. DQM): +# Data Catalog Groups (RAW,RECON,DQM,DST,...): def GetGroup(filename): dt=GetDataType(filename) if dt==None: @@ -51,50 +65,80 @@ return 'RAW' elif dt[1]=='SLCIO': return 'RECON' - elif dt[1]=='TEST': - return 'DST' + elif filename.lower().endswith('.root'): + if filename.lower().find('dqm')>=0: + return 'DQM' + elif filename.lower().find('dst')>=0: + return 'DST' else: sys.exit('GetGroup: Not Ready for this DataType: '+filename) -# THESE WILL HAVE TO CHANGE FOR THE NEXT RUN -# SHOULD DO IT AUTOMATICALLY BASED ON RUN NUMBER -def GetDCRunPeriod(filepath): -# runno=GetRunNumber(filepath) - return 'engrun2014' -def GetMSSRunPeriod(filepath): - return 'engrun' - -# ONLY READY FOR FILES AT JLAB, UPDATE FOR SLAC: + +## path stub on tape at JLab: +#def GetMSSRunPeriod(filepath): +# runno=GetRunNumber(filepath)[0] +# if runno<10: +# return 'simulation' +# if runno<2500: +# return 'testrun2012' +# elif runno<4000: +# return 'engrun' +# else: +# return 'engrun2015' + +## path stub for the Data Catalog +#def GetDCRunPeriod(filepath): +# runno=GetRunNumber(filepath)[0] +# hpsrp=__HPSRUNPERIODS +# for xx in hpsrp.keys(): +# if runno>=hpsrp[xx][0] and runno<hpsrp[xx][1]: +# return xx +# return None + +# full path for the Data Catalog: def GetDCPath(filepath): - if re.match(__HPSMSSPATH,filepath)==None: - sys.exit('GetDCPath: Must start with '+__HPSMSSPATH+': '+filepath) + + print __HPSRUNPERIOD + prefix=__HPSDCPATH+'/'+__HPSRUNPERIOD #GetDCRunPeriod(filepath) + if not os.path.isdir(filepath): sys.exit('Directory does not exist: '+filepath) - filepath=filepath.lstrip(__HPSMSSPATH).lstrip('/').rstrip('/') - prefix=__HPSDCPATH+'/'+GetDCRunPeriod(filepath) - if filepath=='data': - return prefix - else: - subdirs=filepath.split('/') - subdirs.reverse() - if subdirs.pop().find('engrun')>=0: - npass=GetPass(filepath) - if npass==None: - sys.exit('GetDCPath: Unresolved Pass: '+filepath) - return prefix+'/pass%d'%(npass) - sys.exit('GetDCPath: Not ready for this: '+filepath) + + # it's got to be a raw EVIO file: + if filepath.rstrip().rstrip('/') == __HPSMSSPATH+'/data': + # fix name for 2014 run: + if prefix.endswith('engrun2014'): + return __HPSDCPATH+'/engrun' + + # throw away everything before '/pass#/' + tmp=re.search('/(pass\d+)/(.*)',filepath) + if (tmp == None): + sys.exit('GetDCPath: Not raw data on tape, Not a pass#:\n'+filepath) + + passN=tmp.group(1) + + # No longer used: + #pathstub=tmp.group(2).lstrip('/').rstrip('/') + #subdirs=pathstub.split('/').reverse() + #if subdirs.pop()=='engrun': + # return prefix+'/'+passN + #sys.exit('GetDCPath: Not ready for this: '+filepath) + + return prefix+'/'+passN+'/' + + # UPDATE THIS IF WE GET NEW DATATYPES AVAILABLE IN THE CATALOG: # Returns [fileformat,datatype] REQUIRED by the catalog def GetDataType(filename): - if re.search('evio',os.path.basename(filename))!=None: + basename=os.path.basename(filename).lower() + if basename.find('.evio')>0: return ['evio','EVIO'] - elif re.search('slcio',os.path.basename(filename))!=None: + elif basename.endswith('.slcio')>0 or basename.endswith('.lcio')>0: return ['slcio','SLCIO'] - elif re.search('dst',os.path.basename(filename))!=None: + elif basename.endswith('.root'): return ['root','TEST'] - else: - return ['unspecified','TEST'] + return ['unspecified','TEST'] def GetPass(filename): match=re.search('pass\d+',filename) @@ -104,6 +148,8 @@ def GetSite(filepath): if re.match('/mss',filepath)!=None: + return 'JLAB' + elif re.match('/work',filepath)!=None: return 'JLAB' else: return 'SLAC' @@ -133,7 +179,8 @@ if integers[ii].lstrip('0')=='': continue xx=int(integers[ii].lstrip('0')) - if xx>__RUNMIN and xx<__RUNMAX: + if xx>=__HPSRUNPERIODS[__HPSRUNPERIOD][0] and \ + xx< __HPSRUNPERIODS[__HPSRUNPERIOD][1]: runno=xx if ii<len(integers)-1: if integers[ii+1].lstrip('0')=='':