Print

Print


Author: [log in to unmask]
Date: Thu Feb 19 13:13:55 2015
New Revision: 2167

Log:
update for dqm, future run, slac locations

Modified:
    java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py
    java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py
    java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py

Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py
 =============================================================================
--- java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py	(original)
+++ java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_metadata.py	Thu Feb 19 13:13:55 2015
@@ -64,7 +64,7 @@
     if isADC==1:
 
       # ignore SLOT lines:
-      if re.search('SLOT',key)!=None:
+      if key.find('SLOT')>=0:
         continue
 
       # may have SVT FADC later, so specify:
@@ -78,7 +78,7 @@
         mtd['SSP_HPS_'+__IOSRC[xx[1]]]=1
 
     # special case #2, interpret threshold:
-    elif isADC and re.search('ALLCH_PED',key)!=None:
+    elif isADC and key.find('ALLCH_PED')>=0:
 
       if prevthresh=='0':
         mtd['ECALFADC_THRESH']=0
@@ -86,7 +86,7 @@
         mtd['ECALFADC_THRESH']=int('%.0f'%(float(prevthresh)-float(xx[0])+1))
 
     # special case #3, save threshold:
-    elif isADC and re.search('ALLCH_TET',key)!=None:
+    elif isADC and key.find('ALLCH_TET')>=0:
       prevthresh=xx[0]
 
     # 2-column lines, only one possibility:

Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py
 =============================================================================
--- java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py	(original)
+++ java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_register.py	Thu Feb 19 13:13:55 2015
@@ -2,17 +2,15 @@
 import re,os,sys
 import engrun_util as ERU
 import engrun_metadata as ERM
+
+ERU.SetRunPeriod("engrun2014")
 
 USAGE='Usage:  engrun_register.py filepath [outputfile]'
 
 DEBUG=0
 #DEBUG=1
 
-# no sshing, just print commands:
 SSHREG=0
-
-# one ssh for each file to register:
-# (do this for automating)
 #SSHREG=1
 
 if len(sys.argv)!=3 and len(sys.argv)!=2:
@@ -28,7 +26,6 @@
   else:
     OUTFILE=open(OUTFILE, 'w')
 
-
 # DATA CATALOG PATH MUST ALREADY EXIST (MAKE IT MANUALLY)
 DCPATH=ERU.GetDCPath(FILEPATH)
 
@@ -36,7 +33,7 @@
 
 # THIS WILL NEED CHANGING TO RUN LOCALLY AT SLAC
 # WE CAN CHOOSE WHETHER TO SSH BASED ON FILEPATH, or HOSTNAME (see Jeremy's util.py)
-DCLISTCMD=ERU.__SSH+' '+ERU.__DATACAT+' find '+DCOPTS+' '+ERU.__DCSRCHOPTS+' '+DCPATH
+DCLISTCMD=ERU.__SSH+' '+ERU.__DATACAT+' find '+DCOPTS+' '+ERU.__DCSRCHOPTS+' '+'/HPS'#DCPATH
 
 DCREGCMD=ERU.__DATACAT+' registerDataset '+DCOPTS
 
@@ -46,6 +43,7 @@
 # Get list of files to register:
 FILELIST=ERU.ListRealFiles(FILEPATH)
 
+NFILES2CATALOG=0;
 
 if len(FILELIST)==0:
   sys.exit('No Files:  '+FILEPATH)
@@ -74,6 +72,8 @@
     if DEBUG>1:
       sys.stderr.write('Invalid File Number for '+filename+'\n')
     continue
+
+  NFILES2CATALOG += 1
 
   metadata={}
   metadata['Run']=runno
@@ -149,6 +149,9 @@
     print >> OUTFILE, cmd
 
 
+if NFILES2CATALOG==0:
+  print 'All Files Already Cataloged'
+
 if DEBUG!=0:
   ERU.DumpTable()
   ERU.DumpNames()

Modified: java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py
 =============================================================================
--- java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py	(original)
+++ java/trunk/datacat/src/main/python/hpsdatacat/engrun/engrun_util.py	Thu Feb 19 13:13:55 2015
@@ -6,10 +6,7 @@
 
 __SSH='ssh [log in to unmask]
 __DATACAT='~srs/datacat/prod/datacat-hps'
-__DCSRCHOPTS='--search-groups --show-non-ok-locations'
-
-__RUNMIN=2000
-__RUNMAX=4000
+__DCSRCHOPTS='--recurse --search-groups --show-non-ok-locations'
 
 __ALLRUNS=[]
 __ALLMETADATA=[]
@@ -17,6 +14,23 @@
 
 __RAWFILENAMEFORMAT='hps_%.6d.evio.%d'
 
+__HPSRUNPERIOD=None
+
+__HPSRUNPERIODS={'simulation':[-9999,100],
+                 'testrun2012':[100,2000],
+                 'engrun2014':[2000,4000],
+                 'engrun2015':[4000,9999]}
+
+def SetRunPeriod(runperiod):
+  global __HPSRUNPERIOD
+  if not __HPSRUNPERIODS.has_key(runperiod):
+    sys.exit('SetRunPeriod:  Missing run period:  '+runperiod)
+  __HPSRUNPERIOD=''.join(runperiod)
+
+#
+# For Reconstructed Files on Tape/Disk, path must match /pass\d+/.*
+# Everything before that will be ignored.
+#
 
 ##################################
 # CATALOG STRUCTURE (terminal directories are "GROUPS")
@@ -42,7 +56,7 @@
   return runlist
 
 
-# THIS NEEDS FIXING FOR OTHER TYPES (e.g. DQM):
+# Data Catalog Groups (RAW,RECON,DQM,DST,...):
 def GetGroup(filename):
   dt=GetDataType(filename)
   if dt==None:
@@ -51,50 +65,80 @@
     return 'RAW'
   elif dt[1]=='SLCIO':
     return 'RECON'
-  elif dt[1]=='TEST':
-    return 'DST'
+  elif filename.lower().endswith('.root'):
+    if filename.lower().find('dqm')>=0:
+      return 'DQM'
+    elif filename.lower().find('dst')>=0:
+      return 'DST'
   else:
     sys.exit('GetGroup:  Not Ready for this DataType:  '+filename)
 
-# THESE WILL HAVE TO CHANGE FOR THE NEXT RUN
-# SHOULD DO IT AUTOMATICALLY BASED ON RUN NUMBER
-def GetDCRunPeriod(filepath):
-#  runno=GetRunNumber(filepath)
-  return 'engrun2014'
-def GetMSSRunPeriod(filepath):
-  return 'engrun'
-
-# ONLY READY FOR FILES AT JLAB, UPDATE FOR SLAC:
+
+## path stub on tape at JLab:
+#def GetMSSRunPeriod(filepath):
+#  runno=GetRunNumber(filepath)[0]
+#  if runno<10:
+#    return 'simulation'
+#  if runno<2500:
+#    return 'testrun2012'
+#  elif runno<4000:
+#    return 'engrun'
+#  else:
+#    return 'engrun2015'
+
+## path stub for the Data Catalog
+#def GetDCRunPeriod(filepath):
+#  runno=GetRunNumber(filepath)[0]
+#  hpsrp=__HPSRUNPERIODS
+#  for xx in hpsrp.keys():
+#    if runno>=hpsrp[xx][0] and runno<hpsrp[xx][1]:
+#      return xx
+#  return None
+
+# full path for the Data Catalog:
 def GetDCPath(filepath):
-  if re.match(__HPSMSSPATH,filepath)==None:
-    sys.exit('GetDCPath:  Must start with '+__HPSMSSPATH+':  '+filepath)
+
+  print __HPSRUNPERIOD
+  prefix=__HPSDCPATH+'/'+__HPSRUNPERIOD #GetDCRunPeriod(filepath)
+
   if not os.path.isdir(filepath):
     sys.exit('Directory does not exist:  '+filepath)
-  filepath=filepath.lstrip(__HPSMSSPATH).lstrip('/').rstrip('/')
-  prefix=__HPSDCPATH+'/'+GetDCRunPeriod(filepath)
-  if filepath=='data':
-    return prefix
-  else:
-    subdirs=filepath.split('/')
-    subdirs.reverse()
-    if subdirs.pop().find('engrun')>=0:
-      npass=GetPass(filepath)
-      if npass==None:
-        sys.exit('GetDCPath:  Unresolved Pass:  '+filepath)
-      return prefix+'/pass%d'%(npass)
-  sys.exit('GetDCPath: Not ready for this:  '+filepath)
+
+  # it's got to be a raw EVIO file:
+  if filepath.rstrip().rstrip('/') == __HPSMSSPATH+'/data':
+    # fix name for 2014 run:
+    if prefix.endswith('engrun2014'):
+      return __HPSDCPATH+'/engrun'
+
+  # throw away everything before '/pass#/'
+  tmp=re.search('/(pass\d+)/(.*)',filepath)
+  if (tmp == None):
+    sys.exit('GetDCPath:  Not raw data on tape, Not a pass#:\n'+filepath)
+
+  passN=tmp.group(1)
+
+  # No longer used:
+  #pathstub=tmp.group(2).lstrip('/').rstrip('/')
+  #subdirs=pathstub.split('/').reverse()
+  #if subdirs.pop()=='engrun':
+  #  return prefix+'/'+passN
+  #sys.exit('GetDCPath: Not ready for this:  '+filepath)
+
+  return prefix+'/'+passN+'/'
+
+
 
 # UPDATE THIS IF WE GET NEW DATATYPES AVAILABLE IN THE CATALOG:
 # Returns [fileformat,datatype] REQUIRED by the catalog
 def GetDataType(filename):
-  if re.search('evio',os.path.basename(filename))!=None:
+  basename=os.path.basename(filename).lower()
+  if basename.find('.evio')>0:
     return ['evio','EVIO']
-  elif re.search('slcio',os.path.basename(filename))!=None:
+  elif basename.endswith('.slcio')>0 or basename.endswith('.lcio')>0:
     return ['slcio','SLCIO']
-  elif re.search('dst',os.path.basename(filename))!=None:
+  elif basename.endswith('.root'):
     return ['root','TEST']
-  else:
-    return ['unspecified','TEST']
+  return ['unspecified','TEST']
 
 def GetPass(filename):
   match=re.search('pass\d+',filename)
@@ -104,6 +148,8 @@
 
 def GetSite(filepath):
   if re.match('/mss',filepath)!=None:
+    return 'JLAB'
+  elif re.match('/work',filepath)!=None:
     return 'JLAB'
   else:
     return 'SLAC'
@@ -133,7 +179,8 @@
     if integers[ii].lstrip('0')=='':
       continue
     xx=int(integers[ii].lstrip('0'))
-    if xx>__RUNMIN and xx<__RUNMAX:
+    if xx>=__HPSRUNPERIODS[__HPSRUNPERIOD][0] and \
+       xx< __HPSRUNPERIODS[__HPSRUNPERIOD][1]:
       runno=xx
       if ii<len(integers)-1:
         if integers[ii+1].lstrip('0')=='':