Print

Print


Author: [log in to unmask]
Date: Tue Sep 29 13:13:41 2015
New Revision: 3738

Log:
Miscellaneous updates to data crawler.

Added:
    java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java
    java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java
Modified:
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
    java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
    java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
    java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java

Added: java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java	Tue Sep 29 13:13:41 2015
@@ -0,0 +1,30 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This is a metadata reader for AIDA files.
+ * <p>
+ * It currently only gets the run number from the file name.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class AidaMetadataReader implements FileMetadataReader {
+
+    /**
+     * Get the metadata for an AIDA file.
+     *
+     * @return the metadata for an AIDA file
+     */
+    @Override
+    public Map<String, Object> getMetadata(final File file) throws IOException {
+        final Map<String, Object> metadata = new HashMap<String, Object>();
+        final int run = CrawlerFileUtilities.getRunFromFileName(file);
+        metadata.put("runMin", run);
+        metadata.put("runMax", run);
+        return metadata;
+    }
+}

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java	Tue Sep 29 13:13:41 2015
@@ -108,7 +108,7 @@
         this.formats.add(format);
         return this;
     }
-    
+
     /**
      * Get the database connection parameters.
      *
@@ -162,7 +162,7 @@
     Integer maxDepth() {
         return maxDepth;
     }
-  
+
     /**
      * Get the root directory for the file search.
      *

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java	Tue Sep 29 13:13:41 2015
@@ -2,14 +2,21 @@
 
 import java.io.File;
 
+/**
+ * File utilities for crawler.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
 public class CrawlerFileUtilities {
-         
-    static boolean isHpsFile(File file) {
-        return file.getName().startsWith("hps");
-    }
-    
-    static int getRunFromFileName(File file) {
-        String name = file.getName();
-        return Integer.parseInt(name.substring(4, 8));
+
+    /**
+     * Get run number from file name assuming it looks like "hps_001234".
+     *
+     * @param file the file
+     * @return the run number
+     */
+    static int getRunFromFileName(final File file) {
+        final String name = file.getName();
+        return Integer.parseInt(name.substring(4, 10));
     }
 }

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	Tue Sep 29 13:13:41 2015
@@ -152,7 +152,7 @@
 
     /**
      * Main method.
-     * 
+     *
      * @param args the command line arguments
      */
     public static void main(final String[] args) {
@@ -309,9 +309,9 @@
 
             // List of run numbers.
             if (cl.hasOption("r")) {
-                Set<Integer> acceptRuns = new HashSet<Integer>();
-                for (String arg : cl.getOptionValues("r")) {
-                    acceptRuns.add(Integer.parseInt(arg));                
+                final Set<Integer> acceptRuns = new HashSet<Integer>();
+                for (final String arg : cl.getOptionValues("r")) {
+                    acceptRuns.add(Integer.parseInt(arg));
                 }
                 config.setAcceptRuns(acceptRuns);
             }

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java	Tue Sep 29 13:13:41 2015
@@ -17,6 +17,9 @@
  */
 class DatacatUtilities {
 
+    /**
+     * Static map of strings to dataset file formats.
+     */
     static Map<String, DatasetFileFormat> formatMap = new HashMap<String, DatasetFileFormat>();
     static {
         for (final DatasetFileFormat format : DatasetFileFormat.values()) {
@@ -24,6 +27,14 @@
         }
     }
 
+    /**
+     * Add a file to the data catalog.
+     *
+     * @param datacatClient the data catalog client
+     * @param folder the target folder in the data catalog
+     * @param file the file with the full path
+     * @param metadata the file's meta data
+     */
     static void addFile(final DatacatClient datacatClient, final String folder, final File file,
             final Map<String, Object> metadata) {
         final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
@@ -52,12 +63,19 @@
         return response;
     }
 
+    /**
+     * Create metadata for a file.
+     *
+     * @param file the file
+     * @return the metadata for the file
+     */
     static Map<String, Object> createMetadata(final File file) {
         final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
         final DatasetDataType dataType = DatacatUtilities.getDataType(file);
         final FileMetadataReader reader = DatacatUtilities.getFileMetaDataReader(fileFormat, dataType);
         if (reader == null) {
-            throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + dataType.name() + ".");
+            throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type "
+                    + dataType.name() + ".");
         }
         Map<String, Object> metadata;
         try {
@@ -68,6 +86,12 @@
         return metadata;
     }
 
+    /**
+     * Get the data type for a file.
+     *
+     * @param file the file
+     * @return the file's data type
+     */
     static DatasetDataType getDataType(final File file) {
         final DatasetFileFormat fileFormat = getFileFormat(file);
         DatasetDataType dataType = null;
@@ -79,7 +103,6 @@
         } else if (fileFormat.equals(DatasetFileFormat.LCIO)) {
             dataType = DatasetDataType.RECON;
         } else if (fileFormat.equals(DatasetFileFormat.ROOT)) {
-            // FIXME: This should probably open the file and determine what it contains.
             if (file.getName().contains("_dqm")) {
                 dataType = DatasetDataType.DQM;
             } else if (file.getName().contains("_dst")) {
@@ -94,6 +117,12 @@
         return dataType;
     }
 
+    /**
+     * Get the file format of a file.
+     *
+     * @param pathname the file
+     * @return the file format of the file
+     */
     static DatasetFileFormat getFileFormat(final File pathname) {
         String name = pathname.getName();
         if (name.contains(DatasetFileFormat.EVIO.extension()) && !name.endsWith(DatasetFileFormat.EVIO.extension())) {
@@ -103,6 +132,13 @@
         return formatMap.get(extension);
     }
 
+    /**
+     * Get a metadata reader for a given combination of file format and data type.
+     *
+     * @param fileFormat the file format
+     * @param dataType the data type
+     * @return the file metadata reader
+     */
     static FileMetadataReader getFileMetaDataReader(final DatasetFileFormat fileFormat, final DatasetDataType dataType) {
         FileMetadataReader reader = null;
         if (fileFormat.equals(DatasetFileFormat.LCIO)) {
@@ -111,19 +147,20 @@
             reader = new EvioMetadataReader();
         } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DST)) {
             reader = new RootDstMetadataReader();
+        } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DQM)) {
+            reader = new RootDqmMetadataReader();
+        } else if (fileFormat.equals(DatasetFileFormat.AIDA)) {
+            reader = new AidaMetadataReader();
         }
         return reader;
     }
 
-    static String getFolder(final String rootDir, final File file) {
-        String stripDir = rootDir;
-        if (!stripDir.endsWith("/")) {
-            stripDir += "/";
-        }
-        final String folder = file.getParentFile().getPath().replace(stripDir, "");
-        return folder;
-    }
-
+    /**
+     * Strip the file number from an EVIO file name.
+     *
+     * @param name the EVIO file name
+     * @return the file name stripped of the file number
+     */
     static String stripEvioFileNumber(final String name) {
         String strippedName = name;
         if (!name.endsWith(DatasetFileFormat.EVIO.extension())) {

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java	Tue Sep 29 13:13:41 2015
@@ -16,25 +16,28 @@
 import org.jlab.coda.jevio.EvioReader;
 import org.lcsim.util.log.DefaultLogFormatter;
 import org.lcsim.util.log.LogUtil;
- 
+
 /**
  * Reads metadata from EVIO files.
- * 
+ *
  * @author Jeremy McCormick, SLAC
  */
 public class EvioMetadataReader implements FileMetadataReader {
-    
+
+    /**
+     * Class logger.
+     */
     private static Logger LOGGER = LogUtil.create(EvioMetadataReader.class, new DefaultLogFormatter(), Level.ALL);
-   
+
     /**
      * Get the EVIO file metadata.
-     * 
+     *
      * @param file the EVIO file
      * @return the metadata map of key and value pairs
      */
     @Override
-    public Map<String, Object> getMetadata(File file) throws IOException {
-      
+    public Map<String, Object> getMetadata(final File file) throws IOException {
+
         Date startDate = null;
         Date endDate = null;
         int badEventCount = 0;
@@ -47,18 +50,18 @@
         Integer endEvent = null;
         Integer startEvent = null;
         Long lastTimestamp = null;
-        
+
         EvioReader evioReader = null;
         try {
             evioReader = EvioFileUtilities.open(file, false);
-        } catch (EvioException e) {
+        } catch (final EvioException e) {
             throw new IOException(e);
         }
-        
-        int fileNumber = EvioFileUtilities.getSequenceFromName(file);
-        
+
+        final int fileNumber = EvioFileUtilities.getSequenceFromName(file);
+
         EvioEvent evioEvent = null;
-        
+
         while (true) {
             try {
                 evioEvent = evioReader.parseNextEvent();
@@ -114,22 +117,22 @@
                 }
                 ++eventCount;
             }
-        }    
-        
+        }
+
         // Set end date from last valid timestamp.
         if (endDate == null) {
             endDate = new Date(lastTimestamp);
             LOGGER.info("set end date to " + endDate + " from last timestamp " + lastTimestamp);
         }
-        
+
         // Set end event number.
         if (eventIdData != null) {
             endEvent = eventIdData[0];
             LOGGER.info("set end event " + endEvent);
         }
-        
-        Map<String, Object> metaDataMap = new HashMap<String, Object>();
-        
+
+        final Map<String, Object> metaDataMap = new HashMap<String, Object>();
+
         metaDataMap.put("runMin", run);
         metaDataMap.put("runMax", run);
         metaDataMap.put("eventCount", eventCount);
@@ -142,7 +145,7 @@
         metaDataMap.put("endEvent", endEvent);
         metaDataMap.put("hasEnd", hasEnd ? 1 : 0);
         metaDataMap.put("hasPrestart", hasPrestart ? 1 : 0);
-        
+
         return metaDataMap;
-    }                             
+    }
 }

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java	Tue Sep 29 13:13:41 2015
@@ -14,7 +14,7 @@
  * Filter files on their format.
  * <p>
  * Only files matching the format will be accepted by the file visitor.
- * 
+ *
  * @author Jeremy McCormick, SLAC
  */
 public class FileFormatFilter implements FileFilter {
@@ -23,18 +23,18 @@
      * Setup logger.
      */
     private static final Logger LOGGER = LogUtil.create(FileFormatFilter.class, new DefaultLogFormatter(), Level.ALL);
-    
+
     /**
      * The file format.
      */
-    private Set<DatasetFileFormat> formats;
-    
+    private final Set<DatasetFileFormat> formats;
+
     /**
      * Create a new filter with the given format.
-     * 
+     *
      * @param format the file format
      */
-    FileFormatFilter(Set<DatasetFileFormat> formats) {
+    FileFormatFilter(final Set<DatasetFileFormat> formats) {
         if (formats == null) {
             throw new IllegalArgumentException("The formats collection is null.");
         }
@@ -43,18 +43,18 @@
         }
         this.formats = formats;
     }
-    
+
     /**
      * Returns <code>true</code> if the file should be accepted, e.g. it matches the filer's format.
-     * 
+     *
      * @param pathname the file's full path
      */
     @Override
-    public boolean accept(File pathname) {
+    public boolean accept(final File pathname) {
         LOGGER.info(pathname.getPath());
-        DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname);
+        final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname);
         if (fileFormat != null) {
-            LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name());        
+            LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name());
             return formats.contains(fileFormat);
         } else {
             LOGGER.info("rejected file " + pathname.getPath() + " with unknown format");

Added: java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java	Tue Sep 29 13:13:41 2015
@@ -0,0 +1,30 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This is a metadata reader for ROOT DQM files.
+ * <p>
+ * It currently only gets the run number from the file name.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class RootDqmMetadataReader implements FileMetadataReader {
+
+    /**
+     * Get the metadata for a ROOT DQM file.
+     *
+     * @return the metadata for a ROOT DQM file
+     */
+    @Override
+    public Map<String, Object> getMetadata(final File file) throws IOException {
+        final Map<String, Object> metadata = new HashMap<String, Object>();
+        final int run = CrawlerFileUtilities.getRunFromFileName(file);
+        metadata.put("runMin", run);
+        metadata.put("runMax", run);
+        return metadata;
+    }
+}

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java	Tue Sep 29 13:13:41 2015
@@ -2,7 +2,6 @@
 
 import hep.io.root.RootClassNotFound;
 import hep.io.root.RootFileReader;
-import hep.io.root.interfaces.TBranch;
 import hep.io.root.interfaces.TLeafElement;
 import hep.io.root.interfaces.TObjArray;
 import hep.io.root.interfaces.TTree;
@@ -16,33 +15,34 @@
  * This is a very simple metadata reader for ROOT DST files.
  * <p>
  * It currently only sets the standard metadata for event count and run number.
- * 
+ *
  * @author Jeremy McCormick, SLAC
  */
 public class RootDstMetadataReader implements FileMetadataReader {
 
     /**
      * Get the metadata for a ROOT DST file.
-     * 
+     *
      * @return the metadata for a ROOT DST file
      */
-    public Map<String, Object> getMetadata(File file) throws IOException {
-        Map<String, Object> metadata = new HashMap<String, Object>();        
+    @Override
+    public Map<String, Object> getMetadata(final File file) throws IOException {
+        final Map<String, Object> metadata = new HashMap<String, Object>();
         RootFileReader rootReader = null;
-        long eventCount = 0;        
+        long eventCount = 0;
         int runMin = 0;
         int runMax = 0;
         long size = 0;
         try {
             rootReader = new RootFileReader(file.getAbsolutePath());
-            TTree tree = (TTree) rootReader.get("HPS_Event");
-            //TBranch branch = tree.getBranch("Event");
+            final TTree tree = (TTree) rootReader.get("HPS_Event");
+            // TBranch branch = tree.getBranch("Event");
             eventCount = tree.getEntries();
             size = tree.getTotBytes();
-            TObjArray leaves = tree.getLeaves();
-            
-            for (Object object : leaves) {
-                TLeafElement leaf = (TLeafElement) object;
+            final TObjArray leaves = tree.getLeaves();
+
+            for (final Object object : leaves) {
+                final TLeafElement leaf = (TLeafElement) object;
                 if ("run_number".equals(leaf.getName())) {
                     runMin = (int) leaf.getWrappedValue(0);
                     runMax = (int) leaf.getWrappedValue(0);
@@ -55,11 +55,11 @@
             if (rootReader != null) {
                 rootReader.close();
             }
-        }        
+        }
         metadata.put("eventCount", eventCount);
         metadata.put("runMin", runMin);
         metadata.put("runMax", runMax);
         metadata.put("size", size);
         return metadata;
-    }    
+    }
 }