Print

Print


Author: [log in to unmask]
Date: Wed Sep 23 07:49:16 2015
New Revision: 3677

Log:
Updates to crawler and run database.

Added:
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java
      - copied, changed from r3655, java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
    java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
    java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
    java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java
    java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java
    java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java
    java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
Removed:
    java/trunk/crawler/src/main/java/org/hps/crawler/EvioDatacatUtilities.java
    java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java
    java/trunk/crawler/src/main/java/org/hps/crawler/RunProcessor.java
Modified:
    java/trunk/crawler/pom.xml
    java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java

Modified: java/trunk/crawler/pom.xml
 =============================================================================
--- java/trunk/crawler/pom.xml	(original)
+++ java/trunk/crawler/pom.xml	Wed Sep 23 07:49:16 2015
@@ -19,5 +19,10 @@
             <groupId>org.hps</groupId>
             <artifactId>hps-run-database</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.freehep</groupId>
+            <artifactId>freehep-rootio</artifactId>
+            <version>2.2.1</version>
+        </dependency>
     </dependencies>
 </project>

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java	Wed Sep 23 07:49:16 2015
@@ -10,7 +10,6 @@
 import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -21,9 +20,12 @@
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.PosixParser;
 import org.hps.conditions.database.ConnectionParameters;
-import org.hps.record.evio.EvioFileMetadata;
+import org.hps.datacat.client.DatacatClient;
+import org.hps.datacat.client.DatacatClientFactory;
+import org.hps.datacat.client.DatasetFileFormat;
 import org.hps.run.database.RunDatabaseDaoFactory;
 import org.hps.run.database.RunManager;
+import org.hps.run.database.RunProcessor;
 import org.hps.run.database.RunSummary;
 import org.hps.run.database.RunSummaryDao;
 import org.hps.run.database.RunSummaryImpl;
@@ -31,45 +33,75 @@
 import org.lcsim.util.log.LogUtil;
 
 /**
- * Search for EVIO files in a directory tree, group the files that are found by run, extract meta data from these files,
- * and optionally update a run database with the information that was found.
+ * Crawls a directory tree for data files and performs tasks related to this information.
+ * <p>
+ * The crawler can find EVIO, LCIO, or ROOT files in a directory tree and then perform various tasks based on
+ * information extracted from them.
  *
  * @author Jeremy McCormick, SLAC
  */
 public final class Crawler {
 
     /**
+     * Make a list of available features for printing help.
+     */
+    private static String AVAILABLE_FEATURES;
+
+    /**
+     * Make a list of available file formats for printing help.
+     */
+    private static String AVAILABLE_FORMATS;
+
+    /**
      * Setup the logger.
      */
-    private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.ALL);
+    private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.CONFIG);
 
     /**
      * Constant for milliseconds conversion.
      */
     private static final long MILLISECONDS = 1000L;
-
     /**
      * Command line options for the crawler.
      */
     private static final Options OPTIONS = new Options();
+
+    static {
+        final StringBuffer buffer = new StringBuffer();
+        for (final CrawlerFeature feature : CrawlerFeature.values()) {
+            buffer.append(feature.name() + " ");
+        }
+        buffer.setLength(buffer.length() - 1);
+        AVAILABLE_FEATURES = buffer.toString();
+    }
+    static {
+        final StringBuffer buffer = new StringBuffer();
+        for (final DatasetFileFormat format : DatasetFileFormat.values()) {
+            buffer.append(format.name() + " ");
+        }
+        buffer.setLength(buffer.length() - 1);
+        AVAILABLE_FORMATS = buffer.toString();
+    }
 
     /**
      * Statically define the command options.
      */
     static {
         OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")");
-        OPTIONS.addOption("c", "datacat", true, "update the data catalog using the specified folder (off by default)");
+        OPTIONS.addOption("c", "datacat", true, "use the specified datacat folder");
         OPTIONS.addOption("C", "cache", false, "cache files from MSS (JLAB only and not for batch farm use!)");
+        OPTIONS.addOption("e", "enable", true, "enable a feature: " + AVAILABLE_FEATURES);
+        OPTIONS.addOption("D", "default-features", false, "enable default features");
+        OPTIONS.addOption("F", "default-formats", false, "enable default file filters");
+        OPTIONS.addOption("f", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS);
         OPTIONS.addOption("p", "connection-properties", true, "database connection properties file (required)");
         OPTIONS.addOption("d", "directory", true, "root directory to start crawling (default is current dir)");
         OPTIONS.addOption("E", "evio-processor", true, "class name of an EvioEventProcessor to execute");
         OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
-        OPTIONS.addOption("i", "insert", false, "insert information into the run database (not done by default)");
         OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)");
         OPTIONS.addOption("r", "run", true, "add a run number to accept (others will be excluded)");
         OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
         OPTIONS.addOption("w", "max-cache-wait", true, "time per run allowed for file caching in seconds");
-        OPTIONS.addOption("u", "update", false, "allow replacement of existing data in the run db (off by default)");
         OPTIONS.addOption("x", "max-depth", true, "max depth to crawl");
     }
 
@@ -87,29 +119,6 @@
     }
 
     /**
-     * Process all the runs that were found.
-     *
-     * @param runs the run log containing the list of run summaries
-     * @throws Exception if there is an error processing one of the runs
-     */
-    static RunProcessor processRun(final RunSummary runSummary) throws Exception {
-
-        LOGGER.info("processing run" + runSummary.getRun());
-
-        // Create a processor to process all the EVIO events in the run.
-        LOGGER.info("creating run processor for " + runSummary.getRun());
-        final RunProcessor runProcessor = new RunProcessor((RunSummaryImpl) runSummary);
-
-        // Process all of the files from the run.
-        LOGGER.info("processing run " + runSummary.getRun());
-        runProcessor.processRun();
-
-        LOGGER.getHandlers()[0].flush();
-        
-        return runProcessor;
-    }
-
-    /**
      * The class for managing the file caching using the 'jcache' command.
      */
     private final JCacheManager cacheManager = new JCacheManager();
@@ -154,6 +163,30 @@
     }
 
     /**
+     * Create a run processor from the current configuration.
+     *
+     * @return the run processor
+     */
+    private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary) {
+
+        final RunProcessor runProcessor = new RunProcessor(runSummary);
+
+        final Set<CrawlerFeature> features = config.getFeatures();
+
+        if (features.contains(CrawlerFeature.EPICS)) {
+            runProcessor.addEpicsProcessor();
+        }
+        if (features.contains(CrawlerFeature.SCALERS)) {
+            runProcessor.addScalerProcessor();
+        }
+        if (features.contains(CrawlerFeature.TRIGGER)) {
+            runProcessor.addTriggerTimeProcessor();
+        }
+
+        return runProcessor;
+    }
+
+    /**
      * Parse command line options and create a new {@link Crawler} object from the configuration.
      *
      * @param args the command line arguments
@@ -161,9 +194,9 @@
      */
     private Crawler parse(final String args[]) {
 
-        LOGGER.info("parsing command line options");
-
-        config = new CrawlerConfig();
+        LOGGER.config("parsing command line options");
+
+        this.config = new CrawlerConfig();
 
         try {
             final CommandLine cl = this.parser.parse(OPTIONS, args);
@@ -176,11 +209,37 @@
             // Log level.
             if (cl.hasOption("L")) {
                 final Level level = Level.parse(cl.getOptionValue("L"));
-                LOGGER.info("setting log level to " + level);
+                LOGGER.config("setting log level to " + level);
                 LOGGER.setLevel(level);
             }
 
+            // Enable default features.
+            if (cl.hasOption("D")) {
+                LOGGER.config("enabling default features");
+                this.config.addDefaultFeatures();
+            }
+
+            // Enable default file formats.
+            if (cl.hasOption("F")) {
+                LOGGER.config("enabling default file formats");
+                this.config.addDefaultFileFormats();
+            }
+
+            // Root directory for file crawling.
+            if (cl.hasOption("d")) {
+                final File rootDir = new File(cl.getOptionValue("d"));
+                if (!rootDir.exists()) {
+                    throw new IllegalArgumentException("The directory does not exist.");
+                }
+                if (!rootDir.isDirectory()) {
+                    throw new IllegalArgumentException("The specified path is not a directory.");
+                }
+                config.setRootDir(rootDir);
+                LOGGER.config("root dir set to " + config.rootDir());
+            }
+
             // Database connection properties file (this is not optional).
+            // FIXME: This only needs to be set for updating the run database.
             if (cl.hasOption("p")) {
                 final String dbPropPath = cl.getOptionValue("p");
                 final File dbPropFile = new File(dbPropPath);
@@ -194,19 +253,6 @@
             } else {
                 throw new RuntimeException(
                         "The -p switch providing the database connection properties file is a required argument.");
-            }
-
-            // Root directory for file crawling.
-            if (cl.hasOption("d")) {
-                final File rootDir = new File(cl.getOptionValue("d"));
-                if (!rootDir.exists()) {
-                    throw new IllegalArgumentException("The directory does not exist.");
-                }
-                if (!rootDir.isDirectory()) {
-                    throw new IllegalArgumentException("The specified path is not a directory.");
-                }
-                config.setRootDir(rootDir);
-                LOGGER.config("root dir for crawling set to " + config.rootDir());
             }
 
             // Timestamp file for date filtering.
@@ -247,12 +293,6 @@
                 config.setAcceptRuns(acceptRuns);
             }
 
-            // Enable updating of run database.
-            if (cl.hasOption("i")) {
-                config.setUpdateRunLog(true);
-                LOGGER.config("inserting into run database is enabled");
-            }
-
             // Enable file cache usage for running at JLAB.
             if (cl.hasOption("C")) {
                 config.setUseFileCache(true);
@@ -264,12 +304,6 @@
                 final Long waitTime = Long.parseLong(cl.getOptionValue("w")) * MILLISECONDS;
                 config.setWaitTime(waitTime);
                 LOGGER.config("max time for file caching set to " + config.waitTime());
-            }
-
-            // Allow deletion and replacement of records in run database.
-            if (cl.hasOption("u")) {
-                config.setAllowUpdates(true);
-                LOGGER.config("deletion and replacement of existing runs in the database is enabled");
             }
 
             // User supplied timestamp string that is converted to a date for file filtering.
@@ -315,13 +349,62 @@
                 if (datacatFolder == null) {
                     throw new IllegalArgumentException("missing -c argument with data catalog folder");
                 }
+
+                // Set datacat folder.
                 LOGGER.config("using data catalog folder " + datacatFolder);
                 config.setDatacatFolder(datacatFolder);
-                config.setUpdateDatacat(true);
-            }
-
+
+                // Assume datacat should be enabled if folder name was given.
+                config.getFeatures().add(CrawlerFeature.DATACAT);
+                LOGGER.config(CrawlerFeature.DATACAT + " is enabled");
+            }
+
+            // Configure enabled features.
+            if (cl.hasOption("e")) {
+                for (final String arg : cl.getOptionValues("e")) {
+                    CrawlerFeature feature = null;
+                    try {
+                        feature = CrawlerFeature.valueOf(arg);
+                    } catch (IllegalArgumentException | NullPointerException e) {
+                        throw new IllegalArgumentException("The feature " + arg + " is not valid.", e);
+                    }
+                    this.config.addFeature(feature);
+                }
+            }
+
+            // Configure enabled file formats.
+            if (cl.hasOption("f")) {
+                for (final String arg : cl.getOptionValues("f")) {
+                    DatasetFileFormat format = null;
+                    try {
+                        format = DatasetFileFormat.valueOf(arg);
+                    } catch (IllegalArgumentException | NullPointerException e) {
+                        throw new IllegalArgumentException("The format " + arg + " is not valid.", e);
+                    }
+                    LOGGER.config("adding format " + format.name());
+                    this.config.addFileFormat(format);
+                }
+            }
         } catch (final ParseException e) {
             throw new RuntimeException("Error parsing options.", e);
+        }
+
+        // Check that there is at least one file format enabled for filtering.
+        if (this.config.getFileFormats().isEmpty()) {
+            throw new IllegalStateException(
+                    "There are no file formats enabled.  Enable defaults using -F or add a format using the -f switch.");
+        }
+
+        // Print a message if no features are enabled; this is not a fatal error but the job won't update anything.
+        if (this.config.getFeatures().isEmpty()) {
+            LOGGER.warning("no features are enabled");
+        }
+
+        // Check that EVIO file filter is active if run database is being updated.
+        // Don't add by default because the user may have made a mistake in the options they provided.
+        if (this.config.getFeatures().contains(CrawlerFeature.RUNDB_INSERT)
+                && !this.config.getFileFormats().contains(DatasetFileFormat.EVIO)) {
+            throw new IllegalStateException("Run database is enabled without EVIO file filter active.");
         }
 
         // Configure the max wait time for file caching operations.
@@ -340,11 +423,35 @@
      */
     private void printUsage() {
         final HelpFormatter help = new HelpFormatter();
-        help.printHelp("EvioFileCrawler", "", OPTIONS, "");
+        // FIXME: include more info here and improve the way this looks (line width should be increased)
+        help.printHelp("Crawler [options]", "", OPTIONS, "");
         System.exit(0);
     }
 
     /**
+     * Process a run using its run summary.
+     *
+     * @param runSummary the run summary of the run to process
+     * @throws Exception if there is an error processing one of the runs
+     */
+    private RunProcessor processRun(final RunSummary runSummary) throws Exception {
+
+        LOGGER.info("processing run " + runSummary.getRun());
+
+        // Create a processor to process all the EVIO events in the run.
+        LOGGER.info("creating run processor for " + runSummary.getRun());
+        final RunProcessor runProcessor = this.createEvioRunProcessor((RunSummaryImpl) runSummary);
+
+        // Process all of the files from the run.
+        LOGGER.info("processing run " + runSummary.getRun());
+        runProcessor.processRun();
+
+        LOGGER.getHandlers()[0].flush();
+
+        return runProcessor;
+    }
+
+    /**
      * Run the full crawler job.
      *
      * @throws Exception if there is some error during the job
@@ -354,7 +461,18 @@
         LOGGER.info("starting Crawler job");
 
         // Create the file visitor for crawling the root directory with the given date filter.
-        final EvioFileVisitor visitor = new EvioFileVisitor(config.timestamp());
+        final CrawlerFileVisitor visitor = new CrawlerFileVisitor();
+
+        if (config.timestamp() != null) {
+            // Add date filter if timestamp is supplied.
+            visitor.addFilter(new DateFileFilter(config.timestamp()));
+        }
+
+        // Add file format filter.
+        for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+            LOGGER.info("adding file format filter for " + fileFormat.name());
+        }
+        visitor.addFilter(new FileFormatFilter(config.getFileFormats()));
 
         // Walk the file tree using the visitor.
         this.walk(visitor);
@@ -362,33 +480,45 @@
         // Get the list of run data created by the visitor.
         final RunSummaryMap runMap = visitor.getRunMap();
 
-        // Process all runs that were found.        
-        for (RunSummary runSummary : runMap.getRunSummaries()) {
-        
+        LOGGER.info("found " + runMap.size() + " runs from crawl job");
+
+        // Process all runs that were found.
+        for (final RunSummary runSummary : runMap.getRunSummaries()) {
+
             if (runSummary == null) {
                 throw new IllegalArgumentException("The run summary is null for some weird reason.");
             }
-            
-            LOGGER.info("starting full processing of run " + runSummary.getRun());
-            
+
+            LOGGER.info("starting processing of run " + runSummary.getRun());
+
             // Cache files from MSS.
             this.cacheFiles(runSummary);
 
-            // Process the run's files.
-            RunProcessor runProcessor = processRun(runSummary);
-                        
-            // Execute the run database update.
-            this.updateRunDatabase(runSummary);
+            // Process the run's EVIO files.
+            if (!runSummary.getFiles(DatasetFileFormat.EVIO).isEmpty()) {
+                final RunProcessor runProcessor = this.processRun(runSummary);
+            }
+
+            if (config.getFeatures().contains(CrawlerFeature.RUNDB_INSERT)) {
+                // Execute the run database update.
+                this.updateRunDatabase(runSummary);
+            } else {
+                LOGGER.info("updating run database is not enabled");
+            }
 
             // Update the data catalog.
-            this.updateDatacat(runProcessor.getEvioFileMetaData());
-            
+            if (this.config.getFeatures().contains(CrawlerFeature.DATACAT)) {
+                this.updateDatacat(runSummary);
+            }
+
             LOGGER.info("completed full processing of run " + runSummary);
-        }       
+        }
 
         // Update the timestamp output file.
         this.updateTimestamp();
 
+        LOGGER.getHandlers()[0].flush();
+
         LOGGER.info("Crawler job is done!");
     }
 
@@ -397,12 +527,20 @@
      *
      * @param runMap the map of run information including the EVIO file list
      */
-    private void updateDatacat(List<EvioFileMetadata> metadataList) {
-        if (this.config.updateDatacat()) {            
-            EvioDatacatUtilities.addEvioFiles(metadataList, config.datacatFolder());
-            LOGGER.info("done updating data catalog");
-        } else {
-            LOGGER.info("updating data catalog is disabled");
+    private void updateDatacat(final RunSummary runSummary) {
+        final DatacatClient datacatClient = new DatacatClientFactory().createClient();
+        for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+            LOGGER.info("adding files to datacat with format " + fileFormat.name());
+            for (final File file : runSummary.getFiles(fileFormat)) {
+
+                LOGGER.info("adding file " + file.getPath() + " to datacat");
+
+                // Get folder for file by stripping out root directory.
+                final String folder = DatacatUtilities.getFolder(config.rootDir().getPath(), file);
+
+                // Register file in the catalog.
+                // DatacatUtilities.addFile(datacatClient, folder, file);
+            }
         }
     }
 
@@ -413,43 +551,38 @@
      * @throws SQLException if there is a database query error
      */
     private void updateRunDatabase(final RunSummary runSummary) throws SQLException {
-        // Insert the run information into the database.
-        if (config.updateRunDatabase()) {
-
-            LOGGER.info("updating run database for run " + runSummary.getRun());
-
-            // Open a DB connection.
-            final Connection connection = config.connectionParameters().createConnection();
-
-            // Create factory for interfacing to run database.
-            RunManager runManager = new RunManager();
-            runManager.setConnection(connection);
-            final RunDatabaseDaoFactory dbFactory = runManager.createDaoFactory();
-
-            // Create object for updating run info in the database.
-            final RunSummaryDao runSummaryDao = dbFactory.createRunSummaryDao();
-            
-            // Delete existing run summary if necessary.
-            if (runSummaryDao.runSummaryExists(runSummary.getRun())) {
-                if (this.config.allowUpdates()) {
-                    LOGGER.info("deleting existing information for run " + runSummary.getRun());
-                    runSummaryDao.deleteFullRunSummary(runSummary);
-                } else {
-                    throw new RuntimeException("Run " + runSummary.getRun() + " exists in database and deletion is not enabled.");
-                }
-            }
-            
-            // Insert run summary into database.
-            runSummaryDao.insertFullRunSummary(runSummary);
-
-            // Close the DB connection.
-            connection.close();
-
-            LOGGER.info("done updating run database");
-
-        } else {
-            LOGGER.info("updating run database is disabled");
-        }
+
+        LOGGER.info("updating run database for run " + runSummary.getRun());
+
+        // Open a DB connection.
+        final Connection connection = config.connectionParameters().createConnection();
+
+        // Create factory for interfacing to run database.
+        final RunManager runManager = new RunManager();
+        runManager.setConnection(connection);
+        final RunDatabaseDaoFactory dbFactory = runManager.createDaoFactory();
+
+        // Create object for updating run info in the database.
+        final RunSummaryDao runSummaryDao = dbFactory.createRunSummaryDao();
+
+        // Delete existing run summary if necessary.
+        if (runSummaryDao.runSummaryExists(runSummary.getRun())) {
+            if (this.config.features.contains(CrawlerFeature.RUNDB_UPDATE)) {
+                LOGGER.info("deleting existing information for run " + runSummary.getRun());
+                runSummaryDao.deleteFullRunSummary(runSummary);
+            } else {
+                throw new RuntimeException("Run " + runSummary.getRun()
+                        + " exists in database and deletion is not enabled.");
+            }
+        }
+
+        // Insert run summary into database.
+        runSummaryDao.insertFullRunSummary(runSummary);
+
+        // Close the DB connection.
+        connection.close();
+
+        LOGGER.info("done updating run database");
 
         LOGGER.getHandlers()[0].flush();
     }
@@ -481,7 +614,7 @@
      *
      * @param visitor the file visitor
      */
-    private void walk(final EvioFileVisitor visitor) {
+    private void walk(final CrawlerFileVisitor visitor) {
         if (config.timestamp() != null) {
             // Date filter from timestamp.
             visitor.addFilter(new DateFileFilter(config.timestamp()));
@@ -494,7 +627,7 @@
             visitor.addFilter(new RunFilter(config.acceptRuns()));
             LOGGER.config("added run number filter");
         } else {
-            LOGGER.config("no run number filter used");
+            LOGGER.config("no run number filter will be used");
         }
 
         try {
@@ -505,5 +638,4 @@
             throw new RuntimeException("Error while walking the directory tree.", e);
         }
     }
-
 }

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java	Wed Sep 23 07:49:16 2015
@@ -4,12 +4,16 @@
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Date;
+import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
 
 import org.hps.conditions.database.ConnectionParameters;
+import org.hps.datacat.client.DatasetFileFormat;
+import org.hps.datacat.client.DatasetSite;
 import org.hps.record.evio.EvioEventProcessor;
 
 /**
@@ -50,6 +54,21 @@
     private String datacatFolder = null;
 
     /**
+     * Set whether extraction of metadata from files is enabled.
+     */
+    private boolean enableMetadata;
+
+    /**
+     * Set of features enabled in this configuration.
+     */
+    Set<CrawlerFeature> features = new HashSet<CrawlerFeature>();
+
+    /**
+     * Set of file formats for filtering files.
+     */
+    Set<DatasetFileFormat> formats = new HashSet<DatasetFileFormat>();
+
+    /**
      * The maximum depth to crawl.
      */
     private Integer maxDepth = Integer.MAX_VALUE;
@@ -68,6 +87,11 @@
      * The root directory to search for files, which defaults to the current directory.
      */
     private File rootDir = new File(System.getProperty("user.dir"));
+
+    /**
+     * The dataset site for the datacat.
+     */
+    private DatasetSite site;
 
     /**
      * A timestamp to use for filtering input files on their creation date.
@@ -106,6 +130,44 @@
      */
     Set<Integer> acceptRuns() {
         return acceptRuns;
+    }
+
+    /**
+     * Add the default set of features.
+     */
+    CrawlerConfig addDefaultFeatures() {
+        final List<CrawlerFeature> defaultFeatures = Arrays.asList(CrawlerFeature.values());
+        this.features.addAll(defaultFeatures);
+        return this;
+    }
+
+    /**
+     * Add the default file formats.
+     */
+    CrawlerConfig addDefaultFileFormats() {
+        final List<DatasetFileFormat> defaultFormats = Arrays.asList(DatasetFileFormat.values());
+        this.formats.addAll(defaultFormats);
+        return this;
+    }
+
+    /**
+     * Add a feature to enable it.
+     *
+     * @return this object
+     */
+    CrawlerConfig addFeature(final CrawlerFeature feature) {
+        this.features.add(feature);
+        return this;
+    }
+
+    /**
+     * Add a file format for filtering.
+     *
+     * @param format the file format
+     */
+    CrawlerConfig addFileFormat(final DatasetFileFormat format) {
+        this.formats.add(format);
+        return this;
     }
 
     /**
@@ -162,6 +224,42 @@
     }
 
     /**
+     * Get the dataset site.
+     *
+     * @return the dataset site
+     */
+    DatasetSite datasetSite() {
+        return this.site;
+    }
+
+    /**
+     * Return <code>true</code> if metadata extraction from files is enabled.
+     *
+     * @return <code>true</code> if metadata extraction is enabled
+     */
+    boolean enableMetaData() {
+        return this.enableMetadata;
+    }
+
+    /**
+     * Get the set of enabled features.
+     *
+     * @return the set of enabled features
+     */
+    Set<CrawlerFeature> getFeatures() {
+        return this.features;
+    }
+
+    /**
+     * Get the file formats for filtering.
+     *
+     * @return the file formats for filtering
+     */
+    Set<DatasetFileFormat> getFileFormats() {
+        return this.formats;
+    }
+
+    /**
      * Get the max depth in the directory tree to crawl.
      *
      * @return the max depth
@@ -191,6 +289,16 @@
     }
 
     /**
+     * Remove a feature to disable it.
+     *
+     * @return this object
+     */
+    CrawlerConfig removeFeature(final CrawlerFeature feature) {
+        this.features.remove(feature);
+        return this;
+    }
+
+    /**
      * Get the root directory for the file search.
      *
      * @return the root directory for the file search
@@ -243,6 +351,26 @@
     }
 
     /**
+     * Set the dataset site.
+     *
+     * @return this object
+     */
+    void setDatasetSite(final DatasetSite site) {
+        this.site = site;
+    }
+
+    /**
+     * Set whether metadata extraction is enabled.
+     *
+     * @param enableMetadata <code>true</code> to enable metadata
+     * @return this object
+     */
+    CrawlerConfig setEnableMetadata(final boolean enableMetadata) {
+        this.enableMetadata = enableMetadata;
+        return this;
+    }
+
+    /**
      * Set the max depth.
      *
      * @param maxDepth the max depth

Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,32 @@
+package org.hps.crawler;
+
+/**
+ * Enum for enabling or disabling features in the file crawler.
+ * <p>
+ * A feature is active when it is present in the crawler configuration's
+ * feature set (see {@code CrawlerConfig#getFeatures()}); removing it via
+ * {@code CrawlerConfig#removeFeature(CrawlerFeature)} disables the behavior.
+ */
+enum CrawlerFeature {
+    /**
+     * Allow inserts into run database.
+     */
+    RUNDB_INSERT,
+    /**
+     * Allow updating the run database if the run exists already.
+     */
+    RUNDB_UPDATE,
+    /**
+     * Create list of EPICS data for inserting into run database.
+     */
+    EPICS,
+    /**
+     * Create list of scaler data for inserting into run database.
+     */
+    SCALERS,
+    /**
+     * Extract trigger config for inserting into run database.
+     */
+    TRIGGER,
+    /**
+     * Populate the data catalog with files that are found when crawling.
+     */
+    DATACAT    
+}
+

Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,15 @@
+package org.hps.crawler;
+
+import java.io.File;
+
+/**
+ * Static file utilities for the crawler.
+ */
+public final class CrawlerFileUtilities {
+
+    /**
+     * Return <code>true</code> if the file looks like an HPS data file, i.e. its name starts with "hps".
+     *
+     * @param file the file to check
+     * @return <code>true</code> if the file name starts with "hps"
+     */
+    static boolean isHpsFile(final File file) {
+        return file.getName().startsWith("hps");
+    }
+
+    /**
+     * Extract the run number from a file name.
+     * <p>
+     * Assumes the run number occupies characters 4-7 of the file name (i.e. directly after a
+     * 4-character "hps" prefix) — TODO confirm this matches all production naming conventions.
+     *
+     * @param file the file
+     * @return the run number parsed from the file name
+     * @throws NumberFormatException if characters 4-7 are not a valid integer
+     * @throws IndexOutOfBoundsException if the file name is shorter than 8 characters
+     */
+    static int getRunFromFileName(final File file) {
+        final String name = file.getName();
+        return Integer.parseInt(name.substring(4, 8));
+    }
+
+    /**
+     * Prevent instantiation of this static utility class.
+     */
+    private CrawlerFileUtilities() {
+    }
+}

Copied: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java (from r3655, java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java)
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java	Wed Sep 23 07:49:16 2015
@@ -7,27 +7,28 @@
 import java.nio.file.SimpleFileVisitor;
 import java.nio.file.attribute.BasicFileAttributes;
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.hps.record.evio.EvioFileFilter;
-import org.hps.record.evio.EvioFileUtilities;
+import org.hps.datacat.client.DatasetFileFormat;
 import org.lcsim.util.log.DefaultLogFormatter;
 import org.lcsim.util.log.LogUtil;
 
 /**
  * A file visitor that crawls directories for EVIO files and returns the information as a {@link RunSummaryMap}.
+ * <p>
+ * The {@link #addFilter(FileFilter)} method can be used to add a file filter.  Paths must pass all filters to
+ * be accepted.
  *
  * @author Jeremy McCormick, SLAC
  */
-final class EvioFileVisitor extends SimpleFileVisitor<Path> {
+final class CrawlerFileVisitor extends SimpleFileVisitor<Path> {
 
     /**
      * Setup logger.
      */
-    private static final Logger LOGGER = LogUtil.create(EvioFileVisitor.class, new DefaultLogFormatter(), Level.FINE);
+    private static final Logger LOGGER = LogUtil.create(CrawlerFileVisitor.class, new DefaultLogFormatter(), Level.FINE);
 
     /**
      * A list of file filters to apply.
@@ -37,6 +38,7 @@
     /**
      * The run log containing information about files from each run.
      */
+    // FIXME: This should be replaced by a map of run summary to file set.
     private final RunSummaryMap runs = new RunSummaryMap();
     
     /**
@@ -44,12 +46,7 @@
      *
      * @param timestamp the timestamp which is used for date filtering
      */
-    EvioFileVisitor(final Date timestamp) {
-        this.addFilter(new EvioFileFilter());
-        if (timestamp != null) {
-            // Add date filter if timestamp is supplied.
-            this.addFilter(new DateFileFilter(timestamp));
-        }
+    CrawlerFileVisitor() {
     }
 
     /**
@@ -97,23 +94,25 @@
      */
     @Override
     public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) {
+                
         final File file = path.toFile();
-        if (this.accept(file)) {
+        
+        if (this.accept(file)) {            
 
             // Get the run number from the file name.
-            final Integer run = EvioFileUtilities.getRunFromName(file);
+            final Integer run = CrawlerFileUtilities.getRunFromFileName(file);
 
-            // Get the sequence number from the file name.
-            final Integer seq = EvioFileUtilities.getSequenceFromName(file);
+            // Get the file format.
+            DatasetFileFormat format = DatacatUtilities.getFileFormat(file);
 
-            LOGGER.info("accepted file " + file.getPath() + " with run " + run + " and seq " + seq);
-
-            // Add this file to the file list for the run.
-            this.runs.getRunSummary(run).addFile(file);
+            LOGGER.info("accepted file " + file.getPath() + " with run " + run);
+            
+            // Add file to run summary.
+            this.runs.getRunSummary(run).addFile(format, file);
             
         } else {
             // File was rejected by one of the filters.
-            LOGGER.finer("rejected file " + file.getPath());
+            LOGGER.info("file " + file.getPath() + " was rejected");
         }
         // Always continue crawling.
         return FileVisitResult.CONTINUE;

Added: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,414 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.nio.file.FileVisitOption;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.hps.datacat.client.DatacatClient;
+import org.hps.datacat.client.DatacatClientFactory;
+import org.hps.datacat.client.DatasetFileFormat;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Command line file crawler for populating the data catalog.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class DatacatCrawler {
+
+    /**
+     * Visitor which creates a {@link FileSet} from walking a directory tree.
+     * <p>
+     * Any number of {@link java.io.FileFilter} objects can be registered with this visitor to restrict which files are
+     * accepted.
+     *
+     * @author Jeremy McCormick, SLAC
+     */
+    final class DatacatFileVisitor extends SimpleFileVisitor<Path> {
+
+        /**
+         * The set of accepted files, keyed by file format.
+         */
+        private final FileSet fileSet = new FileSet();
+
+        /**
+         * A list of file filters to apply; a file must pass all of them to be accepted.
+         */
+        private final List<FileFilter> filters = new ArrayList<FileFilter>();
+
+        /**
+         * Run the filters on the file to tell whether it should be accepted or not.
+         *
+         * @param file the file to check
+         * @return <code>true</code> if the file passes every filter
+         */
+        private boolean accept(final File file) {
+            for (final FileFilter filter : this.filters) {
+                if (!filter.accept(file)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /**
+         * Add a file filter.
+         *
+         * @param filter the file filter
+         */
+        void addFilter(final FileFilter filter) {
+            this.filters.add(filter);
+        }
+
+        /**
+         * Get the file set created by visiting the directory tree.
+         *
+         * @return the file set from visiting the directory tree
+         */
+        FileSet getFileSet() {
+            return this.fileSet;
+        }
+
+        /**
+         * Visit a single file and add it to the file set if it passes all filters.
+         *
+         * @param path the file to visit
+         * @param attrs the file attributes
+         */
+        @Override
+        public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) {
+            final File file = path.toFile();
+            if (this.accept(file)) {
+                final DatasetFileFormat format = DatacatUtilities.getFileFormat(file);
+                fileSet.addFile(format, file);
+            }
+            return FileVisitResult.CONTINUE;
+        }
+    }
+
+    /**
+     * List of available file formats for printing help.
+     */
+    private static final String AVAILABLE_FORMATS;
+
+    /**
+     * Setup the logger.
+     */
+    // FIX: was created with Crawler.class by mistake, which mislabeled every log record from this class.
+    private static final Logger LOGGER = LogUtil.create(DatacatCrawler.class, new DefaultLogFormatter(), Level.CONFIG);
+
+    /**
+     * Command line options for the crawler.
+     */
+    private static final Options OPTIONS = new Options();
+
+    /**
+     * Statically build the format list and define the command options.  The format list must be built before the
+     * options because it appears in the help text of the -o option.
+     */
+    static {
+        final StringBuilder buffer = new StringBuilder();
+        for (final DatasetFileFormat format : DatasetFileFormat.values()) {
+            buffer.append(format.name()).append(" ");
+        }
+        buffer.setLength(buffer.length() - 1);
+        AVAILABLE_FORMATS = buffer.toString();
+
+        OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)");
+        OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")");
+        OPTIONS.addOption("d", "directory", true, "root directory to crawl");
+        OPTIONS.addOption("f", "folder", true, "datacat folder");
+        OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
+        OPTIONS.addOption("o", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS);
+        OPTIONS.addOption("m", "metadata", false, "create metadata for datasets");
+        OPTIONS.addOption("s", "site", true, "datacat site");
+        OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
+        OPTIONS.addOption("x", "max-depth", true, "max depth to crawl");
+    }
+
+    /**
+     * Main method.
+     *
+     * @param args the command line arguments
+     */
+    public static void main(final String[] args) {
+        new DatacatCrawler().parse(args).run();
+    }
+
+    /**
+     * The crawler configuration.
+     */
+    private CrawlerConfig config;
+
+    /**
+     * The options parser.
+     */
+    private final PosixParser parser = new PosixParser();
+
+    /**
+     * Throw an exception if the path doesn't exist in the data catalog or it is not a folder.
+     *
+     * @param folder the folder in the datacat
+     * @throws RuntimeException if the given path does not exist or it is not a folder
+     */
+    void checkFolder(final String folder) {
+        final DatacatClient datacatClient = new DatacatClientFactory().createClient();
+        if (!datacatClient.exists(folder)) {
+            throw new RuntimeException("The folder " + folder + " does not exist in the data catalog.");
+        }
+        if (!datacatClient.isFolder(folder)) {
+            throw new RuntimeException("The path " + folder + " is not a folder.");
+        }
+    }
+
+    /**
+     * Parse command line options.
+     *
+     * @param args the command line arguments
+     * @return this object (for method chaining)
+     */
+    public DatacatCrawler parse(final String[] args) {
+
+        LOGGER.config("parsing command line options");
+
+        // FIX: the configuration was previously instantiated twice; once is enough.
+        this.config = new CrawlerConfig();
+
+        try {
+            final CommandLine cl = this.parser.parse(OPTIONS, args);
+
+            // Print help.
+            if (cl.hasOption("h") || args.length == 0) {
+                this.printUsage();
+            }
+
+            // Log level.
+            if (cl.hasOption("L")) {
+                final Level level = Level.parse(cl.getOptionValue("L"));
+                LOGGER.config("setting log level to " + level);
+                LOGGER.setLevel(level);
+            }
+
+            // Root directory for file crawling.
+            if (cl.hasOption("d")) {
+                final File rootDir = new File(cl.getOptionValue("d"));
+                if (!rootDir.exists()) {
+                    throw new IllegalArgumentException("The directory does not exist.");
+                }
+                if (!rootDir.isDirectory()) {
+                    throw new IllegalArgumentException("The specified path is not a directory.");
+                }
+                config.setRootDir(rootDir);
+                LOGGER.config("root dir set to " + config.rootDir());
+            }
+
+            // Timestamp file for date filtering.
+            if (cl.hasOption("t")) {
+                final File timestampFile = new File(cl.getOptionValue("t"));
+                config.setTimestampFile(timestampFile);
+                if (!timestampFile.exists()) {
+                    try {
+                        // Create new time stamp file which will have its date updated at the end of the job.
+                        LOGGER.config("creating new timestamp file " + timestampFile.getPath());
+                        timestampFile.createNewFile();
+                    } catch (final IOException e) {
+                        throw new IllegalArgumentException("Error creating timestamp file: " + timestampFile.getPath());
+                    }
+                } else {
+                    try {
+                        // Get the date filter for files from an existing time stamp file provided by the user.
+                        final Date timestamp = new Date(Files
+                                .readAttributes(config.timestampFile().toPath(), BasicFileAttributes.class)
+                                .lastModifiedTime().toMillis());
+                        config.setTimestamp(timestamp);
+                        LOGGER.config("got timestamp " + timestamp + " from existing file "
+                                + config.timestampFile().getPath());
+                    } catch (final IOException e) {
+                        throw new RuntimeException("Error getting attributes of timestamp file.", e);
+                    }
+                }
+            }
+
+            // User supplied timestamp string that is converted to a date for file filtering.
+            if (cl.hasOption("b")) {
+                try {
+                    if (config.timestamp() != null) {
+                        LOGGER.warning("existing timestamp from file " + config.timestamp()
+                                + " will be overridden by date from -b argument");
+                    }
+                    config.setTimestamp(cl.getOptionValue("b"));
+                    LOGGER.config("set timestamp to " + config.timestamp() + " from -b argument");
+                } catch (final java.text.ParseException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+
+            // Max depth to crawl.
+            if (cl.hasOption("x")) {
+                final Integer maxDepth = Integer.parseInt(cl.getOptionValue("x"));
+                if (maxDepth < 1) {
+                    throw new IllegalArgumentException("invalid -x argument for maxDepth: " + maxDepth);
+                }
+                config.setMaxDepth(maxDepth);
+                LOGGER.config("set max depth to " + maxDepth);
+            }
+
+            // Configure enabled file formats.
+            if (cl.hasOption("o")) {
+                for (final String arg : cl.getOptionValues("o")) {
+                    DatasetFileFormat format = null;
+                    try {
+                        format = DatasetFileFormat.valueOf(arg);
+                    } catch (IllegalArgumentException | NullPointerException e) {
+                        throw new IllegalArgumentException("The format " + arg + " is not valid.", e);
+                    }
+                    LOGGER.config("adding format " + format.name());
+                    this.config.addFileFormat(format);
+                }
+            } else {
+                throw new RuntimeException("The -o argument with data format must be supplied at least once.");
+            }
+
+            // Enable metadata extraction from files.
+            if (cl.hasOption("m")) {
+                config.setEnableMetadata(true);
+                LOGGER.config("metadata extraction enabled");
+            }
+
+            // Datacat folder.
+            if (cl.hasOption("f")) {
+                config.setDatacatFolder(cl.getOptionValue("f"));
+                LOGGER.config("set datacat folder to " + config.datacatFolder());
+            } else {
+                throw new RuntimeException("The -f argument with the datacat folder is required.");
+            }
+
+        } catch (final ParseException e) {
+            throw new RuntimeException("Error parsing options.", e);
+        }
+
+        // Check the datacat folder which must already exist.
+        this.checkFolder(config.datacatFolder());
+
+        // Defensive check; the -o handling above already requires at least one format.
+        // FIX: the message referred to the -f switch, but formats are supplied with -o.
+        if (this.config.getFileFormats().isEmpty()) {
+            throw new IllegalStateException("At least one file format must be provided with the -o switch.");
+        }
+
+        LOGGER.info("done parsing command line options");
+        LOGGER.getHandlers()[0].flush();
+
+        return this;
+    }
+
+    /**
+     * Print the usage statement for this tool to the console and then exit the program.
+     */
+    private void printUsage() {
+        final HelpFormatter help = new HelpFormatter();
+        help.printHelp(70, "DatacatCrawler [options]", "", OPTIONS, "");
+        System.exit(0);
+    }
+
+    /**
+     * Run the crawler job: build the visitor, walk the directory tree and update the data catalog.
+     */
+    void run() {
+
+        // Create the file visitor for crawling the root directory.
+        final DatacatFileVisitor visitor = new DatacatFileVisitor();
+
+        // FIX: the date filter used to be added both here and again in walk(), creating a duplicate
+        // filter in the visitor's list; it is now added only in walk().
+
+        // Add file format filter.
+        for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+            LOGGER.info("adding file format filter for " + fileFormat.name());
+        }
+        visitor.addFilter(new FileFormatFilter(config.getFileFormats()));
+
+        // Walk the file tree using the visitor.
+        this.walk(visitor);
+
+        // Update the data catalog.
+        this.updateDatacat(visitor.getFileSet());
+    }
+
+    /**
+     * Update the data catalog.
+     *
+     * @param fileSet the set of files found while crawling, keyed by file format
+     */
+    private void updateDatacat(final FileSet fileSet) {
+        final DatacatClient datacatClient = new DatacatClientFactory().createClient();
+        for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+            LOGGER.info("adding files to datacat with format " + fileFormat.name());
+            for (final File file : fileSet.get(fileFormat)) {
+
+                LOGGER.info("adding file " + file.getAbsolutePath() + " to datacat");
+
+                // Create metadata if this is enabled (takes awhile).
+                Map<String, Object> metadata = new HashMap<String, Object>();
+                if (config.enableMetaData()) {
+                    metadata = DatacatUtilities.createMetadata(file);
+                }
+
+                // Register file in the catalog.
+                DatacatUtilities.addFile(datacatClient, config.datacatFolder(), file, metadata);
+            }
+        }
+    }
+
+    /**
+     * Walk the directory tree from the configured root directory, applying the optional date and run filters.
+     *
+     * @param visitor the file visitor
+     */
+    private void walk(final DatacatFileVisitor visitor) {
+        if (config.timestamp() != null) {
+            // Date filter from timestamp.
+            visitor.addFilter(new DateFileFilter(config.timestamp()));
+            LOGGER.config("added date filter with time stamp " + config.timestamp());
+        }
+
+        // Is the accept run list not empty? (Empty means accept all runs.)
+        if (!config.acceptRuns().isEmpty()) {
+            // List of run numbers to accept.
+            visitor.addFilter(new RunFilter(config.acceptRuns()));
+            LOGGER.config("added run number filter");
+        } else {
+            LOGGER.config("no run number filter will be used");
+        }
+
+        try {
+            // Walk the file tree from the root directory.
+            final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
+            Files.walkFileTree(config.rootDir().toPath(), options, config.maxDepth(), visitor);
+        } catch (final IOException e) {
+            throw new RuntimeException("Error while walking the directory tree.", e);
+        }
+    }
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,134 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.hps.datacat.client.DatacatClient;
+import org.hps.datacat.client.DatasetDataType;
+import org.hps.datacat.client.DatasetFileFormat;
+import org.hps.datacat.client.DatasetSite;
+
+/**
+ * Datacat utilities for the crawler.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+final class DatacatUtilities {
+
+    /**
+     * Map from file extension to dataset file format, built from the formats known to the datacat client.
+     */
+    private static final Map<String, DatasetFileFormat> FORMAT_MAP = new HashMap<String, DatasetFileFormat>();
+    static {
+        for (final DatasetFileFormat format : DatasetFileFormat.values()) {
+            FORMAT_MAP.put(format.extension(), format);
+        }
+    }
+
+    /**
+     * Add a file to the data catalog, deriving its format and data type from the file name.
+     * <p>
+     * NOTE(review): the dataset site is hard-coded to SLAC here — confirm this is intended for all crawls.
+     *
+     * @param datacatClient the data catalog client
+     * @param folder the folder name e.g. "data/raw"
+     * @param file the file to register
+     * @param metadata the file's metadata key-value pairs
+     */
+    static void addFile(final DatacatClient datacatClient, final String folder, final File file,
+            final Map<String, Object> metadata) {
+        final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
+        final DatasetDataType dataType = DatacatUtilities.getDataType(file);
+        DatacatUtilities.addFile(datacatClient, folder, file, metadata, fileFormat, dataType, DatasetSite.SLAC);
+    }
+
+    /**
+     * Add a file to the data catalog.
+     *
+     * @param client the data catalog client
+     * @param folder the folder name e.g. "data/raw"
+     * @param file the file to register
+     * @param metadata the file's metadata key-value pairs
+     * @param fileFormat the file's format (EVIO, LCIO etc.)
+     * @param dataType the file's data type (RAW, RECON, etc.)
+     * @param site the dataset site
+     * @return the HTTP response code
+     */
+    static int addFile(final DatacatClient client, final String folder, final File file,
+            final Map<String, Object> metadata, final DatasetFileFormat fileFormat, final DatasetDataType dataType,
+            final DatasetSite site) {
+        // Add the dataset to the data catalog using the REST API.
+        return client.addDataset(folder, dataType, file.getAbsolutePath(), site, fileFormat,
+                file.getName(), metadata);
+    }
+
+    /**
+     * Create metadata for a file by running the reader matching its format and data type.
+     *
+     * @param file the file
+     * @return the metadata key-value pairs read from the file
+     * @throws RuntimeException if no reader exists for the file's format and type, or if reading fails
+     */
+    static Map<String, Object> createMetadata(final File file) {
+        final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
+        final DatasetDataType dataType = DatacatUtilities.getDataType(file);
+        final FileMetadataReader reader = DatacatUtilities.getFileMetaDataReader(fileFormat, dataType);
+        if (reader == null) {
+            throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + dataType.name() + ".");
+        }
+        try {
+            return reader.getMetadata(file);
+        } catch (final IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Determine the dataset data type (RAW, RECON, DQM, DST) from a file's format and name.
+     *
+     * @param file the file
+     * @return the dataset data type
+     * @throws IllegalArgumentException if the format is unknown or no data type can be determined
+     */
+    static DatasetDataType getDataType(final File file) {
+        final DatasetFileFormat fileFormat = getFileFormat(file);
+        DatasetDataType dataType = null;
+        if (fileFormat == null) {
+            throw new IllegalArgumentException("File has unknown format: " + file.getAbsolutePath());
+        }
+        if (fileFormat.equals(DatasetFileFormat.EVIO)) {
+            dataType = DatasetDataType.RAW;
+        } else if (fileFormat.equals(DatasetFileFormat.LCIO)) {
+            dataType = DatasetDataType.RECON;
+        } else if (fileFormat.equals(DatasetFileFormat.ROOT)) {
+            // FIXME: This should probably open the file and determine what it contains.
+            if (file.getName().contains("_dqm")) {
+                dataType = DatasetDataType.DQM;
+            } else if (file.getName().contains("_dst")) {
+                dataType = DatasetDataType.DST;
+            }
+        } else if (fileFormat.equals(DatasetFileFormat.AIDA)) {
+            dataType = DatasetDataType.DQM;
+        }
+        if (dataType == null) {
+            throw new IllegalArgumentException("Could not determine data type for format: " + fileFormat.name());
+        }
+        return dataType;
+    }
+
+    /**
+     * Determine the dataset file format from a file name's extension, stripping a trailing EVIO
+     * sequence number (e.g. ".evio.123") first if present.
+     *
+     * @param pathname the file
+     * @return the matching file format or <code>null</code> if the extension is unknown
+     */
+    static DatasetFileFormat getFileFormat(final File pathname) {
+        String name = pathname.getName();
+        if (name.contains(DatasetFileFormat.EVIO.extension()) && !name.endsWith(DatasetFileFormat.EVIO.extension())) {
+            name = stripEvioFileNumber(name);
+        }
+        final String extension = name.substring(name.lastIndexOf(".") + 1);
+        return FORMAT_MAP.get(extension);
+    }
+
+    /**
+     * Get the metadata reader for a format and data type combination.
+     *
+     * @param fileFormat the file format
+     * @param dataType the data type
+     * @return the reader or <code>null</code> if no reader exists for the combination
+     */
+    static FileMetadataReader getFileMetaDataReader(final DatasetFileFormat fileFormat, final DatasetDataType dataType) {
+        FileMetadataReader reader = null;
+        if (fileFormat.equals(DatasetFileFormat.LCIO)) {
+            reader = new LcioMetadataReader();
+        } else if (fileFormat.equals(DatasetFileFormat.EVIO)) {
+            reader = new EvioMetadataReader();
+        } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DST)) {
+            reader = new RootDstMetadataReader();
+        }
+        return reader;
+    }
+
+    /**
+     * Compute a file's folder path relative to a root directory by stripping the root prefix.
+     *
+     * @param rootDir the root directory path
+     * @param file the file
+     * @return the parent folder path with the root prefix removed
+     */
+    static String getFolder(final String rootDir, final File file) {
+        String stripDir = rootDir;
+        if (!stripDir.endsWith("/")) {
+            stripDir += "/";
+        }
+        return file.getParentFile().getPath().replace(stripDir, "");
+    }
+
+    /**
+     * Strip the trailing sequence number from an EVIO file name (e.g. "run.evio.123" becomes "run.evio").
+     *
+     * @param name the file name
+     * @return the name with the trailing sequence number removed, or unchanged if it already ends in the extension
+     */
+    static String stripEvioFileNumber(final String name) {
+        String strippedName = name;
+        if (!name.endsWith(DatasetFileFormat.EVIO.extension())) {
+            strippedName = name.substring(0, name.lastIndexOf("."));
+        }
+        return strippedName;
+    }
+
+    /**
+     * Prevent instantiation of this static utility class.
+     */
+    private DatacatUtilities() {
+    }
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,148 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.hps.record.evio.EventTagConstant;
+import org.hps.record.evio.EvioEventUtilities;
+import org.hps.record.evio.EvioFileUtilities;
+import org.jlab.coda.jevio.EvioEvent;
+import org.jlab.coda.jevio.EvioException;
+import org.jlab.coda.jevio.EvioReader;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+ 
+/**
+ * Reads metadata from EVIO files.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class EvioMetadataReader implements FileMetadataReader {
+
+    /**
+     * Setup the logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(EvioMetadataReader.class, new DefaultLogFormatter(), Level.ALL);
+
+    /**
+     * Get the EVIO file metadata.
+     * <p>
+     * Scans every event in the file, accumulating the run number, event counts, byte count, and start/end dates and
+     * event numbers from the PRESTART, END and physics events.
+     *
+     * @param file the EVIO file
+     * @return the metadata map of key and value pairs; timestamp and event values may be <code>null</code> when the
+     * file contains no usable timestamp or physics events
+     * @throws IOException if the file cannot be opened
+     */
+    @Override
+    public Map<String, Object> getMetadata(final File file) throws IOException {
+
+        Date startDate = null;
+        Date endDate = null;
+        int badEventCount = 0;
+        int eventCount = 0;
+        // FIXME: an int will overflow for files larger than ~2 GB; consider switching to a long.
+        int byteCount = 0;
+        boolean hasPrestart = false;
+        boolean hasEnd = false;
+        int[] eventIdData = null;
+        Integer run = null;
+        Integer endEvent = null;
+        Integer startEvent = null;
+        Long lastTimestamp = null;
+
+        EvioReader evioReader = null;
+        try {
+            evioReader = EvioFileUtilities.open(file, false);
+        } catch (final EvioException e) {
+            throw new IOException(e);
+        }
+
+        final int fileNumber = EvioFileUtilities.getSequenceFromName(file);
+
+        try {
+            EvioEvent evioEvent = null;
+            while (true) {
+                try {
+                    evioEvent = evioReader.parseNextEvent();
+                } catch (IOException | EvioException e) {
+                    // FIXME: if the reader does not advance past a corrupt event, this could loop
+                    // forever — confirm parseNextEvent() makes progress after an error.
+                    ++badEventCount;
+                    continue;
+                }
+                if (evioEvent == null) {
+                    break;
+                }
+                byteCount += evioEvent.getTotalBytes();
+                if (EventTagConstant.PRESTART.equals(evioEvent)) {
+                    LOGGER.info("found PRESTART");
+                    hasPrestart = true;
+                    final int[] controlEventData = EvioEventUtilities.getControlEventData(evioEvent);
+                    final long timestamp = controlEventData[0] * 1000L;
+                    startDate = new Date(timestamp);
+                    LOGGER.info("set start date to " + startDate + " from PRESTART");
+                    if (run == null) {
+                        run = controlEventData[1];
+                        LOGGER.info("set run to " + run);
+                    }
+                } else if (EventTagConstant.END.equals(evioEvent)) {
+                    LOGGER.info("found END event");
+                    hasEnd = true;
+                    final int[] controlEventData = EvioEventUtilities.getControlEventData(evioEvent);
+                    final long timestamp = controlEventData[0] * 1000L;
+                    endDate = new Date(timestamp);
+                    LOGGER.info("set end date to " + endDate);
+                    if (run == null) {
+                        run = controlEventData[1];
+                        LOGGER.info("set run to " + run);
+                    }
+                } else if (EvioEventUtilities.isPhysicsEvent(evioEvent)) {
+                    final int[] headBankData = EvioEventUtilities.getHeadBankData(evioEvent);
+                    if (startDate == null) {
+                        if (headBankData[3] != 0) {
+                            startDate = new Date(headBankData[3] * 1000L);
+                            LOGGER.info("set start date to " + startDate + " from physics event");
+                        }
+                    }
+                    if (run == null) {
+                        run = headBankData[1];
+                        LOGGER.info("set run to " + run + " from physics event");
+                    }
+                    eventIdData = EvioEventUtilities.getEventIdData(evioEvent);
+                    if (startEvent == null) {
+                        startEvent = eventIdData[0];
+                        LOGGER.info("set start event " + startEvent);
+                    }
+                    if (headBankData[3] != 0) {
+                        lastTimestamp = headBankData[3] * 1000L;
+                    }
+                    ++eventCount;
+                }
+            }
+        } finally {
+            // FIX: the reader was previously never closed, leaking the file handle.
+            try {
+                evioReader.close();
+            } catch (final Exception e) {
+                LOGGER.log(Level.WARNING, "Error closing EVIO reader.", e);
+            }
+        }
+
+        // Set end date from last valid timestamp.
+        // FIX: guard against unboxing a null lastTimestamp when the file has no timestamps.
+        if (endDate == null && lastTimestamp != null) {
+            endDate = new Date(lastTimestamp);
+            LOGGER.info("set end date to " + endDate + " from last timestamp " + lastTimestamp);
+        }
+
+        // Set end event number.
+        if (eventIdData != null) {
+            endEvent = eventIdData[0];
+            LOGGER.info("set end event " + endEvent);
+        }
+
+        final Map<String, Object> metaDataMap = new HashMap<String, Object>();
+
+        metaDataMap.put("runMin", run);
+        metaDataMap.put("runMax", run);
+        metaDataMap.put("eventCount", eventCount);
+        metaDataMap.put("size", byteCount);
+        metaDataMap.put("fileNumber", fileNumber);
+        metaDataMap.put("badEventCount", badEventCount);
+        // FIX: avoid a NullPointerException when no start or end date could be determined;
+        // null is stored instead so the caller can see the value was unavailable.
+        metaDataMap.put("endTimestamp", endDate != null ? endDate.getTime() : null);
+        metaDataMap.put("startTimestamp", startDate != null ? startDate.getTime() : null);
+        metaDataMap.put("startEvent", startEvent);
+        metaDataMap.put("endEvent", endEvent);
+        metaDataMap.put("hasEnd", hasEnd ? 1 : 0);
+        metaDataMap.put("hasPrestart", hasPrestart ? 1 : 0);
+
+        return metaDataMap;
+    }
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,64 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.hps.datacat.client.DatasetFileFormat;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Filter files on their format.
+ * <p>
+ * Only files whose format is in the configured set will be accepted by the file visitor.
+ * 
+ * @author Jeremy McCormick, SLAC
+ */
+public class FileFormatFilter implements FileFilter {
+
+    /**
+     * Setup logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(FileFormatFilter.class, new DefaultLogFormatter(), Level.ALL);
+    
+    /**
+     * The set of accepted file formats.
+     */
+    private Set<DatasetFileFormat> formats;
+    
+    /**
+     * Create a new filter with the given set of accepted formats.
+     * 
+     * @param formats the set of accepted file formats
+     * @throws IllegalArgumentException if the formats collection is <code>null</code> or empty
+     */
+    FileFormatFilter(Set<DatasetFileFormat> formats) {
+        if (formats == null) {
+            throw new IllegalArgumentException("The formats collection is null.");
+        }
+        if (formats.isEmpty()) {
+            throw new IllegalArgumentException("The formats collection is empty.");
+        }
+        this.formats = formats;
+    }
+    
+    /**
+     * Returns <code>true</code> if the file should be accepted, i.e. its format matches one of the filter's formats.
+     * 
+     * @param pathname the file's full path
+     * @return <code>true</code> if the file's format is in the accepted set;
+     *         <code>false</code> if the format is not accepted or could not be determined
+     */
+    @Override
+    public boolean accept(File pathname) {
+        LOGGER.info(pathname.getPath());
+        DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname);
+        if (fileFormat != null) {
+            LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name());        
+            return formats.contains(fileFormat);
+        } else {
+            LOGGER.info("rejected file " + pathname.getPath() + " with unknown format");
+            return false;
+        }
+    }
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,11 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+
+/**
+ * Interface for extracting datacat metadata key-value pairs from a file.
+ */
+public interface FileMetadataReader {   
+    
+    /**
+     * Get the metadata for the given file.
+     * 
+     * @param file the input file
+     * @return the map of metadata keys to values
+     * @throws IOException if there is an error reading the file
+     */
+    public Map<String, Object> getMetadata(File file) throws IOException;
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,27 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.hps.datacat.client.DatasetFileFormat;
+
+/**
+ * Map of file format to a list of files.
+ * 
+ * @author Jeremy McCormick, SLAC
+ */
+public class FileSet extends HashMap<DatasetFileFormat, List<File>> {
+    
+    /**
+     * Get the list of files for the given format, lazily creating and registering
+     * an empty list if the format has no entry yet (never returns <code>null</code>).
+     * <p>
+     * Note that this overloads, rather than overrides, {@link HashMap#get(Object)};
+     * lookups through a plain <code>Map</code> reference bypass the lazy creation.
+     * 
+     * @param format the file format
+     * @return the (possibly newly created) live list of files for the format
+     */
+    public List<File> get(DatasetFileFormat format) {
+        // Single lookup instead of get-then-put-then-get-again.
+        List<File> files = super.get(format);
+        if (files == null) {
+            files = new ArrayList<File>();
+            this.put(format, files);
+        }
+        return files;
+    }
+    
+    /**
+     * Add a file under the given format.
+     * 
+     * @param format the file format
+     * @param file the file to add
+     */
+    public void addFile(DatasetFileFormat format, File file) {
+        this.get(format).add(file);
+    }
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,70 @@
+package org.hps.crawler;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.lcsim.conditions.ConditionsManager;
+import org.lcsim.conditions.ConditionsManagerImplementation;
+import org.lcsim.conditions.ConditionsReader;
+import org.lcsim.event.EventHeader;
+import org.lcsim.lcio.LCIOReader;
+import org.lcsim.util.loop.DummyConditionsConverter;
+import org.lcsim.util.loop.DummyDetector;
+
+/**
+ * Reads metadata from LCIO files with reconstructed data.
+ * 
+ * @author Jeremy McCormick, SLAC
+ */
+public class LcioMetadataReader implements FileMetadataReader {
+
+    /*
+     * Setup the conditions system in dummy mode so event headers can be read
+     * without a real detector configuration.
+     */
+    static {
+        ConditionsManager conditionsManager = ConditionsManager.defaultInstance();
+        ConditionsReader dummyReader = ConditionsReader.createDummy();
+        ((ConditionsManagerImplementation) conditionsManager).setConditionsReader(dummyReader, "DUMMY");
+        DummyDetector detector = new DummyDetector("DUMMY");
+        conditionsManager.registerConditionsConverter(new DummyConditionsConverter(detector));
+    }
+    
+    /**
+     * Get the metadata for the LCIO file.
+     * <p>
+     * Counts the events in the file and takes the run number from the first event header.
+     * 
+     * @param file the LCIO file
+     * @return the metadata map with eventCount, runMin and runMax entries
+     * @throws IOException if there is an error opening or reading the file
+     */
+    @Override
+    public Map<String, Object> getMetadata(File file) throws IOException {
+        Map<String, Object> metaData = new HashMap<String, Object>();
+        LCIOReader reader = null;
+        try {        
+            reader = new LCIOReader(file);               
+            EventHeader eventHeader = null;
+            int eventCount = 0;
+            Integer run = null;
+            try {
+                while((eventHeader = reader.read()) != null) {
+                    if (run == null) {
+                        run = eventHeader.getRunNumber();
+                    }            
+                    eventCount++;
+                }
+            } catch (EOFException e) {
+                // Expected: the reader signals end-of-file with EOFException, which
+                // simply terminates the read loop; do not dump a stack trace for it.
+            }
+            metaData.put("eventCount", eventCount);
+            // NOTE(review): run stays null for a file with zero events, in which case
+            // runMin/runMax are stored as null values — confirm downstream handling.
+            metaData.put("runMin", run);
+            metaData.put("runMax", run);
+        } finally {
+            if (reader != null) {
+                reader.close();
+            }
+        }        
+        return metaData;
+    }
+}

Added: java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java	(added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,65 @@
+package org.hps.crawler;
+
+import hep.io.root.RootClassNotFound;
+import hep.io.root.RootFileReader;
+import hep.io.root.interfaces.TBranch;
+import hep.io.root.interfaces.TLeafElement;
+import hep.io.root.interfaces.TObjArray;
+import hep.io.root.interfaces.TTree;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This is a very simple metadata reader for ROOT DST files.
+ * <p>
+ * It currently only sets the standard metadata for event count, run number and size.
+ * 
+ * @author Jeremy McCormick, SLAC
+ */
+public class RootDstMetadataReader implements FileMetadataReader {
+
+    /**
+     * Get the metadata for a ROOT DST file.
+     * <p>
+     * Reads the "HPS_Event" tree: the event count comes from the tree's entry count,
+     * the size from its total byte count, and the run number from the first entry of
+     * the "run_number" leaf.  If no "run_number" leaf is found, runMin/runMax stay 0.
+     * 
+     * @param file the ROOT DST file
+     * @return the metadata map (eventCount, runMin, runMax, size)
+     * @throws IOException if the file cannot be read or a ROOT class is not found
+     */
+    public Map<String, Object> getMetadata(File file) throws IOException {
+        Map<String, Object> metadata = new HashMap<String, Object>();        
+        RootFileReader rootReader = null;
+        long eventCount = 0;        
+        int runMin = 0;
+        int runMax = 0;
+        long size = 0;
+        try {
+            rootReader = new RootFileReader(file.getAbsolutePath());
+            TTree tree = (TTree) rootReader.get("HPS_Event");
+            eventCount = tree.getEntries();
+            // NOTE(review): getTotBytes is the tree's total byte count, which may differ
+            // from the on-disk (compressed) file size — confirm which one "size" should be.
+            size = tree.getTotBytes();
+            TObjArray leaves = tree.getLeaves();
+            
+            for (Object object : leaves) {
+                TLeafElement leaf = (TLeafElement) object;
+                if ("run_number".equals(leaf.getName())) {
+                    // assumes the leaf wraps an Integer at entry 0 — TODO confirm
+                    runMin = (int) leaf.getWrappedValue(0);
+                    runMax = (int) leaf.getWrappedValue(0);
+                    break;
+                }
+            }
+        } catch (IOException | RootClassNotFound e) {
+            // Wrap RootClassNotFound so the interface's IOException contract holds.
+            throw new IOException(e);
+        } finally {
+            if (rootReader != null) {
+                rootReader.close();
+            }
+        }        
+        metadata.put("eventCount", eventCount);
+        metadata.put("runMin", runMin);
+        metadata.put("runMax", runMax);
+        metadata.put("size", size);
+        return metadata;
+    }    
+}

Added: java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java	(added)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java	Wed Sep 23 07:49:16 2015
@@ -0,0 +1,228 @@
+package org.hps.run.database;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.hps.datacat.client.DatasetFileFormat;
+import org.hps.record.epics.EpicsRunProcessor;
+import org.hps.record.evio.EvioFileMetadata;
+import org.hps.record.evio.EvioFileMetadataAdapter;
+import org.hps.record.evio.EvioFileSequenceComparator;
+import org.hps.record.evio.EvioFileSource;
+import org.hps.record.evio.EvioLoop;
+import org.hps.record.scalers.ScalersEvioProcessor;
+import org.hps.record.triggerbank.TiTimeOffsetEvioProcessor;
+import org.hps.record.triggerbank.TriggerConfig;
+import org.hps.record.triggerbank.TriggerConfigVariable;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Processes EVIO files from a run and extracts meta data for updating the run database.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public final class RunProcessor {
+
+    /**
+     * Setup logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(RunProcessor.class, new DefaultLogFormatter(), Level.FINE);
+
+    /**
+     * Processor for extracting EPICS information.
+     */
+    private EpicsRunProcessor epicsProcessor;
+
+    /**
+     * The data source with the list of EVIO files to process.
+     */
+    private final EvioFileSource evioFileSource;
+
+    /**
+     * The EVIO event processing loop.
+     */
+    private final EvioLoop evioLoop = new EvioLoop();
+
+    /**
+     * Processor for extracting scaler data.
+     */
+    private ScalersEvioProcessor scalersProcessor;
+
+    /**
+     * Processor for extracting TI time offset.
+     */
+    private TiTimeOffsetEvioProcessor triggerTimeProcessor;
+    
+    /**
+     * Record loop adapter for getting file metadata.
+     */
+    private final EvioFileMetadataAdapter metadataAdapter = new EvioFileMetadataAdapter();
+    
+    /**
+     * The run summary for the run.
+     */
+    private RunSummaryImpl runSummary;
+    
+    /**
+     * Create a run processor for the EVIO files of the given run summary.
+     *
+     * @param runSummary the run summary object for the run
+     * @throws IllegalArgumentException if the run summary has no EVIO files
+     */
+    public RunProcessor(RunSummaryImpl runSummary) {
+        
+        this.runSummary = runSummary;
+        
+        List<File> evioFiles = runSummary.getFiles(DatasetFileFormat.EVIO);
+        if (evioFiles == null || evioFiles.isEmpty()) {
+            throw new IllegalArgumentException("No EVIO files found in file set.");
+        }
+
+        // Sort the list of EVIO files in place by sequence number (same live list as evioFiles).
+        Collections.sort(runSummary.getFiles(DatasetFileFormat.EVIO), new EvioFileSequenceComparator());
+
+        // Setup record loop.
+        evioFileSource = new EvioFileSource(evioFiles);
+        evioLoop.setEvioFileSource(evioFileSource);       
+        
+        // Add file metadata processor.
+        evioLoop.addRecordListener(metadataAdapter);
+        evioLoop.addLoopListener(metadataAdapter);
+    }
+    
+    /**
+     * Register the processor that collects EPICS data during the event loop.
+     */
+    public void addEpicsProcessor() {
+        // Add EPICS processor.
+        this.epicsProcessor = new EpicsRunProcessor();
+        evioLoop.addEvioEventProcessor(epicsProcessor);
+    }
+    
+    /**
+     * Register the processor that collects scaler data during the event loop.
+     */
+    public void addScalerProcessor() {
+        // Add scaler data processor.
+        scalersProcessor = new ScalersEvioProcessor();
+        scalersProcessor.setResetEveryEvent(false);
+        evioLoop.addEvioEventProcessor(scalersProcessor);
+    }
+    
+    /**
+     * Register the processor that extracts the TI time offset during the event loop.
+     */
+    public void addTriggerTimeProcessor() {
+        // Add processor for extracting TI time offset.
+        triggerTimeProcessor = new TiTimeOffsetEvioProcessor();
+        evioLoop.addEvioEventProcessor(triggerTimeProcessor);
+    }
+
+    /**
+     * Extract meta data from first file in run and copy its start date to the run summary.
+     */
+    private void processFirstFile() {
+        // NOTE(review): get(0) throws IndexOutOfBoundsException if no metadata was
+        // collected, so the null check below is unreachable in that case — confirm intended.
+        final EvioFileMetadata metadata = metadataAdapter.getEvioFileMetadata().get(0);
+        if (metadata == null) {
+            throw new IllegalStateException("No meta data exists for first file.");
+        }
+        LOGGER.info("first file metadata: " + metadata.toString());
+        if (metadata.getStartDate() == null) {
+            throw new IllegalStateException("The start date is not set in the metadata.");
+        }
+        LOGGER.info("setting unix start time to " + metadata.getStartDate().getTime() + " from meta data");
+        runSummary.setStartDate(metadata.getStartDate());
+    }
+
+    /**
+     * Extract meta data from last file in run and copy its end date and END-event flag
+     * to the run summary.
+     */
+    private void processLastFile() {
+        LOGGER.info("looking for " + runSummary.getEvioFiles().get(runSummary.getEvioFiles().size() - 1).getPath() + " metadata");
+        LOGGER.getHandlers()[0].flush();
+        final EvioFileMetadata metadata = this.metadataAdapter.getEvioFileMetadata().get(this.metadataAdapter.getEvioFileMetadata().size() - 1);
+        if (metadata == null) {
+            throw new IllegalStateException("Failed to find metadata for last file.");
+        }
+        LOGGER.info("last file metadata: " + metadata.toString());
+        if (metadata.getEndDate() == null) {
+            throw new IllegalStateException("The end date is not set in the metadata.");
+        }
+        LOGGER.info("setting unix end time to " + metadata.getEndDate().getTime() + " from meta data");
+        runSummary.setEndDate(metadata.getEndDate());
+        LOGGER.info("setting has END to " + metadata.hasEnd());
+        runSummary.setEndOkay(metadata.hasEnd());
+    }
+
+    /**
+     * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and extracting the
+     * start and end dates.
+     *
+     * @throws Exception if there is an error processing a file
+     */
+    public void processRun() throws Exception {
+
+        LOGGER.info("processing " + this.runSummary.getEvioFiles().size() + " files from run "
+                + this.runSummary.getRun());
+
+        // Run processors over all files.
+        LOGGER.info("looping over all events");
+        evioLoop.loop(-1);
+                
+        LOGGER.info("got " + metadataAdapter.getEvioFileMetadata().size() + " metadata objects from loop");
+        LOGGER.getHandlers()[0].flush();
+
+        // Set start date from first file.
+        LOGGER.info("processing first file");
+        this.processFirstFile();
+
+        // Set end date from last file.
+        LOGGER.info("processing last file");
+        this.processLastFile();
+
+        // Update run summary from processors.
+        LOGGER.info("updating run summary");
+        this.updateRunSummary();
+
+        LOGGER.info("run processor done with run " + this.runSummary.getRun());
+    }
+
+    /**
+     * Update the current run summary by copying data to it from the EVIO processors and the event loop.
+     * <p>
+     * Only processors that were registered via the <code>add*Processor</code> methods contribute data.
+     */
+    private void updateRunSummary() {
+
+        // Set total number of events from the event loop.
+        LOGGER.info("setting total events " + evioLoop.getTotalCountableConsumed());
+        runSummary.setTotalEvents((int) evioLoop.getTotalCountableConsumed());
+
+        if (scalersProcessor != null) {
+            // Add scaler data from the scalers EVIO processor.
+            LOGGER.info("adding " + this.scalersProcessor.getScalerData().size() + " scaler data objects");
+            runSummary.setScalerData(this.scalersProcessor.getScalerData());
+        }
+
+        if (epicsProcessor != null) {
+            // Add EPICS data from the EPICS EVIO processor.
+            LOGGER.info("adding " + this.epicsProcessor.getEpicsData().size() + " EPICS data objects");
+            runSummary.setEpicsData(this.epicsProcessor.getEpicsData());
+        }
+
+        if (triggerTimeProcessor != null) {
+            // Add trigger config from the trigger time processor.
+            LOGGER.info("updating trigger config");
+            final TriggerConfig triggerConfig = new TriggerConfig();
+            this.triggerTimeProcessor.updateTriggerConfig(triggerConfig);
+            LOGGER.info("tiTimeOffset: " + triggerConfig.get(TriggerConfigVariable.TI_TIME_OFFSET));
+            runSummary.setTriggerConfig(triggerConfig);
+        }
+
+        LOGGER.getHandlers()[0].flush();
+    }        
+    
+    /**
+     * Get list of metadata created by processing the files.
+     * 
+     * @return the list of metadata
+     */
+    public List<EvioFileMetadata> getEvioFileMetaData() {
+        return this.metadataAdapter.getEvioFileMetadata();
+    }
+}

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java	Wed Sep 23 07:49:16 2015
@@ -4,6 +4,7 @@
 import java.util.Date;
 import java.util.List;
 
+import org.hps.datacat.client.DatasetFileFormat;
 import org.hps.record.epics.EpicsData;
 import org.hps.record.scalers.ScalerData;
 import org.hps.record.triggerbank.TriggerConfig;
@@ -121,7 +122,7 @@
     int getTotalEvents();
 
     /**
-     * Get the total number of files for this run.
+     * Get the total number of EVIO files for this run.
      *
      * @return the total number of files for this run
      */
@@ -140,4 +141,13 @@
      * @return the date when this run record was last updated
      */
     Date getUpdated();
+    
+    /**
+     * Get a list of files in the run by format (EVIO, LCIO etc.).
+     * 
+     * @param format the file format
+     * @return the list of files with the given format
+     */
+    // FIXME: This should be removed from the run summary interface.
+    public List<File> getFiles(DatasetFileFormat format);
 }

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java	Wed Sep 23 07:49:16 2015
@@ -371,21 +371,36 @@
     @Override
     public void insertFullRunSummary(final RunSummary runSummary) {
 
+        if (runSummary == null) {
+            throw new IllegalArgumentException("The run summary is null.");
+        }
+        
         // Insert basic run log info.
         this.insertRunSummary(runSummary);
 
         // Insert EPICS data.
-        LOGGER.info("inserting " + runSummary.getEpicsData().size() + " EPICS records");
-        epicsDataDao.insertEpicsData(runSummary.getEpicsData());
+        if (runSummary.getEpicsData() != null && !runSummary.getEpicsData().isEmpty()) {
+            LOGGER.info("inserting " + runSummary.getEpicsData().size() + " EPICS records");
+            epicsDataDao.insertEpicsData(runSummary.getEpicsData());
+        } else {
+            LOGGER.warning("no EPICS data to insert");
+        }
 
         // Insert scaler data.
-        LOGGER.info("inserting " + runSummary.getScalerData().size() + " scaler data records");
-        scalerDataDao.insertScalerData(runSummary.getScalerData(), runSummary.getRun());
+        if (runSummary.getScalerData() != null && !runSummary.getScalerData().isEmpty()) {
+            LOGGER.info("inserting " + runSummary.getScalerData().size() + " scaler data records");
+            scalerDataDao.insertScalerData(runSummary.getScalerData(), runSummary.getRun());
+        } else {
+            LOGGER.warning("no scaler data to insert");
+        }
 
         // Insert trigger config.
-        LOGGER.info("inserting " + runSummary.getTriggerConfig().size() + " trigger config variables");
-        triggerConfigIntDao.insertTriggerConfig(runSummary.getTriggerConfig(), runSummary.getRun());
-
+        if (runSummary.getTriggerConfig() != null && !runSummary.getTriggerConfig().isEmpty()) {
+            LOGGER.info("inserting " + runSummary.getTriggerConfig().size() + " trigger config variables");
+            triggerConfigIntDao.insertTriggerConfig(runSummary.getTriggerConfig(), runSummary.getRun());
+        } else {
+            LOGGER.warning("no trigger config to insert");
+        }
     }
 
     /**
@@ -440,7 +455,7 @@
         runSummary.setScalerData(scalerDataDao.getScalerData(run));
 
         // Read trigger config.
-        runSummary.setTriggerConfigInt(triggerConfigIntDao.getTriggerConfig(run));
+        runSummary.setTriggerConfig(triggerConfigIntDao.getTriggerConfig(run));
 
         return runSummary;
     }

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java	Wed Sep 23 07:49:16 2015
@@ -6,9 +6,12 @@
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.GregorianCalendar;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.TimeZone;
 
+import org.hps.datacat.client.DatasetFileFormat;
 import org.hps.record.epics.EpicsData;
 import org.hps.record.scalers.ScalerData;
 import org.hps.record.triggerbank.TriggerConfig;
@@ -54,11 +57,6 @@
     private List<EpicsData> epicsDataList;
 
     /**
-     * The list of EVIO files in the run.
-     */
-    private List<File> evioFileList = new ArrayList<File>();
-
-    /**
      * The run number.
      */
     private final int run;
@@ -76,7 +74,7 @@
     /**
      * The trigger data for the run.
      */
-    private TriggerConfig triggerConfigInt;
+    private TriggerConfig triggerConfig;
 
     /**
      * Start date of run.
@@ -97,6 +95,11 @@
      * Date when the run record was last updated.
      */
     private Date updated;
+    
+    /**
+     * Lists of files indexed by their format.
+     */
+    private Map<DatasetFileFormat, List<File>> fileMap = new HashMap<DatasetFileFormat, List<File>>();
 
     /**
      * Create a run summary.
@@ -112,8 +115,8 @@
      *
      * @param file the file to add
      */
-    public void addFile(final File file) {
-        this.evioFileList.add(file);
+    public void addEvioFile(final File file) {
+        this.getEvioFiles().add(file);
     }
 
     /**
@@ -171,7 +174,7 @@
      * @return the list of EVIO files in this run
      */
     public List<File> getEvioFiles() {
-        return this.evioFileList;
+        return this.fileMap.get(DatasetFileFormat.EVIO);
     }
 
     /**
@@ -208,7 +211,7 @@
      * @return the trigger config of this run
      */
     public TriggerConfig getTriggerConfig() {
-        return triggerConfigInt;
+        return triggerConfig;
     }
 
     /**
@@ -271,7 +274,7 @@
      *
      * @param startDate the start date
      */
-    public void setEndDate(final Date endDate) {
+    void setEndDate(final Date endDate) {
         this.endDate = endDate;
     }
 
@@ -280,7 +283,7 @@
      *
      * @param endOkay <code>true</code> if end is okay
      */
-    public void setEndOkay(final boolean endOkay) {
+    void setEndOkay(final boolean endOkay) {
         this.endOkay = endOkay;
     }
    
@@ -289,26 +292,17 @@
      *
      * @param epics the EPICS data for the run
      */
-    public void setEpicsData(final List<EpicsData> epicsDataList) {
+    void setEpicsData(final List<EpicsData> epicsDataList) {
         this.epicsDataList = epicsDataList;
     }
-
-    /**
-     * Set the list of EVIO files for the run.
-     *
-     * @param evioFileList the list of EVIO files for the run
-     */
-    public void setEvioFiles(final List<File> evioFileList) {
-        this.evioFileList = evioFileList;
-    }
-
+    
     /**
      * Set whether the run was "okay" meaning the data is usable for physics
      * analysis.
      *
      * @param runOkay <code>true</code> if the run is okay
      */
-    public void setRunOkay(final boolean runOkay) {
+    void setRunOkay(final boolean runOkay) {
         this.runOkay = runOkay;
     }
 
@@ -317,7 +311,7 @@
      *
      * @param scalerData the scaler data
      */
-    public void setScalerData(final List<ScalerData> scalerDataList) {
+    void setScalerData(final List<ScalerData> scalerDataList) {
         this.scalerDataList = scalerDataList;
     }
 
@@ -326,8 +320,8 @@
      *
      * @param triggerConfig the trigger config
      */
-    public void setTriggerConfigInt(final TriggerConfig triggerConfigInt) {
-        this.triggerConfigInt = triggerConfigInt;
+    void setTriggerConfig(final TriggerConfig triggerConfig) {
+        this.triggerConfig = triggerConfig;
     }
 
     /**
@@ -335,7 +329,7 @@
      *
      * @param startDate the start date
      */
-    public void setStartDate(final Date startDate) {
+    void setStartDate(final Date startDate) {
         this.startDate = startDate;
     }
 
@@ -344,7 +338,7 @@
      *
      * @param totalEvents the total number of physics events in the run
      */
-    public void setTotalEvents(final int totalEvents) {
+    void setTotalEvents(final int totalEvents) {
         this.totalEvents = totalEvents;
     }
 
@@ -353,7 +347,7 @@
      *
      * @param totalFiles the total number of EVIO files in the run
      */
-    public void setTotalFiles(final int totalFiles) {
+    void setTotalFiles(final int totalFiles) {
         this.totalFiles = totalFiles;
     }
 
@@ -362,8 +356,37 @@
      *
      * @param updated the date when the run record was last updated
      */
-    public void setUpdated(final Date updated) {
+    void setUpdated(final Date updated) {
         this.updated = updated;
+    }
+    
+    /**
+     * Add a file associated with this run under the given format.
+     * <p>
+     * This is public because it is called by the file crawler.
+     * 
+     * @param format the file's format (EVIO, LCIO etc.)
+     * @param file a file associated with this run
+     */
+    // FIXME: This should be removed from the run summary interface.
+    public void addFile(DatasetFileFormat format, File file) {
+        // Bug fix: previously looked up this.fileMap.get(file) — a File is never a
+        // key, so every call installed a fresh empty list and discarded prior files.
+        List<File> files = this.fileMap.get(format);
+        if (files == null) {
+            files = new ArrayList<File>();
+            this.fileMap.put(format, files);
+        }
+        files.add(file);
+    }
+    
+    /**
+     * Get a list of files in the run by format (EVIO, LCIO etc.).
+     * <p>
+     * Lazily inserts an empty list into the map for a format with no files yet, so
+     * this always returns a live, modifiable list and never <code>null</code>.
+     * 
+     * @param format the file format
+     * @return the list of files with the given format
+     */
+    public List<File> getFiles(DatasetFileFormat format) {
+        if (!this.fileMap.containsKey(format)) {
+            this.fileMap.put(format, new ArrayList<File>());
+        }
+        return this.fileMap.get(format);
+    }
 
     /**