Print

Print


Author: [log in to unmask]
Date: Fri Jun 19 19:03:30 2015
New Revision: 3171

Log:
Updates to crawler including overhaul of configuration (now in a separate class); add support for inserting scaler data into db.

Added:
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/Crawler.java
      - copied, changed from r3169, java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/CrawlerConfig.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/ScalerDataUpdater.java
Removed:
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java
Modified:
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java

Copied: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/Crawler.java (from r3169, java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java)
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/Crawler.java	Fri Jun 19 19:03:30 2015
@@ -5,12 +5,11 @@
 import java.nio.file.FileVisitOption;
 import java.nio.file.Files;
 import java.nio.file.attribute.BasicFileAttributes;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
+import java.sql.Connection;
+import java.sql.SQLException;
 import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -20,8 +19,7 @@
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.PosixParser;
-import org.hps.conditions.database.DatabaseConditionsManager;
-import org.hps.record.evio.EvioEventProcessor;
+import org.hps.conditions.database.ConnectionParameters;
 import org.lcsim.util.log.DefaultLogFormatter;
 import org.lcsim.util.log.LogUtil;
 
@@ -31,12 +29,12 @@
  *
  * @author Jeremy McCormick, SLAC
  */
-public final class EvioFileCrawler {
-
-    /**
-     * Setup logger.
-     */
-    private static final Logger LOGGER = LogUtil.create(EvioFileCrawler.class, new DefaultLogFormatter(), Level.ALL);
+public final class Crawler {
+
+    /**
+     * Setup the logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.ALL);
 
     /**
      * Constant for milliseconds conversion.
@@ -47,8 +45,6 @@
      * Command line options for the crawler.
      */
     private static final Options OPTIONS = new Options();
-    
-    private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 
     /**
      * Statically define the command options.
@@ -59,15 +55,13 @@
         OPTIONS.addOption("c", "cache-files", false, "automatically cache files from MSS (JLAB only)");
         OPTIONS.addOption("C", "db-config", true, "database connection properties file (required)");
         OPTIONS.addOption("d", "directory", true, "root directory to start crawling (default is current dir)");
-        OPTIONS.addOption("e", "epics", false, "process EPICS data found in EVIO files (run log is also updated if -r is used)");
-        OPTIONS.addOption("E", "evio-processor", true, "class name of an additional EVIO processor to execute (can be used multiple times)");
+        OPTIONS.addOption("E", "evio-processor", true, "full class name of an EvioEventProcessor to execute (can be used multiple times)");
         OPTIONS.addOption("h", "help", false, "print help and exit");
-        OPTIONS.addOption("m", "max-files", true, "max number of files to process per run (only for debugging)");
-        OPTIONS.addOption("p", "print", true, "set event print interval during EVIO processing");
+        OPTIONS.addOption("m", "max-files", true, "max number of files to process per run (mostly for debugging)");
+        OPTIONS.addOption("p", "print", true, "set event printing interval during EVIO processing");
         OPTIONS.addOption("r", "insert-run-log", false, "update the run database (not done by default)");
-        OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name for date cut off");
-        OPTIONS.addOption("s", "print-summary", false, "print the run summaries at the end of the job");
-        OPTIONS.addOption("w", "max-cache-wait", true, "total seconds to allow for file caching");
+        OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
+        OPTIONS.addOption("w", "max-cache-wait", true, "total time to allow for file caching (seconds)");
         OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)");
         OPTIONS.addOption("u", "update-run-log", false, "allow overriding existing data in the run db (not allowed by default)");
     }
@@ -79,384 +73,298 @@
      */
     public static void main(final String[] args) {
         try {
-            new EvioFileCrawler().parse(args).run();
+            new Crawler().parse(args).run();
         } catch (final Exception e) {
             throw new RuntimeException(e);
         }
     }
 
     /**
-     * A list of run numbers to accept in the job.
-     */
-    private final Set<Integer> acceptRuns = new HashSet<Integer>();
-
-    /**
      * The class for managing the file caching using the 'jcache' command.
      */
     private final JCacheManager cacheManager = new JCacheManager();
 
     /**
-     * Default event print interval.
-     */
-    private final int DEFAULT_EVENT_PRINT_INTERVAL = 1000;
-
-    /**
-     * Flag indicating whether EPICS data banks should be processed.
-     */
-    private boolean processEpicsData = false;
-
-    /**
-     * Interval for printing out event number while running EVIO processors.
-     */
-    private int eventPrintInterval = DEFAULT_EVENT_PRINT_INTERVAL;
-
-    /**
-     * The maximum number of files to accept (just used for debugging purposes).
-     */
-    private int maxFiles = -1;
-
-    /**
      * The options parser.
      */
     private final PosixParser parser = new PosixParser();
-
-    /**
-     * Flag indicating whether the run summaries should be printed (may result in some extra file processing).
-     */
-    private boolean printSummary = false;
-
-    /**
-     * The root directory to crawl which defaults to the current directory.
-     */
-    private File rootDir = new File(System.getProperty("user.dir"));
-
-    /**
-     * A timestamp to use for filtering input files on their creation date.
-     */
-    private Date timestamp = null;
-
-    /**
-     * A file to use for the timestamp date.
-     */
-    private File timestampFile = null;
-
-    /**
-     * Flag indicating if the run database should be updated from results of the job.
-     */
-    private boolean updateRunLog = false;
-
-    /**
-     * Flag indicating if the file cache should be used (e.g. jcache automatically executed to move files to the cache disk from tape).
-     */
-    private boolean useFileCache = false;
-
-    /**
-     * The maximum wait time in milliseconds to allow for file caching operations.
-     */
-    private Long waitTime;
     
-    private boolean allowUpdates = false;
+    /**
+     * Configuration options from the command line.
+     */
+    private CrawlerConfig config;
     
-    private List<EvioEventProcessor> processors = new ArrayList<EvioEventProcessor>();
-
-    /**
-     * Create the processor for a single run.
-     *
-     * @param runSummary the run summary for the run
-     * @return the run processor
-     */
-    private RunProcessor createRunProcessor(final RunSummary runSummary) {
-        final RunProcessor processor = new RunProcessor(runSummary, this.cacheManager);
-        if (this.processEpicsData) {
-            processor.addProcessor(new EpicsLog(runSummary));
-        }
-        if (this.printSummary) {
-            processor.addProcessor(new EventTypeLog(runSummary));
-        }
-        if (this.maxFiles != -1) {
-            processor.setMaxFiles(this.maxFiles);
-        }
-        processor.useFileCache(this.useFileCache);
-        processor.setEventPrintInterval(this.eventPrintInterval);
-        return processor;
-    }
-
-    /**
-     * Parse command line options, but do not start the job.
+    /**
+     * Parse command line options into internal configuration object.
      *
      * @param args the command line arguments
      * @return the configured crawler object
      */
-    private EvioFileCrawler parse(final String args[]) {
+    private Crawler parse(final String args[]) {
+        
+        LOGGER.info("parsing command line options");
+        
+        config = new CrawlerConfig();
+        
         try {
             final CommandLine cl = this.parser.parse(OPTIONS, args);
 
+            // Print help.
             if (cl.hasOption("h")) {
                 this.printUsage();
             }
 
+            // Log level.
             if (cl.hasOption("L")) {
                 final Level level = Level.parse(cl.getOptionValue("L"));
                 LOGGER.info("setting log level to " + level);
                 LOGGER.setLevel(level);
             }
-            
+
+            // Database connection properties file (this is not optional).
             if (cl.hasOption("C")) {
-                String dbPropPath = cl.getOptionValue("C");
-                File dbPropFile = new File(dbPropPath);
+                final String dbPropPath = cl.getOptionValue("C");
+                final File dbPropFile = new File(dbPropPath);
                 if (!dbPropFile.exists()) {
                     throw new IllegalArgumentException("Connection properties file " + dbPropFile.getPath() + " does not exist.");
-                }
+                }                
+                config.setConnection(ConnectionParameters.fromProperties(dbPropFile));
                 LOGGER.config("using " + dbPropPath + " for db connection properties");
-                DatabaseConditionsManager.getInstance().setConnectionProperties(dbPropFile);
             } else {
                 throw new RuntimeException("The -C switch providing the database connection properties file is a required argument.");
             }
 
+            // Root directory for file crawling.
             if (cl.hasOption("d")) {
-                this.rootDir = new File(cl.getOptionValue("d"));
-                if (!this.rootDir.exists()) {
+                File rootDir = new File(cl.getOptionValue("d"));
+                if (!rootDir.exists()) {
                     throw new IllegalArgumentException("The directory does not exist.");
                 }
-                if (!this.rootDir.isDirectory()) {
+                if (!rootDir.isDirectory()) {
                     throw new IllegalArgumentException("The specified path is not a directory.");
                 }
-            }
-
+                config.setRootDir(rootDir);
+                LOGGER.config("root dir for crawling set to " + config.rootDir());
+            }
+
+            // Timestamp file for date filtering.
             if (cl.hasOption("t")) {
-                this.timestampFile = new File(cl.getOptionValue("t"));
-                if (!this.timestampFile.exists()) {
+                File timestampFile = new File(cl.getOptionValue("t"));
+                config.setTimestampFile(timestampFile);
+                if (!timestampFile.exists()) {
                     try {
-                        // Create new time stamp file.
-                        LOGGER.info("creating new timestamp file " + this.timestampFile.getPath());
-                        this.timestampFile.createNewFile();
-                    } catch (IOException e) {
-                        throw new IllegalArgumentException("Error creating timestamp file " + this.timestampFile.getPath());
+                        // Create new time stamp file which will have its date updated at the end of the job.
+                        LOGGER.config("creating new timestamp file " + timestampFile.getPath());
+                        timestampFile.createNewFile();                        
+                    } catch (final IOException e) {
+                        throw new IllegalArgumentException("Error creating timestamp file: " + timestampFile.getPath());
                     }
-                } else { 
+                } else {
                     try {
-                        // Get cut-off date for files from existing time stamp file. 
-                        this.timestamp = new Date(Files.readAttributes(this.timestampFile.toPath(), BasicFileAttributes.class).lastModifiedTime().toMillis());
-                        LOGGER.info("got timestamp " + this.timestamp + " from existing file " + this.timestampFile.getPath());
+                        // Get the date filter for files from an existing time stamp file provided by the user.
+                        Date timestamp = new Date(Files.readAttributes(config.timestampFile().toPath(), BasicFileAttributes.class).lastModifiedTime()
+                                .toMillis());
+                        config.setTimestamp(timestamp);
+                        LOGGER.config("got timestamp " + timestamp + " from existing file " + config.timestampFile().getPath());
                     } catch (final IOException e) {
                         throw new RuntimeException("Error getting attributes of timestamp file.", e);
                     }
                 }
             }
 
+            // List of one or more runs to accept in the job.
             if (cl.hasOption("a")) {
+                Set<Integer> acceptRuns = new HashSet<Integer>();
                 for (final String runString : cl.getOptionValues("a")) {
                     final Integer acceptRun = Integer.parseInt(runString);
-                    this.acceptRuns.add(acceptRun);
-                    LOGGER.config("added run number filter " + acceptRun);
-                }
-            }
-
-            if (cl.hasOption("s")) {
-                LOGGER.config("print summary enabled");
-                this.printSummary = true;
-            }
-
-            if (cl.hasOption("r")) {                
-                this.updateRunLog = true;
-                LOGGER.config("run db will be updated");
-            }
-
-            if (cl.hasOption("e")) {                
-                this.processEpicsData = true;
-                LOGGER.config("EPICS processing enabled");
-            }
-
-            if (cl.hasOption("c")) {                
-                this.useFileCache = true;
-                LOGGER.config("using file cache");
-            }
-
+                    acceptRuns.add(acceptRun);
+                    LOGGER.config("added run filter " + acceptRun);
+                }
+                config.setAcceptRuns(acceptRuns);
+            }
+
+            // Enable run log updating (off by default).
+            if (cl.hasOption("r")) {
+                config.setUpdateRunLog(true);
+                LOGGER.config("updating run database is enabled");
+            }
+
+            // Enable file cache usage for running at JLAB.
+            if (cl.hasOption("c")) {
+                config.setUseFileCache(true);
+                LOGGER.config("file cache is enabled");
+            }
+
+            // Max wait time for file caching.
             if (cl.hasOption("w")) {
-                this.waitTime = Long.parseLong(cl.getOptionValue("w")) * MILLISECONDS;
-                if (this.waitTime > 0L) {
-                    this.cacheManager.setWaitTime(this.waitTime);
-                    LOGGER.config("max wait time for caching set to " + this.waitTime);
-                }
-            }
-
+                Long waitTime = Long.parseLong(cl.getOptionValue("w")) * MILLISECONDS;
+                config.setWaitTime(waitTime);
+                LOGGER.config("max time for file caching set to " + config.waitTime());
+            }
+
+            // Max files to process per run; mostly just here for debugging purposes.
             if (cl.hasOption("m")) {
-                this.maxFiles = Integer.parseInt(cl.getOptionValue("m"));
-                LOGGER.config("max files set to " + this.maxFiles);
-            }
-
+                int maxFiles = Integer.parseInt(cl.getOptionValue("m"));
+                config.setMaxFiles(maxFiles);
+                LOGGER.config("max files set to " + maxFiles);
+            }
+
+            // Event printing interval when doing EVIO event processing.
             if (cl.hasOption("p")) {
-                this.eventPrintInterval = Integer.parseInt(cl.getOptionValue("p"));
-                LOGGER.config("event print interval set to " + this.eventPrintInterval);
-            }
-            
+                int eventPrintInterval = Integer.parseInt(cl.getOptionValue("p"));
+                config.setEventPrintInterval(eventPrintInterval);
+                LOGGER.config("event print interval set to " + eventPrintInterval);
+            }
+
+            // Flag to allow replacement of existing records in the database; not allowed by default.
             if (cl.hasOption("u")) {
-                this.allowUpdates = true;
-                if (!this.updateRunLog) {
-                    LOGGER.info("the -u option is ignored because run_log is not being updated");
-                }
-            }
-            
+                config.setAllowUpdates(true);
+                LOGGER.config("replacement of existing run log information in database is enabled");
+            }
+
+            // User supplied timestamp string that is converted to a date for file filtering.
             if (cl.hasOption("b")) {
                 try {
-                    if (this.timestamp != null) {
-                        LOGGER.warning("existing timestamp from file " + this.timestamp + " will be overridden by date from -b argument");
+                    if (config.timestamp() != null) {
+                        LOGGER.warning("existing timestamp from file " + config.timestamp() + " will be overridden by date from -b argument");
                     }
-                    this.timestamp = DATE_FORMAT.parse(cl.getOptionValue("b"));
-                    LOGGER.info("set timestamp to " + DATE_FORMAT.format(this.timestamp));
-                } catch (java.text.ParseException e) {
+                    config.setTimestamp(cl.getOptionValue("b"));
+                    LOGGER.config("set timestamp to " + config.timestamp() + " from -b argument");
+                } catch (final java.text.ParseException e) {
                     throw new RuntimeException(e);
                 }
             }
-            
-            
+
+            // User supplied EvioEventProcessor classes to run in the run processing step.
             if (cl.hasOption("E")) {
-                String[] classNames = cl.getOptionValues("E");
-                for (String className : classNames) {
+                final String[] classNames = cl.getOptionValues("E");
+                for (final String className : classNames) {
                     try {
-                        processors.add(createEvioEventProcessor(className));
-                    } catch (Exception e) {
+                        config.addProcessor(className);
+                    } catch (final Exception e) {
                         throw new RuntimeException(e);
                     }
                 }
             }
-            
+
         } catch (final ParseException e) {
             throw new RuntimeException("Error parsing options.", e);
         }
+        
+        LOGGER.info("done parsing command line options");
 
         return this;
     }
 
     /**
-     * Print the usage statement for this tool to the console.
+     * Print the usage statement for this tool to the console and exit.
      */
     private void printUsage() {
         final HelpFormatter help = new HelpFormatter();
         help.printHelp("EvioFileCrawler", "", OPTIONS, "");
         System.exit(0);
     }
-
-    /**
-     * Process a single run.
-     *
-     * @param runSummary the run summary information
-     * @throws Exception if there is some error while running the file processing
-     */
-    private void processRun(final RunSummary runSummary) throws Exception {
-
-        // Clear the cache manager.
-        this.cacheManager.clear();
-
-        // Create a processor to process all the EVIO events in the run.
-        final RunProcessor runProcessor = this.createRunProcessor(runSummary);
-        
-        for (EvioEventProcessor processor : processors) {
-            runProcessor.addProcessor(processor);
-            LOGGER.config("added extra EVIO processor " + processor.getClass().getName());
-        }
-
-        // Process all of the runs files.
-        runProcessor.process();
-    }
-
-    /**
-     * Process all runs that were found.
-     *
-     * @param runs the run log containing the list of run summaries
-     * @throws Exception if there is an error processing one of the runs
-     */
-    private void processRuns(final RunLog runs) throws Exception {
-        // Process all of the runs that were found.
-        for (final int run : runs.getSortedRunNumbers()) {
-            this.processRun(runs.getRunSummary(run));
-        }
-    }
-
-    /**
-     * Run the crawler job (generally will take a long time!).
+   
+    /**
+     * Run the full crawler job, including all configured file-processing steps, which may take a very long time!
      *
      * @throws Exception if there is some error during the job
      */
     public void run() throws Exception {
-        final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
-        final EvioFileVisitor visitor = new EvioFileVisitor(this.timestamp);
-        if (this.timestamp != null) {
-            visitor.addFilter(new DateFileFilter(this.timestamp));
-            LOGGER.config("added date filter with timestamp " + this.timestamp);
-        }
-        if (!this.acceptRuns.isEmpty()) {
-            visitor.addFilter(new RunFilter(this.acceptRuns));
-            LOGGER.config("added run filter");
-        }
-        try {
-            // Walk the file tree from the root directory.
-            Files.walkFileTree(this.rootDir.toPath(), options, Integer.MAX_VALUE, visitor);
-        } catch (final IOException e) {
-            throw new RuntimeException(e);
-        }
-
+
+        LOGGER.info("running Crawler job");
+        
+        // Create the file visitor for crawling the root directory with the given date filter.
+        final EvioFileVisitor visitor = new EvioFileVisitor(config.timestamp());
+
+        // Walk the file tree using the visitor.
+        walk(visitor);
+
+        // Get the list of run data created by the visitor.
         final RunLog runs = visitor.getRunLog();
 
-        // Print run numbers that were found.
+        // Print the run numbers that were found.
+        printRunNumbers(runs);
+
+        // Sort the files on their sequence numbers.
+        runs.sortAllFiles();
+
+        // Process all the files, performing caching from MSS if necessary. 
+        RunProcessor.processRuns(this.cacheManager, runs, config);
+
+        // Print the summary information after the run processing is done.
+        runs.printRunSummaries();
+
+        // Execute the database update.
+        executeRunLogUpdate(runs);
+
+        // Update the timestamp output file.
+        updateTimestamp();
+
+        LOGGER.info("Crawler job is done!");
+    }
+
+    private void executeRunLogUpdate(final RunLog runs) throws SQLException {
+        // Insert the run information into the database.
+        if (config.updateRunLog()) {
+
+            // Open a DB connection.
+            final Connection connection = config.connectionParameters().createConnection();
+
+            // Create and configure RunLogUpdater which updates the run log for all runs found in the crawl job.
+            final RunLogUpdater runUpdater = new RunLogUpdater(connection, runs, config.allowUpdates());
+
+            // Update the DB.
+            runUpdater.insert();
+
+            // Close the DB connection.
+            connection.close();
+        }
+    }
+
+    private void printRunNumbers(final RunLog runs) {
+        // Print the list of runs that were found.
         final StringBuffer sb = new StringBuffer();
         for (final Integer run : runs.getSortedRunNumbers()) {
             sb.append(run + " ");
         }
-        LOGGER.info("found files from runs: " + sb.toString());
-
-        // Sort files on their sequence numbers.
-        LOGGER.fine("sorting files by sequence ...");
-        runs.sortAllFiles();
-
-        // Process all the files in all of runs. This will perform caching from MSS if necessary.
-        this.processRuns(runs);
-
-        // Print the run summary information.
-        if (this.printSummary) {
-            runs.printRunSummaries();
-        }
-
-        // Insert run information into the database.
-        if (this.updateRunLog) {
-
-            // Create and configure RunLogUpdater which updates the run log for all runs found in crawl.
-            RunLogUpdater runUpdater = new RunLogUpdater(runs, allowUpdates);
-            
-            if (!this.processEpicsData) {
-                // Disable inserting EPICS data if it was not processed.
-                runUpdater.setInsertEpicsData(false);
-            }
-            
-            // Update the db.
-            runUpdater.insert();
-            
-            // Close the db connection.
-            runUpdater.close();
-        }               
-
-        // Update the timestamp file which can be used to tell which files have been processed.
-        if (this.timestampFile == null) {
-            this.timestampFile = new File("timestamp");
+        LOGGER.info("found EVIO files from runs: " + sb.toString());
+    }
+
+    private void walk(final EvioFileVisitor visitor) {
+        if (config.timestamp() != null) {
+            // Date filter from timestamp.
+            visitor.addFilter(new DateFileFilter(config.timestamp()));
+            LOGGER.config("added date filter " + config.timestamp());
+        }
+
+        if (!config.acceptRuns().isEmpty()) {
+            // List of run numbers to accept.
+            visitor.addFilter(new RunFilter(config.acceptRuns()));
+            LOGGER.config("added run number filter");
+        }
+
+        try {
+            // Walk the file tree from the root directory.
+            final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
+            Files.walkFileTree(config.rootDir().toPath(), options, Integer.MAX_VALUE, visitor);
+        } catch (final IOException e) {
+            throw new RuntimeException("Error while walking the directory tree.", e);
+        }
+    }
+
+    private void updateTimestamp() {
+        // Update the timestamp file which can be used to tell which files have been processed by their creation date.
+        if (config.timestampFile() == null) {
+            config.setTimestampFile(new File("timestamp"));
             try {
-                this.timestampFile.createNewFile();
+                config.timestampFile().createNewFile();
             } catch (final IOException e) {
                 throw new RuntimeException(e);
             }
-            LOGGER.info("created new timestamp file: " + this.timestampFile.getPath());
-        }
-        this.timestampFile.setLastModified(System.currentTimeMillis());
-        LOGGER.info("set modified on timestamp file: " + new Date(this.timestampFile.lastModified()));
-    }
-    
-    /**
-     * Create an {@link org.hps.record.evio.EvioEventProcessor} by its class name.
-     * 
-     * @param className the fully qualified name of the class
-     * @return the new object
-     * @throws Exception if there is a problem instantiating the class (does not exist; access exception etc.)
-     */
-    private EvioEventProcessor createEvioEventProcessor(String className) throws Exception {
-        return EvioEventProcessor.class.cast(Class.forName(className).newInstance());
+            LOGGER.config("created new timestamp file: " + config.timestampFile().getPath());
+        }
+        config.timestampFile().setLastModified(System.currentTimeMillis());
+        LOGGER.config("set modified on timestamp file: " + new Date(config.timestampFile().lastModified()));
     }
 }

Added: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/CrawlerConfig.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/CrawlerConfig.java	(added)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/CrawlerConfig.java	Fri Jun 19 19:03:30 2015
@@ -0,0 +1,201 @@
+package org.hps.record.evio.crawler;
+
+import java.io.File;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Set;
+
+import org.hps.conditions.database.ConnectionParameters;
+import org.hps.record.evio.EvioEventProcessor;
+
+/**
+ * Configuration for the EVIO file crawler: the root directory, run and timestamp filters, extra EVIO processors, file caching options and the
+ * database connection and update flags that control how the crawl is performed.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+final class CrawlerConfig {
+
+    private static final SimpleDateFormat TIMESTAMP_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    
+    /**
+     * Default event print interval.
+     */
+    private final int DEFAULT_EVENT_PRINT_INTERVAL = 1000;
+
+    /**
+     * Interval for printing out event number while running EVIO processors.
+     */
+    private int eventPrintInterval = DEFAULT_EVENT_PRINT_INTERVAL;
+   
+    /**
+     * A list of run numbers to accept in the job.
+     */
+    private Set<Integer> acceptRuns;
+
+    private boolean allowUpdates = false;
+
+    private ConnectionParameters connectionParameters;
+
+    /**
+     * The maximum number of files to accept (just used for debugging purposes).
+     */
+    private int maxFiles = -1;
+
+    private final List<EvioEventProcessor> processors = new ArrayList<EvioEventProcessor>();
+
+    /**
+     * The root directory to crawl which defaults to the current directory.
+     */
+    private File rootDir = new File(System.getProperty("user.dir"));
+
+    /**
+     * A timestamp to use for filtering input files on their creation date.
+     */
+    private Date timestamp = null;
+
+    /**
+     * A file to use for the timestamp date.
+     */
+    private File timestampFile = null;
+
+    /**
+     * Flag indicating if the run database should be updated from results of the job.
+     */
+    private boolean updateRunLog = false;
+
+    /**
+     * Flag indicating if the file cache should be used (e.g. jcache automatically executed to move files to the cache disk from tape).
+     */
+    private boolean useFileCache = false;
+
+    /**
+     * The maximum wait time in milliseconds to allow for file caching operations.
+     */
+    private Long waitTime;
+
+    CrawlerConfig addProcessor(final EvioEventProcessor processor) {
+        this.processors.add(processor);
+        return this;
+    }
+    
+    CrawlerConfig addProcessor(final String className) {
+        try {
+            this.processors.add(EvioEventProcessor.class.cast(Class.forName(className).newInstance()));
+        } catch (Exception e) {
+            throw new RuntimeException("Error creating EvioEventProcessor with type: " + className, e);
+        }
+        return this;
+    }
+
+    CrawlerConfig setAcceptRuns(final Set<Integer> acceptRuns) {
+        this.acceptRuns = acceptRuns;
+        return this;
+    }
+
+    CrawlerConfig setAllowUpdates(final boolean allowUpdates) {
+        this.allowUpdates = allowUpdates;
+        return this;
+    }
+
+    CrawlerConfig setConnection(final ConnectionParameters connectionParameters) {
+        this.connectionParameters = connectionParameters;
+        return this;
+    }
+
+    CrawlerConfig setMaxFiles(final int maxFiles) {
+        this.maxFiles = maxFiles;
+        return this;
+    }
+    
+    CrawlerConfig setRootDir(File rootDir) {
+        this.rootDir = rootDir;
+        return this;
+    }
+    
+    CrawlerConfig setTimestamp(Date timestamp) {
+        this.timestamp = timestamp;
+        return this;
+    }
+    
+    CrawlerConfig setTimestamp(String timestampString) throws ParseException {
+        this.timestamp = TIMESTAMP_DATE_FORMAT.parse(timestampString); // keep the parsed date; expected format is "yyyy-MM-dd HH:mm:ss"
+        return this;
+    }
+    
+    CrawlerConfig setTimestampFile(File timestampFile) {
+        this.timestampFile = timestampFile;
+        return this;
+    }
+    
+    CrawlerConfig setUpdateRunLog(boolean updateRunLog) {
+        this.updateRunLog = updateRunLog;
+        return this;
+    }
+    
+    CrawlerConfig setUseFileCache(boolean useFileCache) {
+        this.useFileCache = useFileCache;
+        return this;
+    }
+    
+    CrawlerConfig setWaitTime(long waitTime) {
+        this.waitTime = waitTime;
+        return this;
+    }
+    
+    CrawlerConfig setEventPrintInterval(int eventPrintInterval) {
+        this.eventPrintInterval = eventPrintInterval;
+        return this;
+    }
+    
+    Set<Integer> acceptRuns() {
+        return acceptRuns;
+    }
+    
+    boolean allowUpdates() {
+        return allowUpdates;
+    }
+
+    ConnectionParameters connectionParameters() {
+        return connectionParameters;
+    }
+
+    int maxFiles() {
+        return maxFiles;
+    }
+
+    List<EvioEventProcessor> processors() {
+        return processors;
+    }
+
+    File rootDir() {
+        return rootDir;
+    }
+
+    Date timestamp() {
+        return timestamp;
+    }
+    
+    File timestampFile() {
+        return timestampFile;
+    }
+
+    boolean updateRunLog() {
+        return updateRunLog;
+    }
+
+    boolean useFileCache() {
+        return useFileCache;
+    }
+
+    Long waitTime() {
+        return waitTime;
+    }    
+    
+    int eventPrintInterval() {
+        return this.eventPrintInterval;
+    }          
+}

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java	Fri Jun 19 19:03:30 2015
@@ -11,8 +11,9 @@
 import org.lcsim.util.log.LogUtil;
 
 /**
- * This class contains summary information about a series of runs that are themselves modeled with the {@link RunSummary} class. These can be looked
- * up by their run number {@link #getRunSummary(int)}.
+ * This class contains summary information about a series of runs which are modeled with the {@link RunSummary} class.
+ * <p> 
+ * These can be looked up by their run number {@link #getRunSummary(int)}.
  *
  * @author Jeremy McCormick, SLAC
  */

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java	Fri Jun 19 19:03:30 2015
@@ -5,13 +5,12 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.hps.conditions.database.ConnectionParameters;
 import org.lcsim.util.log.LogUtil;
 
 /**
  * Updates the run database with information from the crawler job.
  * <p>
- * The {@link RunSummaryUpdater} is used to insert rows for each run.
+ * The {@link RunSummaryUpdater} is used to insert the data for each run.
  * 
  * @author Jeremy McCormick, SLAC
  */
@@ -38,44 +37,20 @@
     private boolean allowUpdates = false;
     
     /**
-     * <code>true</code> if EPICS data should be put into the database (skipped if not).
-     */
-    private boolean insertEpicsData = true;
-
-    /**
      * Create a new updater.
      * 
      * @param runLog the run information
      * @param allowUpdates <code>true</code> if updates should be allowed
      */
-    RunLogUpdater(RunLog runLog, boolean allowUpdates) {
+    RunLogUpdater(Connection connection, RunLog runLog, boolean allowUpdates) {
+        // Set the DB connection.
+        this.connection = connection;
+        
+        // Set the update policy (forwarded to each RunSummaryUpdater) and the run log with the run info to update.
+        this.allowUpdates = allowUpdates;
         this.runLog = runLog;
-        
-        // Create database connection to use in this session.
-        final ConnectionParameters cp = new ConnectionParameters("root", "derp", "hps_run_db", "localhost");
-        connection = cp.createConnection();
     }
-    
-    /**
-     * Set to <code>true</code> if EPICS data should be inserted.
-     * 
-     * @param insertEpicsData <code>true</code> 
-     */
-    void setInsertEpicsData(boolean insertEpicsData) {
-        this.insertEpicsData = insertEpicsData;
-    }
-    
-    /**
-     * Close the database connection.
-     */
-    void close() {
-        try {
-            connection.close();
-        } catch (SQLException e) {
-            e.printStackTrace();
-        }
-    }
-    
+            
     /**
      * Insert the run summary information into the database, including updating the run_log_files
      * and run_log_epics tables.
@@ -96,68 +71,19 @@
                 
                 // Get the RunSummary data for the run.
                 RunSummary runSummary = runLog.getRunSummary(run);
-                
+                                               
                 LOGGER.info("updating " + runSummary);
                                 
                 // Create the db updater for the RunSummary.
-                RunSummaryUpdater runUpdater = new RunSummaryUpdater(connection, runSummary);      
-                                
-                // Does a row already exist for the run?
-                if (runUpdater.runExists()) {
-                    LOGGER.info("record for " + run + " exists already");
-                    // Are updates allowed?
-                    if (allowUpdates) {
-                        LOGGER.info("existing row for " + run + " will be updated");
-                        // Update existing row.
-                        runUpdater.updateRun();
-                    } else {
-                        // Row exists and updates not allowed which is an error.
-                        throw new RuntimeException("Row already exists for run " + run + " and allowUpdates is false.");
-                    }
-                } else {                
-                                        
-                    LOGGER.info("inserting new row in runs for run " + run + " ...");
-                    
-                    // Insert new record into run_log.
-                    runUpdater.insertRun();
-                }
+                RunSummaryUpdater runUpdater = new RunSummaryUpdater(connection, runSummary);
                 
-                // Do records exist in the run_log_files table?
-                if (runUpdater.filesExist()) {
-                    // Are updates disallowed?
-                    if (!allowUpdates) { 
-                        // File records exist for the run but updating is allowed so throw an exception.
-                        throw new RuntimeException("Cannot delete existing records in run_log_files because allowUpdates is false.");
-                    } else {
-                        // Delete the file log.
-                        runUpdater.deleteFiles();
-                    }
-                }
-                                
-                // Insert records into run_log_files now that existing records were deleted, if necessary.
-                runUpdater.insertFiles();
+                // Set whether existing records can be replaced.
+                runUpdater.setAllowDeleteExisting(allowUpdates);
                 
-                // Is EPICS data processing enabled?
-                if (insertEpicsData) {
-                    // Does the EPICS data already exist?
-                    if (runUpdater.epicsExists()) {
-                        // Is replacing data disallowed?
-                        if (!allowUpdates) {
-                            // EPICS data exists but updating is not allowed so throw exception.
-                            throw new RuntimeException("EPICS run log already exists and allowUpdates is false.");
-                        } else {
-                            // Delete existing EPICS data.
-                            runUpdater.deleteEpics();
-                        }
-                    }
-                    
-                    // Insert EPICS data processed in the job for this run.
-                    runUpdater.insertEpics();
-                }
+                // Insert the run records.
+                runUpdater.insert();
                 
-                // Commit the transactions for this run.
-                LOGGER.info("committing transaction for run " + run);
-                connection.commit();
+                LOGGER.info("run " + runSummary.getRun() + " summary inserted successfully");
             }
             
         } catch (final SQLException e) {
@@ -168,13 +94,7 @@
                 LOGGER.log(Level.SEVERE, "error rolling back transaction", e2);
                 throw new RuntimeException(e);
             }
-        } finally {
-            try {
-                connection.close();
-            } catch (final SQLException e) {
-                throw new RuntimeException(e);
-            }
-        }
+        } 
         
         LOGGER.info("done inserting run data");
     }             

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java	Fri Jun 19 19:03:30 2015
@@ -10,6 +10,7 @@
 
 import org.hps.record.evio.EvioEventConstants;
 import org.hps.record.evio.EvioEventProcessor;
+import org.hps.record.scalers.ScalersEvioProcessor;
 import org.jlab.coda.jevio.EvioEvent;
 import org.jlab.coda.jevio.EvioException;
 import org.jlab.coda.jevio.EvioReader;
@@ -17,7 +18,7 @@
 import org.lcsim.util.log.LogUtil;
 
 /**
- * Processes all the EVIO files from a run in order to extract various information including start and end dates.
+ * Processes EVIO files from a run in order to extract various meta data information including start and end dates.
  * <p>
  * This class is a wrapper for activating different sub-tasks, including optionally caching all files from the JLAB MSS to the cache disk.
  * <p>
@@ -33,6 +34,42 @@
     private static final Logger LOGGER = LogUtil.create(RunProcessor.class, new DefaultLogFormatter(), Level.FINE);
 
     /**
+     * Create the processor for a single run.
+     *
+     * @param runSummary the run summary for the run
+     * @return the run processor
+     */
+    static RunProcessor createRunProcessor(final JCacheManager cacheManager, final RunSummary runSummary, CrawlerConfig config) {
+
+        // Create new run processor.
+        final RunProcessor processor = new RunProcessor(runSummary, cacheManager);
+
+        // EPICS processor.
+        processor.addProcessor(new EpicsLog(runSummary));
+
+        // Scaler data processor.
+        final ScalersEvioProcessor scalersProcessor = new ScalersEvioProcessor();
+        scalersProcessor.setResetEveryEvent(false);
+        processor.addProcessor(scalersProcessor);
+
+        // Event log processor.
+        processor.addProcessor(new EventTypeLog(runSummary));
+
+        // Max files.
+        if (config.maxFiles() != -1) {
+            processor.setMaxFiles(config.maxFiles());
+        }
+
+        // Enable file caching.
+        processor.useFileCache(config.useFileCache());
+
+        // Set event printing interval.
+        processor.setEventPrintInterval(config.eventPrintInterval());
+
+        return processor;
+    }
+
+    /**
      * The cache manager.
      */
     private final JCacheManager cacheManager;
@@ -107,7 +144,7 @@
 
     /**
      * Get the event count from the current <code>EvioReader</code>.
-     * 
+     *
      * @param reader the current <code>EvioReader</code>
      * @return the event count
      * @throws IOException if there is a generic IO error
@@ -142,22 +179,30 @@
         return this.processors;
     }
 
-    /**
-     * Return <code>true</code> if a valid CODA <i>END</i> event can be located in the
-     * <code>EvioReader</code>'s current file. 
-     * 
+    ScalersEvioProcessor getScalersProcessor() {
+        for (final EvioEventProcessor processor : this.processors) {
+            if (processor instanceof ScalersEvioProcessor) {
+                return ScalersEvioProcessor.class.cast(processor);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Return <code>true</code> if a valid CODA <i>END</i> event can be located in the <code>EvioReader</code>'s current file.
+     *
      * @param reader the EVIO reader
      * @return <code>true</code> if valid END event is located
      * @throws Exception if there are IO problems using the reader
      */
     boolean isEndOkay(final EvioReader reader) throws Exception {
         LOGGER.info("checking is END okay ...");
-        
+
         boolean endOkay = false;
-        
+
         // Go to second to last event for searching.
         reader.gotoEventNumber(reader.getEventCount() - 2);
-        
+
         // Look for END event.
         EvioEvent event = null;
         while ((event = reader.parseNextEvent()) != null) {
@@ -170,8 +215,8 @@
     }
 
     /**
-     * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and 
-     * performing special tasks such as the extraction of start and end dates.
+     * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and performing special tasks such as the extraction
+     * of start and end dates.
      * <p>
      * This method will also activate file caching, if enabled by the {@link #useFileCache} option.
      *
@@ -230,13 +275,13 @@
                 this.runSummary.setStartDate(runStart);
             }
 
-            // Compute event count for the file and store the value in the run summary's file list.
+            // Compute the event count for the file and store the value in the run summary's file list.
             LOGGER.info("getting event count for " + file.getPath() + "...");
             final int eventCount = this.computeEventCount(reader);
             this.runSummary.getEvioFileList().setEventCount(file, eventCount);
             LOGGER.info("set event count " + eventCount + " for " + file.getPath());
 
-            // Process the events using the list of EVIO processors.
+            // Process the events using the EVIO processors.
             LOGGER.info("running EVIO processors ...");
             reader.gotoEventNumber(0);
             int nProcessed = 0;
@@ -266,6 +311,12 @@
                 LOGGER.info("found end date " + endDate);
             }
 
+            // Pull scaler data from EVIO processor into run summary.
+            final ScalersEvioProcessor scalersProcessor = this.getScalersProcessor();
+            if (scalersProcessor != null) {
+                runSummary.setScalerData(scalersProcessor.getScalerData());
+            }
+
         } finally {
             if (reader != null) {
                 reader.close();
@@ -306,5 +357,40 @@
         this.useFileCache = cacheFiles;
         LOGGER.config("file caching enabled");
     }
-
+    
+    /**
+     * Process all the runs that were found.
+     *
+     * @param runs the run log containing the list of run summaries
+     * @throws Exception if there is an error processing one of the runs
+     */
+    static void processRuns(JCacheManager cacheManager, final RunLog runs, CrawlerConfig config) throws Exception {
+        
+        // Configure max wait time of jcache manager.
+        if (config.waitTime() != null && config.waitTime() > 0L) {
+            cacheManager.setWaitTime(config.waitTime());
+            LOGGER.config("JCacheManager max wait time set to " + config.waitTime());
+        }                                
+        
+        // Process all of the runs that were found.
+        for (final int run : runs.getSortedRunNumbers()) {
+            
+            // Get the run summary.
+            RunSummary runSummary = runs.getRunSummary(run);
+            
+            // Clear the cache manager.
+            cacheManager.clear();
+
+            // Create a processor to process all the EVIO events in the run.
+            final RunProcessor runProcessor = RunProcessor.createRunProcessor(cacheManager, runSummary, config);
+
+            for (final EvioEventProcessor processor : config.processors()) {
+                runProcessor.addProcessor(processor);
+                LOGGER.config("added extra EVIO processor " + processor.getClass().getName());
+            }
+
+            // Process all of the run's files.
+            runProcessor.process();
+        }
+    }
 }

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java	Fri Jun 19 19:03:30 2015
@@ -7,11 +7,11 @@
 import java.util.logging.Logger;
 
 import org.hps.record.epics.EpicsData;
+import org.hps.record.scalers.ScalerData;
 import org.lcsim.util.log.LogUtil;
 
 /**
- * This class models the run summary information which is persisted as a row in the <i>run_log</i> table
- * of the run database.
+ * This class models the run summary information which is persisted as a row in the <i>run_log</i> table of the run database.
  * <p>
  * This information includes:
  * <ul>
@@ -63,6 +63,8 @@
      */
     private final int run;
 
+    private ScalerData scalerData;
+
     /**
      * The start date of the run.
      */
@@ -141,6 +143,10 @@
      */
     int getRun() {
         return this.run;
+    }
+
+    ScalerData getScalerData() {
+        return this.scalerData;
     }
 
     /**
@@ -232,6 +238,10 @@
         this.eventTypeCounts = eventTypeCounts;
     }
 
+    void setScalerData(final ScalerData scalerData) {
+        this.scalerData = scalerData;
+    }
+
     /**
      * Set the start date of the run.
      *
@@ -247,9 +257,10 @@
     void sortFiles() {
         this.files.sort();
     }
-    
+
+    @Override
     public String toString() {
-        return "RunSummary { run: " + this.run + ", started: " + this.getStartDate() + ", ended: " + this.getEndDate() + ", events: " 
+        return "RunSummary { run: " + this.run + ", started: " + this.getStartDate() + ", ended: " + this.getEndDate() + ", events: "
                 + this.getTotalEvents() + ", endOkay: " + endOkay + " }";
     }
 }

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java	Fri Jun 19 19:03:30 2015
@@ -13,7 +13,7 @@
 
 /**
  * Updates the run database tables with information from a single run.
- * 
+ *
  * @author Jeremy McCormick, SLAC
  */
 public class RunSummaryUpdater {
@@ -22,105 +22,176 @@
      * Setup logging.
      */
     private static final Logger LOGGER = LogUtil.create(RunSummaryUpdater.class);
-    
+
+    /**
+     * Flag to allow deletion/replacement of existing records; disallowed by default.
+     */
+    private boolean allowDeleteExisting = false;
+
+    /**
+     * The database connection.
+     */
+    private final Connection connection;
+
+    /**
+     * The run number (read from the summary in the constructor for convenience).
+     */
+    private int run = -1;
+
     /**
      * The run summary to update or insert.
      */
-    private RunSummary runSummary;   
-    
-    /**
-     * The database connection.
-     */
-    private Connection connection;
-    
-    /**
-     * The run number (read from the summary in the constructor for convenience).
-     */
-    private int run = -1;
-        
+    private final RunSummary runSummary;
+
     /**
      * Create a <code>RunSummaryUpdater</code> for the given <code>RunSummary</code>.
-     * 
+     *
      * @param connection the database connection
      * @param runSummary the run summary to update or insert
      */
-    RunSummaryUpdater(Connection connection, RunSummary runSummary) {
+    RunSummaryUpdater(final Connection connection, final RunSummary runSummary) {
 
         if (connection == null) {
             throw new IllegalArgumentException("connection is null");
         }
         this.connection = connection;
-        
+
         if (runSummary == null) {
             throw new IllegalArgumentException("runSummary is null");
         }
         this.runSummary = runSummary;
+
+        // Cache run number.
+        this.run = this.runSummary.getRun();
+    }
+
+    private void delete() throws SQLException {
+
+        LOGGER.info("deleting existing information for run " + runSummary.getRun());
+
+        // Delete EPICS log.
+        this.deleteEpics();
+
+        // Delete file list.
+        this.deleteFiles();
+
+        // Delete run log.
+        this.deleteRun();
+
+        LOGGER.info("deleted run " + runSummary.getRun() + " info successfully");
+    }
+
+    /**
+     * Delete existing EPICS data for this run from the <i>run_epics</i> table.
+     *
+     * @throws SQLException if there is an error performing the db query
+     */
+    private void deleteEpics() throws SQLException {
+        final PreparedStatement statement = connection.prepareStatement("DELETE FROM run_epics WHERE run = ?");
+        statement.setInt(1, this.run);
+        statement.executeUpdate();
+    }
+
+    /**
+     * Delete the records of the files associated to this run
+     * from the <i>run_files</i> table.
+     *
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    private void deleteFiles() throws SQLException {
+        LOGGER.info("deleting rows from run_files for " + run + " ...");
+        final PreparedStatement s = connection.prepareStatement("DELETE FROM run_files where run = ?");
+        s.setInt(1, run);
+        s.executeUpdate();
+        LOGGER.info("done deleting rows from run_files for " + run);
+    }
+
+    /**
+     * Delete the row for this run from the <i>runs</i> table.
+     * <p>
+     * This doesn't delete the rows from <i>run_epics</i> or <i>run_files</i>.
+     *
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    private void deleteRun() throws SQLException {
+        LOGGER.info("deleting record from runs for " + run + " ...");
+        final PreparedStatement s = connection.prepareStatement("DELETE FROM runs where run = ?");
+        s.setInt(1, run);
+        s.executeUpdate();
+        LOGGER.info("deleted rows from runs for " + run);
+    }
+
+    void insert() throws SQLException {
         
-        this.run = this.runSummary.getRun();
-    }
-        
-    /**
-     * Execute a SQL update to modify an existing row in the database.
-     * 
-     * @throws SQLException if there is an error executing the SQL statement
-     */
-    void updateRun() throws SQLException {
-        
-        PreparedStatement runLogStatement = null;
-        runLogStatement = 
-                connection.prepareStatement("UPDATE runs SET start_date = ?, end_date = ?, nevents = ?, nfiles = ?, end_ok = ? where run = ?");        
-        LOGGER.info("preparing to update run " + run + " in runs table ..");
-        runLogStatement.setTimestamp(1, new java.sql.Timestamp(runSummary.getStartDate().getTime()));
-        runLogStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getEndDate().getTime()));
-        runLogStatement.setInt(3, runSummary.getTotalEvents());
-        runLogStatement.setInt(4, runSummary.getEvioFileList().size());
-        runLogStatement.setBoolean(5, runSummary.isEndOkay());
-        runLogStatement.setInt(6, run);
-        runLogStatement.executeUpdate();
-        LOGGER.info("run " + run + " was updated");
-    }
-    
-    /**
-     * Insert a new row in the <i>runs</i> table.
-     *
-     * @param connection the database connection
-     * @throws SQLException if there is an error querying the database
-     */
-    void insertRun() throws SQLException {
-        PreparedStatement statement = 
-                connection.prepareStatement("INSERT INTO runs (run, start_date, end_date, nevents, nfiles, end_ok) VALUES(?, ?, ?, ?, ?, ?)");
-        LOGGER.info("preparing to insert run " + run + " into runs table ..");
-        statement.setInt(1, run);
-        statement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime()));
-        statement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime()));
-        statement.setInt(4, runSummary.getTotalEvents());
-        statement.setInt(5, runSummary.getEvioFileList().size());
-        statement.setBoolean(6, runSummary.isEndOkay());
-        statement.executeUpdate();
-        LOGGER.info("inserted run " + run + " to runs table");
-    }
-    
-    /**
-     * Return <code>true</code> if there is an existing row for this run summary.
-     * 
-     * @return <code>true</code> if there is an existing row for this run summary.
-     * @throws SQLException if there is an error executing the SQL query
-     */
-    boolean runExists() throws SQLException {
-        PreparedStatement s = connection.prepareStatement("SELECT run FROM runs where run = ?");
-        s.setInt(1, run);        
-        ResultSet rs = s.executeQuery();
-        return rs.first();
-    }
-    
-    /**
-     * Insert the file names into the run database.    
+        LOGGER.info("performing db insert for " + runSummary);
+
+        // Turn auto-commit off as this whole method is a single transaction.
+        connection.setAutoCommit(false);
+
+        // Does the run exist in the database already?
+        if (this.runExists()) {
+            // Is deleting existing rows allowed?
+            if (allowDeleteExisting) {
+                // Delete the existing rows.
+                this.delete();
+            } else {
+                // Rows exist but updating is disallowed which is a fatal error.
+                final RuntimeException x = new RuntimeException("Run " + runSummary.getRun() + " already exists and deleting is not allowed.");
+                LOGGER.log(Level.SEVERE, x.getMessage(), x);
+                throw x;
+            }
+        }
+
+        // Insert basic run log info.
+        this.insertRun();
+
+        // Insert list of files.
+        this.insertFiles();
+
+        // Insert EPICS data.
+        this.insertEpics();
+
+        // Insert scaler data.
+        if (runSummary.getScalerData() != null) {
+            new ScalerDataUpdater(connection, runSummary.getScalerData(), run).insert();
+        }
+
+        // Commit the transactions for this run.
+        LOGGER.info("committing transaction for run " + run);
+        connection.commit();
+
+        // Turn auto-commit back on.
+        connection.setAutoCommit(true);
+    }
+
+    /**
+     * Insert EPICS data into the <i>run_epics</i> table.
+     *
+     * @throws SQLException if there is an error performing the db query
+     */
+    private void insertEpics() throws SQLException {
+        final PreparedStatement statement = connection.prepareStatement("INSERT INTO run_epics (run, variable_name, value) values (?, ?, ?)");
+        final EpicsData data = runSummary.getEpicsData();
+        if (data != null) {
+            for (final String variableName : data.getUsedNames()) {
+                statement.setInt(1, this.run);
+                statement.setString(2, variableName);
+                statement.setDouble(3, data.getValue(variableName));
+                statement.executeUpdate();
+            }
+        } else {
+            LOGGER.warning("skipped inserting EPICS data (none found in RunSummary)");
+        }
+    }
+
+    /**
+     * Insert the file names into the run database.
      *
      * @param connection the database connection
      * @param run the run number
      * @throws SQLException if there is a problem executing one of the database queries
      */
-    void insertFiles() throws SQLException {
+    private void insertFiles() throws SQLException {
         LOGGER.info("updating file list ...");
         PreparedStatement filesStatement = null;
         filesStatement = connection.prepareStatement("INSERT INTO run_files (run, directory, name) VALUES(?, ?, ?)");
@@ -134,90 +205,42 @@
             filesStatement.executeUpdate();
         }
         LOGGER.info("run_files was updated");
-    }    
-    
-    /**
-     * Delete the records of the files associated to this run.
-     * 
-     * @param files the list of files
-     * @throws SQLException if there is an error executing the SQL query
-     */
-    void deleteFiles() throws SQLException {        
-        LOGGER.info("deleting rows from run_files for " + run + " ...");
-        PreparedStatement s = connection.prepareStatement("DELETE FROM run_files where run = ?");
-        s.setInt(1, run);
-        s.executeUpdate();
-        LOGGER.info("done deleting rows from run_files for " + run);
-    }
-    
-    /**
-     * Delete the row for this run from the <i>runs</i> table.
-     * <p>
-     * This doesn't delete the rows from <i>run_epics</i> or <i>run_files</i>.
-     * 
-     * @throws SQLException if there is an error executing the SQL query
-     */
-    void deleteRun() throws SQLException {
-        LOGGER.info("deleting record from runs for " + run + " ...");
-        PreparedStatement s = connection.prepareStatement("DELETE FROM runs where run = ?");
-        s.setInt(1, run);
-        s.executeUpdate();
-        LOGGER.info("deleted rows from runs for " + run);
-    }
-    
-    /**
-     * Return <code>true</code> if there is a row for at least one file for the run.
-     * @return <code>true</code> if there are file rows for this run
-     * @throws SQLException if there is an error executing the SQL query
-     */
-    boolean filesExist() throws SQLException {
-        PreparedStatement s = connection.prepareStatement("SELECT run FROM run_files where run = ?");
-        s.setInt(1, run);        
-        ResultSet rs = s.executeQuery();
-        return rs.first();
-    }
-    
-    /**
-     * Insert EPICS data into the run_log_epics table.
-     * 
-     * @throws SQLException if there is an error performing the db query
-     */
-    void insertEpics() throws SQLException {
-        PreparedStatement statement = connection.prepareStatement("INSERT INTO run_epics (run, variable_name, value) values (?, ?, ?)");
-        EpicsData data = runSummary.getEpicsData();
-        if (data != null) {
-            for (String variableName : data.getUsedNames()) {
-                statement.setInt(1, this.run);
-                statement.setString(2, variableName);
-                statement.setDouble(3, data.getValue(variableName));
-                statement.executeUpdate();
-            }
-        } else {
-            LOGGER.warning("skipped inserting EPICS data (none found in RunSummary)");
-        }
-    }
-    
-    /**
-     * Delete existing EPICS data from the run_log_epics table.
-     * 
-     * @throws SQLException if there is an error performing the db query
-     */
-    void deleteEpics() throws SQLException {
-        PreparedStatement statement = connection.prepareStatement("DELETE FROM run_epics WHERE run = ?");
-        statement.setInt(1, this.run);
+    }
+
+    /**
+     * Insert a new row in the <i>runs</i> table.
+     * <p>
+     * The row is filled from the run number and the run summary (start and end dates, event count, file count, end flag).
+     * @throws SQLException if there is an error querying the database
+     */
+    private void insertRun() throws SQLException {
+        final PreparedStatement statement = connection // NOTE(review): never closed in this method
+                .prepareStatement("INSERT INTO runs (run, start_date, end_date, nevents, nfiles, end_ok) VALUES(?, ?, ?, ?, ?, ?)");
+        LOGGER.info("preparing to insert run " + run + " into runs table ..");
+        statement.setInt(1, run);
+        statement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime())); // NOTE(review): assumes a non-null start date -- confirm
+        statement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime())); // NOTE(review): assumes a non-null end date -- confirm
+        statement.setInt(4, runSummary.getTotalEvents());
+        statement.setInt(5, runSummary.getEvioFileList().size()); // nfiles is the size of the EVIO file list
+        statement.setBoolean(6, runSummary.isEndOkay());
         statement.executeUpdate();
-    }
-    
+        LOGGER.info("inserted run " + run + " to runs table");
+    }
+
     /**
      * Return <code>true</code> if there is an existing row for this run summary.
-     * 
+     *
      * @return <code>true</code> if there is an existing row for this run summary.
      * @throws SQLException if there is an error executing the SQL query
      */
-    boolean epicsExists() throws SQLException {
-        PreparedStatement s = connection.prepareStatement("SELECT run from run_epics where run = ?");
+    private boolean runExists() throws SQLException {
+        final PreparedStatement s = connection.prepareStatement("SELECT run FROM runs where run = ?"); // NOTE(review): statement and result set are never closed here
         s.setInt(1, run);
-        ResultSet rs = s.executeQuery();
+        final ResultSet rs = s.executeQuery(); // NOTE(review): first() below is only specified for scrollable result sets; next() would be the portable check -- confirm against the driver in use
         return rs.first();
     }
-}
+
+    void setAllowDeleteExisting(final boolean allowDeleteExisting) { // plain setter: stores the flag on this updater
+        this.allowDeleteExisting = allowDeleteExisting;
+    }
+}

Added: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/ScalerDataUpdater.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/ScalerDataUpdater.java	(added)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/ScalerDataUpdater.java	Fri Jun 19 19:03:30 2015
@@ -0,0 +1,136 @@
+package org.hps.record.evio.crawler;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.hps.record.scalers.ScalerData;
+
+/**
+ * Database interface for inserting scaler data into the run log.
+ * <p>
+ * Rows are written to the <i>run_scalers</i> table, one per scaler index, keyed by run number.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class ScalerDataUpdater {
+
+    /** SQL for deleting all scaler rows of a run. */
+    private static final String SQL_DELETE = "DELETE FROM run_scalers WHERE run = ?";
+
+    /** SQL for inserting a single scaler value. */
+    private static final String SQL_INSERT = "INSERT INTO run_scalers (run, idx, value) VALUES (?, ?, ?)";
+
+    /** SQL for checking whether a run already has scaler rows. */
+    private static final String SQL_SELECT = "SELECT run FROM run_scalers WHERE run = ?";
+
+    /** Whether existing rows for the run may be deleted and replaced. */
+    private boolean allowDeleteExisting = false;
+
+    /** The database connection. */
+    private final Connection connection;
+
+    /** The scaler data to insert. */
+    private final ScalerData data;
+
+    /** The run number. */
+    private final int run;
+
+    /**
+     * Create an updater for the scaler data of one run.
+     *
+     * @param connection the database connection
+     * @param data the scaler data to insert
+     * @param run the run number
+     */
+    ScalerDataUpdater(final Connection connection, final ScalerData data, final int run) {
+        this.connection = connection;
+        this.data = data;
+        this.run = run;
+    }
+
+    /**
+     * Delete the existing scaler rows for this run.
+     *
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    private void delete() throws SQLException {
+        try (PreparedStatement s = connection.prepareStatement(SQL_DELETE)) {
+            s.setInt(1, run);
+            s.executeUpdate();
+        }
+    }
+
+    /**
+     * Return <code>true</code> if there is at least one scaler row for this run.
+     *
+     * @return <code>true</code> if scaler rows exist for this run
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    private boolean exists() throws SQLException {
+        try (PreparedStatement s = connection.prepareStatement(SQL_SELECT)) {
+            s.setInt(1, run);
+            try (ResultSet rs = s.executeQuery()) {
+                // next() is valid on forward-only result sets; first() is not required to be.
+                return rs.next();
+            }
+        }
+    }
+
+    /**
+     * Insert the scaler data for this run, replacing existing rows if that is allowed.
+     * <p>
+     * The rows are committed on success; on a database error the transaction is rolled back and the error is rethrown.
+     *
+     * @throws SQLException if there is an error executing one of the SQL queries
+     * @throws RuntimeException if rows already exist for this run and deleting them is not allowed
+     */
+    void insert() throws SQLException {
+        final boolean replaceExisting = this.exists();
+        if (replaceExisting && !allowDeleteExisting) {
+            throw new RuntimeException("Scaler data already exists and updates are not allowed.");
+        }
+        try {
+            if (replaceExisting) {
+                this.delete();
+            }
+            this.insertScalerData();
+            // Commit only after every row was inserted; committing on the failure path would make the rollback below pointless.
+            connection.commit();
+        } catch (final SQLException e) {
+            try {
+                connection.rollback();
+            } catch (final SQLException rollbackError) {
+                e.addSuppressed(rollbackError);
+            }
+            // Rethrow so the caller does not mistake a failed insert for success.
+            throw e;
+        }
+    }
+
+    /**
+     * Insert one row per scaler index into the <i>run_scalers</i> table.
+     *
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    private void insertScalerData() throws SQLException {
+        try (PreparedStatement statement = connection.prepareStatement(SQL_INSERT)) {
+            for (int idx = 0; idx < data.size(); idx++) {
+                statement.setInt(1, run);
+                statement.setInt(2, idx);
+                statement.setInt(3, data.getValue(idx));
+                statement.executeUpdate();
+            }
+        }
+    }
+
+    /**
+     * Set whether existing scaler rows for the run may be deleted before inserting.
+     *
+     * @param allowDeleteExisting <code>true</code> to allow deleting existing rows
+     */
+    void setAllowDeleteExisting(final boolean allowDeleteExisting) {
+        this.allowDeleteExisting = allowDeleteExisting;
+    }
+}

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java	Fri Jun 19 19:03:30 2015
@@ -1,5 +1,8 @@
 /**
- * Implements an EVIO file crawler to extract run and configuration information, including run start and end dates, event counts, etc.
+ * Implements an EVIO file crawler to extract run and configuration information.
+ * <p>
+ * This information includes run start and end dates, event counts, EPICS data, scaler data, 
+ * and the list of associated EVIO files.
  *
  * @author Jeremy McCormick, SLAC
  */