Print

Print


Author: [log in to unmask]
Date: Fri Jun  5 17:09:45 2015
New Revision: 3102

Log:
Updates and cleanup to EVIO file crawler in preparation for running at JLAB; improved the time stamp arguments to allow a date to be supplied for cut off; reorganized the database update classes.

Added:
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java
Modified:
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/DateFileFilter.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EpicsLog.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EventTypeLog.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileFilter.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileList.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileSequenceComparator.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileUtilities.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileVisitor.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/JCacheManager.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunFilter.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java
    java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/DateFileFilter.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/DateFileFilter.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/DateFileFilter.java	Fri Jun  5 17:09:45 2015
@@ -12,7 +12,7 @@
  * <p>
  * Files with a creation date after the time stamp will be rejected.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class DateFileFilter implements FileFilter {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EpicsLog.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EpicsLog.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EpicsLog.java	Fri Jun  5 17:09:45 2015
@@ -11,7 +11,7 @@
 /**
  * Create a summary log of EPICS information found in EVIO events.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EpicsLog extends EvioEventProcessor {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EventTypeLog.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EventTypeLog.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EventTypeLog.java	Fri Jun  5 17:09:45 2015
@@ -11,7 +11,7 @@
 /**
  * This class makes a log of the number of different event types found in a run by their tag value.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EventTypeLog extends EvioEventProcessor {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileCrawler.java	Fri Jun  5 17:09:45 2015
@@ -5,6 +5,7 @@
 import java.nio.file.FileVisitOption;
 import java.nio.file.Files;
 import java.nio.file.attribute.BasicFileAttributes;
+import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashSet;
@@ -24,11 +25,14 @@
  * Crawls EVIO files in a directory tree, groups the files that are found by run, and optionally performs various tasks based on the run summary
  * information that is accumulated, including printing a summary, caching the files from JLAB MSS, and updating a run database.
  *
- * @author <a href="mailto:[log in to unmask]">Jeremy McCormick</a>
+ * @author Jeremy McCormick, SLAC
  */
-// TODO: write out Auger XML (and don't actually execute job)
-// TODO: write summary EVIO file with control/EPICS events (maybe?)
-// TODO: flag to allow overwriting existing information in run table
+// TODO: need options for...
+// -database connections prop file
+// -writing Auger XML for crawl job (and don't actually execute job)
+// -writing out a summary EVIO file containing control events only (PRESTART, EPICS, scalars?, END)
+// -allow overwriting existing information in run table rather than inserting
+// -get supplementary information from run spreadsheet (including whether run was "JUNK" or not)
 public final class EvioFileCrawler {
 
     /**
@@ -45,23 +49,27 @@
      * Command line options for the crawler.
      */
     private static final Options OPTIONS = new Options();
+    
+    private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 
     /**
      * Statically define the command options.
      */
     static {
+        OPTIONS.addOption("a", "accept-runs", true, "list of run numbers to accept (others will be excluded)");
+        OPTIONS.addOption("b", "begin-date", true, "min date for files (example 2015-03-26 11:28:59)");
+        OPTIONS.addOption("c", "cache-files", false, "automatically cache files from MSS (JLAB only)");
+        OPTIONS.addOption("d", "directory", true, "root directory to start crawling (default is current dir)");
+        OPTIONS.addOption("e", "epics", false, "process EPICS data found in EVIO files");
         OPTIONS.addOption("h", "help", false, "print help and exit");
-        OPTIONS.addOption("t", "timestamp-file", true, "timestamp file for date filtering; modified time will be set at end of job");
-        OPTIONS.addOption("d", "directory", true, "starting directory");
-        OPTIONS.addOption("r", "runs", true, "list of runs to accept (others will be excluded)");
-        OPTIONS.addOption("s", "summary", false, "print run summary at end of job");
-        OPTIONS.addOption("L", "log-level", true, "set log level (INFO, FINE, etc.)");
-        OPTIONS.addOption("u", "update", false, "update the run database");
-        OPTIONS.addOption("e", "epics", false, "process EPICS data");
-        OPTIONS.addOption("c", "cache", false, "automatically cache all files from MSS");
-        OPTIONS.addOption("w", "wait", true, "total time in seconds to allow for file caching");
-        OPTIONS.addOption("m", "max-files", true, "maximum number of files to accept per run (for debugging)");
-        OPTIONS.addOption("p", "print", true, "set event printing interval when running EVIO processors");
+        OPTIONS.addOption("m", "max-files", true, "max number of files to process per run (only for debugging)");
+        OPTIONS.addOption("p", "print", true, "set event print interval during EVIO processing");
+        OPTIONS.addOption("r", "insert-run-log", false, "update the run database (not done by default)");
+        OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name for date cut off");
+        OPTIONS.addOption("s", "print-summary", false, "print run summary at the end of the job");
+        OPTIONS.addOption("w", "max-cache-wait", true, "total seconds to allow for file caching");
+        OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)");
+        OPTIONS.addOption("u", "update-run-log", false, "allow overriding existing data in the run db (not allowed by default)");
     }
 
     /**
@@ -83,7 +91,7 @@
     private final Set<Integer> acceptRuns = new HashSet<Integer>();
 
     /**
-     * The class for managing the file caching using the jcache command.
+     * The class for managing the file caching using the 'jcache' command.
      */
     private final JCacheManager cacheManager = new JCacheManager();
 
@@ -135,7 +143,7 @@
     /**
      * Flag indicating if the run database should be updated from results of the job.
      */
-    private boolean update = false;
+    private boolean updateRunLog = false;
 
     /**
      * Flag indicating if the file cache should be used (e.g. jcache automatically executed to move files to the cache disk from tape).
@@ -146,6 +154,8 @@
      * The maximum wait time in milliseconds to allow for file caching operations.
      */
     private Long waitTime;
+    
+    private boolean allowUpdates = false;
 
     /**
      * Create the processor for a single run.
@@ -202,18 +212,26 @@
             if (cl.hasOption("t")) {
                 this.timestampFile = new File(cl.getOptionValue("t"));
                 if (!this.timestampFile.exists()) {
-                    throw new IllegalArgumentException("The timestamp file does not exist: " + this.timestampFile.getPath());
-                }
-                try {
-                    this.timestamp = new Date(Files.readAttributes(this.timestampFile.toPath(), BasicFileAttributes.class).lastModifiedTime()
-                            .toMillis());
-                } catch (final IOException e) {
-                    throw new RuntimeException("Error getting attributes of timestamp file.", e);
-                }
-            }
-
-            if (cl.hasOption("r")) {
-                for (final String runString : cl.getOptionValues("r")) {
+                    try {
+                        // Create new time stamp file.
+                        LOGGER.info("creating new timestamp file " + this.timestampFile.getPath());
+                        this.timestampFile.createNewFile();
+                    } catch (IOException e) {
+                        throw new IllegalArgumentException("Error creating timestamp file " + this.timestampFile.getPath());
+                    }
+                } else { 
+                    try {
+                        // Get cut-off date for files from existing time stamp file. 
+                        this.timestamp = new Date(Files.readAttributes(this.timestampFile.toPath(), BasicFileAttributes.class).lastModifiedTime().toMillis());
+                        LOGGER.info("got timestamp " + this.timestamp + " from existing file " + this.timestampFile.getPath());
+                    } catch (final IOException e) {
+                        throw new RuntimeException("Error getting attributes of timestamp file.", e);
+                    }
+                }
+            }
+
+            if (cl.hasOption("a")) {
+                for (final String runString : cl.getOptionValues("a")) {
                     final Integer acceptRun = Integer.parseInt(runString);
                     this.acceptRuns.add(acceptRun);
                     LOGGER.config("added accept run " + acceptRun);
@@ -224,8 +242,8 @@
                 this.printSummary = true;
             }
 
-            if (cl.hasOption("u")) {
-                this.update = true;
+            if (cl.hasOption("r")) {
+                this.updateRunLog = true;
             }
 
             if (cl.hasOption("e")) {
@@ -250,7 +268,26 @@
             if (cl.hasOption("p")) {
                 this.eventPrintInterval = Integer.parseInt(cl.getOptionValue("p"));
             }
-
+            
+            if (cl.hasOption("u")) {
+                this.allowUpdates = true;
+                if (!this.updateRunLog) {
+                    LOGGER.info("the -u option is ignored because run_log is not being updated");
+                }
+            }
+            
+            if (cl.hasOption("b")) {
+                try {
+                    if (this.timestamp != null) {
+                        LOGGER.warning("existing timestamp from file " + this.timestamp + " will be overridden by date from -b argument");
+                    }
+                    this.timestamp = DATE_FORMAT.parse(cl.getOptionValue("b"));
+                    LOGGER.info("set timestamp to " + DATE_FORMAT.format(this.timestamp));
+                } catch (java.text.ParseException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+            
         } catch (final ParseException e) {
             throw new RuntimeException("Error parsing options.", e);
         }
@@ -343,9 +380,9 @@
         }
 
         // Insert run information into the database.
-        if (this.update) {
+        if (this.updateRunLog) {
             // Update run log.
-            new RunLogUpdater(runs).insert();
+            new RunLogUpdater(runs, allowUpdates).insert();
         }
 
         // Update the timestamp file which can be used to tell which files have been processed.

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileFilter.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileFilter.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileFilter.java	Fri Jun  5 17:09:45 2015
@@ -7,7 +7,7 @@
  * This is a simple file filter that will accept EVIO files with a certain convention to their naming which looks like <i>FILENAME.evio.SEQUENCE</i>.
  * This matches the convention used by the CODA DAQ software.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EvioFileFilter implements FileFilter {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileList.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileList.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileList.java	Fri Jun  5 17:09:45 2015
@@ -14,7 +14,7 @@
  * This is a list of <code>File</code> objects that are assumed to be EVIO files. There are some added utilities for getting the total number of
  * events in all the files.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EvioFileList extends ArrayList<File> {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileSequenceComparator.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileSequenceComparator.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileSequenceComparator.java	Fri Jun  5 17:09:45 2015
@@ -6,7 +6,7 @@
 /**
  * Compare two EVIO files by their sequence numbers.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EvioFileSequenceComparator implements Comparator<File> {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileUtilities.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileUtilities.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileUtilities.java	Fri Jun  5 17:09:45 2015
@@ -16,7 +16,7 @@
 /**
  * A miscellaneous collection of EVIO file utility methods used by classes in the crawler package.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EvioFileUtilities {
 
@@ -31,10 +31,10 @@
     private static final long MILLISECONDS = 1000L;
 
     /**
-     * Get a cached file path assuming that the input file is on the JLAB MSS.
+     * Get a cached file path, assuming that the input file path is on the JLAB MSS (i.e. it starts with "/mss").
      *
      * @param file the MSS file path
-     * @return the cached file path
+     * @return the cached file path (prepends "/cache" to the path)
      * @throws IllegalArgumentException if the file is not on the MSS (e.g. path does not start with "/mss")
      */
     static File getCachedFile(final File file) {

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileVisitor.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileVisitor.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/EvioFileVisitor.java	Fri Jun  5 17:09:45 2015
@@ -16,9 +16,11 @@
 import org.lcsim.util.log.LogUtil;
 
 /**
- * A file visitor that crawls directories looking for EVIO files.
+ * A file visitor that crawls directories for EVIO files.
+ * <p>
+ * It updates a run log to keep track of which files are associated with which run numbers.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class EvioFileVisitor extends SimpleFileVisitor<Path> {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/JCacheManager.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/JCacheManager.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/JCacheManager.java	Fri Jun  5 17:09:45 2015
@@ -22,29 +22,27 @@
 /**
  * Utility class for using the <i>jcache</i> command at JLAB.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class JCacheManager {
 
     /**
      * Keeps track of cache status for a single file.
-     *
-     * @author Jeremy McCormick
      */
     static class CacheStatus {
 
         /**
-         * Flag indicating if file is cached yet.
+         * Flag indicating if the file is cached yet.
          */
         private boolean cached = false;
 
         /**
-         * Path to the file on the MSS (not the cached path).
+         * Path to the file on the MSS.
          */
         private File file = null;
 
         /**
-         * The request ID from the 'jcache submit' command.
+         * The request ID from executing the 'jcache submit' command.
          */
         private Integer requestId = null;
 
@@ -301,7 +299,7 @@
     /**
      * Submit cache request for every file in a list.
      *
-     * @param files
+     * @param files the list of files
      */
     void cache(final List<File> files) {
         for (final File file : files) {
@@ -354,7 +352,7 @@
     }
 
     /**
-     * Clear all cache statuses.
+     * Clear all cache statuses, which means files are no longer tracked by this manager.
      */
     void clear() {
         this.cacheStatuses.clear();

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunFilter.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunFilter.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunFilter.java	Fri Jun  5 17:09:45 2015
@@ -7,7 +7,7 @@
 /**
  * A filter which rejects files that have a run number not in the accept list.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class RunFilter implements FileFilter {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLog.java	Fri Jun  5 17:09:45 2015
@@ -16,7 +16,7 @@
  * <p>
  * This class is able to update the run database using the <code>insert</code> methods.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class RunLog {
 

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunLogUpdater.java	Fri Jun  5 17:09:45 2015
@@ -1,11 +1,7 @@
 package org.hps.record.evio.crawler;
 
-import java.io.File;
 import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
 import java.sql.SQLException;
-import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -15,7 +11,7 @@
 /**
  * Updates the run database with run log information from crawler job.
  * 
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 public class RunLogUpdater {
     
@@ -24,11 +20,13 @@
      */
     private static final Logger LOGGER = LogUtil.create(RunLogUpdater.class);
     
-    RunLog runLog;
+    private RunLog runLog;
     
-    final Connection connection;
+    private final Connection connection;
+    
+    private boolean allowUpdates = false;
         
-    RunLogUpdater(RunLog runLog) {
+    RunLogUpdater(RunLog runLog, boolean allowUpdates) {
         this.runLog = runLog;
         
         // Create database connection to use in this session.
@@ -43,36 +41,66 @@
             e.printStackTrace();
         }
     }
-            
-    boolean hasRun(int run) {
-        boolean hasRun = false;
-        PreparedStatement statement = null;
-        try {
-            statement = connection.prepareStatement("SELECT run from run_log where run = ?");
-            statement.setInt(1, run);
-            ResultSet rs = statement.executeQuery();
-            if (rs.next()) hasRun = true;
-        } catch (final SQLException e) {
-            throw new RuntimeException(e);
-        } 
-        return hasRun;
-    }
     
     /**
-     * Insert all the information from the run log into the run database.
+     * Insert or update the run summary and file log of every run in the run log,
+     * using the database connection that was created in the constructor.
+     *
+     * @throws SQLException if there is an error querying the database
      */
-    void insert() {
-
+    void insert() throws SQLException {
+        
         LOGGER.info("inserting runs into run_log ...");
         try {
             connection.setAutoCommit(false);
 
-            this.insertRunLog(connection);
+            // Update or insert a row for every run found.
+            for (final Integer run : runLog.getSortedRunNumbers()) {
+                
+                RunSummary runSummary = runLog.getRunSummary(run);
+                
+                LOGGER.info("updating " + runSummary);
+                                
+                RunSummaryUpdater updater = new RunSummaryUpdater(connection, runSummary);      
+                                
+                // Does a row already exist for run?
+                if (updater.runLogExists()) {
+                    LOGGER.info("record for " + run + " exists already");
+                    // Are updates allowed?
+                    if (allowUpdates) {
+                        LOGGER.info("updating existing row in run_log for " + run);
+                        // Update existing row.
+                        updater.updateRunLog();
+                    } else {
+                        // Row exists and updates not allowed which is an error.
+                        throw new RuntimeException("Row already exists for run " + run + " and allowUpdates is false");
+                    }
+                } else {                
+                    
+                    LOGGER.info("inserting new row in run_log for " + run);
+                    
+                    // Insert new record into run_log.
+                    updater.insertRunLog();
+                }
 
-            this.insertFiles(connection);
-
-            connection.commit();
-
+                boolean fileLogExists = updater.fileLogExists();
+                
+                // Are updates disallowed and file log exists?
+                if (!allowUpdates && fileLogExists) {
+                    // File records exist but updates not allowed so this is an error.
+                    throw new RuntimeException("Cannot delete existing file records because allowUpdates is false");                    
+                }
+                
+                // Delete existing file log.
+                if (fileLogExists) {
+                    // Delete the file log.
+                    updater.deleteFileLog();
+                }
+                
+                // Insert the file log.
+                updater.insertFileLog();
+            }
+            
         } catch (final SQLException e) {
             LOGGER.log(Level.SEVERE, "rolling back transaction", e);
             try {
@@ -83,69 +111,10 @@
         } finally {
             try {
                 connection.setAutoCommit(true);
+                connection.close();
             } catch (final SQLException e) {
                 throw new RuntimeException(e);
-            }            
+            }
         }
-    }
-
-    /**
-     * Insert the file lists into the run database.
-     *
-     * @param connection the database connection
-     * @throws SQLException if there is an error executing the SQL query
-     */
-    private void insertFiles(final Connection connection) throws SQLException {
-        for (final int run : runLog.getSortedRunNumbers()) {
-            insertFiles(connection, run, runLog.getRunSummary(run).getEvioFileList());
-        }
-    }
-
-    /**
-     * Insert the run summary information into the database.
-     *
-     * @param connection the database connection
-     * @throws SQLException if there is an error querying the database
-     */
-    private void insertRunLog(final Connection connection) throws SQLException {
-        PreparedStatement runLogStatement = null;
-        runLogStatement = connection
-                .prepareStatement("INSERT INTO run_log (run, start_date, end_date, nevents, nfiles, end_ok, last_updated) VALUES(?, ?, ?, ?, ?, ?, NOW())");
-        for (final Integer run : runLog.getSortedRunNumbers()) {
-            LOGGER.info("preparing to insert run " + run + " into database ..");
-            final RunSummary runSummary = runLog.getRunSummary(run);
-            runLogStatement.setInt(1, run);
-            runLogStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime()));
-            runLogStatement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime()));
-            runLogStatement.setInt(4, runSummary.getTotalEvents());
-            runLogStatement.setInt(5, runSummary.getEvioFileList().size());
-            runLogStatement.setBoolean(6, runSummary.isEndOkay());
-            runLogStatement.executeUpdate();
-            LOGGER.info("committed run " + run + " to run_log");
-        }
-        LOGGER.info("run_log was updated!");
-    }
-    
-    /**
-     * Insert the file names into the run database.
-     *
-     * @param connection the database connection
-     * @param run the run number
-     * @throws SQLException if there is a problem executing one of the database queries
-     */
-    void insertFiles(final Connection connection, final int run, List<File> files) throws SQLException {
-        LOGGER.info("updating file list ...");
-        PreparedStatement filesStatement = null;
-        filesStatement = connection.prepareStatement("INSERT INTO run_log_files (run, directory, name) VALUES(?, ?, ?)");
-        LOGGER.info("inserting files from run " + run + " into database");
-        for (final File file : files) {
-            LOGGER.info("creating update statement for " + file.getPath());
-            filesStatement.setInt(1, run);
-            filesStatement.setString(2, file.getParentFile().getPath());
-            filesStatement.setString(3, file.getName());
-            LOGGER.info("executing statement: " + filesStatement);
-            filesStatement.executeUpdate();
-        }
-        LOGGER.info("run_log_files was updated!");
-    }    
+    }             
 }

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunProcessor.java	Fri Jun  5 17:09:45 2015
@@ -24,7 +24,7 @@
  * <p>
  * There is also a list of processors which is run on all events from the run, if the processor list is not empty.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 final class RunProcessor {
 
@@ -120,7 +120,8 @@
         List<File> files = this.runSummary.getEvioFileList();
         if (this.maxFiles != -1) {
             LOGGER.info("limiting processing to first " + this.maxFiles + " files from max files setting");
-            files = files.subList(0, this.maxFiles - 1);
+            files = files.subList(0, this.maxFiles);
+            LOGGER.info("using file list with size " + files.size());
         }
         return files;
     }
@@ -195,7 +196,6 @@
      * @throws IOException if there is some kind of IO error
      * @throws Exception if there is a generic error thrown by event processing
      */
-    // FIXME: I think this method is terribly inefficient right now.
     private void process(final File file) throws EvioException, IOException, Exception {
         LOGGER.fine("processing " + file.getPath() + " ...");
 
@@ -240,7 +240,6 @@
 
             // Check if END event is present if this is the last file in the run.
             if (file.equals(this.runSummary.getEvioFileList().last())) {
-                LOGGER.info("checking end okay ...");
                 final boolean endOkay = this.isEndOkay(reader);
                 this.runSummary.setEndOkay(endOkay);
                 LOGGER.info("endOkay set to " + endOkay);

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummary.java	Fri Jun  5 17:09:45 2015
@@ -10,7 +10,8 @@
 import org.lcsim.util.log.LogUtil;
 
 /**
- * This class models the run summary information which is persisted as one record in the <i>run_log</i> table.
+ * This class models the run summary information which is persisted as a row in the <i>run_log</i> table
+ * of the run database.
  * <p>
  * This information includes:
  * <ul>
@@ -20,9 +21,10 @@
  * <li>total number of events across all files in the run</li>
  * <li>number of files found belonging to the run</li>
  * <li>whether the EVIO END event was found</li>
+ * <li>whether the run is considered good</li>
  * </ul>
  *
- * @author <a href="mailto:[log in to unmask]">Jeremy McCormick</a>
+ * @author Jeremy McCormick, SLAC
  */
 final class RunSummary {
 
@@ -245,4 +247,9 @@
     void sortFiles() {
         this.files.sort();
     }
+    
+    public String toString() {
+        return "RunSummary { run: " + this.run + ", started: " + this.getStartDate() + ", ended: " + this.getEndDate() + ", events: " 
+                + this.getTotalEvents() + ", endOkay: " + endOkay + " }";
+    }
 }

Added: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java	(added)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/RunSummaryUpdater.java	Fri Jun  5 17:09:45 2015
@@ -0,0 +1,221 @@
+package org.hps.record.evio.crawler;
+
+import java.io.File;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Utilities for updating or inserting a run summary row in the run_log table and the file rows in run_log_files.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class RunSummaryUpdater {
+
+    /**
+     * Setup logging.
+     */
+    private static final Logger LOGGER = LogUtil.create(RunSummaryUpdater.class);
+
+    /**
+     * The run summary to update or insert.
+     */
+    private final RunSummary runSummary;
+
+    /**
+     * The database connection.
+     */
+    private final Connection connection;
+
+    /**
+     * The run number (read from the summary in the constructor for convenience).
+     */
+    private final int run;
+
+    /**
+     * Create a <code>RunSummaryUpdater</code> for the given <code>RunSummary</code>.
+     *
+     * @param connection the database connection
+     * @param runSummary the run summary to update or insert
+     * @throws IllegalArgumentException if either argument is <code>null</code>
+     */
+    RunSummaryUpdater(final Connection connection, final RunSummary runSummary) {
+
+        if (connection == null) {
+            throw new IllegalArgumentException("connection is null");
+        }
+        if (runSummary == null) {
+            throw new IllegalArgumentException("runSummary is null");
+        }
+
+        this.connection = connection;
+        this.runSummary = runSummary;
+        this.run = runSummary.getRun();
+    }
+
+    /**
+     * Execute a SQL update to modify the existing row for this run in the run_log table and commit it.
+     *
+     * @throws SQLException if there is an error executing the SQL statement
+     */
+    void updateRunLog() throws SQLException {
+        final PreparedStatement statement = connection.prepareStatement(
+                "UPDATE run_log SET start_date = ?, end_date = ?, nevents = ?, nfiles = ?, end_ok = ? where run = ?");
+        try {
+            LOGGER.info("preparing to update run " + run + " in run_log ..");
+            statement.setTimestamp(1, new java.sql.Timestamp(runSummary.getStartDate().getTime()));
+            statement.setTimestamp(2, new java.sql.Timestamp(runSummary.getEndDate().getTime()));
+            statement.setInt(3, runSummary.getTotalEvents());
+            statement.setInt(4, runSummary.getEvioFileList().size());
+            statement.setBoolean(5, runSummary.isEndOkay());
+            statement.setInt(6, run);
+            statement.executeUpdate();
+            connection.commit();
+        } finally {
+            // Release the statement even if binding, executing or committing fails.
+            statement.close();
+        }
+        LOGGER.info("run " + run + " was updated in run_log");
+    }
+
+    /**
+     * Insert a new row for this run in the run_log table and commit it.
+     *
+     * @throws SQLException if there is an error executing the SQL statement
+     */
+    void insertRunLog() throws SQLException {
+        final PreparedStatement statement = connection.prepareStatement(
+                "INSERT INTO run_log (run, start_date, end_date, nevents, nfiles, end_ok) VALUES(?, ?, ?, ?, ?, ?)");
+        try {
+            LOGGER.info("preparing to insert run " + run + " into run_log ..");
+            statement.setInt(1, run);
+            statement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime()));
+            statement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime()));
+            statement.setInt(4, runSummary.getTotalEvents());
+            statement.setInt(5, runSummary.getEvioFileList().size());
+            statement.setBoolean(6, runSummary.isEndOkay());
+            statement.executeUpdate();
+            connection.commit();
+        } finally {
+            // Release the statement even if binding, executing or committing fails.
+            statement.close();
+        }
+        LOGGER.info("insert run " + run + " to run_log");
+    }
+
+    /**
+     * Return <code>true</code> if there is an existing row for this run in the run_log table.
+     *
+     * @return <code>true</code> if there is an existing row for this run
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    boolean runLogExists() throws SQLException {
+        return hasRows("SELECT run from run_log where run = ?");
+    }
+
+    /**
+     * Insert one row into the run_log_files table for each file in the run summary's file list and commit them.
+     *
+     * @throws SQLException if there is a problem executing one of the SQL statements
+     */
+    void insertFileLog() throws SQLException {
+        LOGGER.info("updating file list ...");
+        final PreparedStatement statement = connection.prepareStatement(
+                "INSERT INTO run_log_files (run, directory, name) VALUES(?, ?, ?)");
+        try {
+            LOGGER.info("inserting files from run " + run + " into database");
+            for (final File file : runSummary.getEvioFileList()) {
+                LOGGER.info("creating update statement for " + file.getPath());
+                statement.setInt(1, run);
+                statement.setString(2, file.getParentFile().getPath());
+                statement.setString(3, file.getName());
+                LOGGER.info("executing statement: " + statement);
+                statement.executeUpdate();
+            }
+            connection.commit();
+        } finally {
+            // Release the statement even if one of the inserts or the commit fails.
+            statement.close();
+        }
+        LOGGER.info("run_log_files was updated!");
+    }
+
+    /**
+     * Delete the rows of the files associated with this run from the run_log_files table.
+     *
+     * @throws SQLException if there is an error executing the SQL statement
+     */
+    void deleteFileLog() throws SQLException {
+        LOGGER.info("deleting run_log_files for " + run + " ...");
+        deleteRows("DELETE FROM run_log_files where run = ?");
+        LOGGER.info("done deleting run_log_files for " + run);
+    }
+
+    /**
+     * Delete the row for this run from the run_log table.
+     *
+     * @throws SQLException if there is an error executing the SQL statement
+     */
+    void deleteRunLog() throws SQLException {
+        LOGGER.info("deleting run_log for " + run + " ...");
+        deleteRows("DELETE FROM run_log where run = ?");
+        LOGGER.info("done deleting run_log for " + run);
+    }
+
+    /**
+     * Return <code>true</code> if there is a row for at least one file for the run.
+     *
+     * @return <code>true</code> if there are file rows for this run
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    boolean fileLogExists() throws SQLException {
+        return hasRows("SELECT run FROM run_log_files where run = ?");
+    }
+
+    /**
+     * Execute a DELETE statement whose single parameter is the run number, commit it and close the statement.
+     *
+     * @param sql the DELETE statement with one placeholder for the run number
+     * @throws SQLException if there is an error executing the SQL statement
+     */
+    private void deleteRows(final String sql) throws SQLException {
+        final PreparedStatement statement = connection.prepareStatement(sql);
+        try {
+            statement.setInt(1, run);
+            statement.executeUpdate();
+            connection.commit();
+        } finally {
+            statement.close();
+        }
+    }
+
+    /**
+     * Execute a query whose single parameter is the run number and check whether it returns any rows.
+     * <p>
+     * Uses <code>ResultSet.next()</code> rather than <code>ResultSet.first()</code> because the latter is specified
+     * to throw on forward-only result sets, which is the default type for statements prepared from a SQL string.
+     *
+     * @param sql the SELECT statement with one placeholder for the run number
+     * @return <code>true</code> if the query returned at least one row
+     * @throws SQLException if there is an error executing the SQL query
+     */
+    private boolean hasRows(final String sql) throws SQLException {
+        final PreparedStatement statement = connection.prepareStatement(sql);
+        try {
+            statement.setInt(1, run);
+            final ResultSet resultSet = statement.executeQuery();
+            try {
+                return resultSet.next();
+            } finally {
+                resultSet.close();
+            }
+        } finally {
+            statement.close();
+        }
+    }
+}

Modified: java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java
 =============================================================================
--- java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java	(original)
+++ java/trunk/record-util/src/main/java/org/hps/record/evio/crawler/package-info.java	Fri Jun  5 17:09:45 2015
@@ -1,6 +1,6 @@
 /**
  * Implements an EVIO file crawler for extracting run and configuration information, including run start and end dates, event counts, etc.
  *
- * @author Jeremy McCormick
+ * @author Jeremy McCormick, SLAC
  */
 package org.hps.record.evio.crawler;