Print

Print


Author: [log in to unmask]
Date: Fri May  1 17:54:41 2015
New Revision: 2880

Log:
More work on EVIO file crawler.

Modified:
    java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java

Modified: java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java
 =============================================================================
--- java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java	(original)
+++ java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java	Fri May  1 17:54:41 2015
@@ -3,25 +3,33 @@
 import java.io.File;
 import java.io.FileFilter;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.nio.file.FileVisitOption;
 import java.nio.file.FileVisitResult;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.SimpleFileVisitor;
 import java.nio.file.attribute.BasicFileAttributes;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.PosixParser;
+import org.hps.conditions.database.ConnectionParameters;
 import org.hps.record.evio.EvioEventConstants;
 import org.hps.record.evio.EvioEventUtilities;
 import org.jlab.coda.jevio.BaseStructure;
@@ -30,40 +38,56 @@
 import org.jlab.coda.jevio.EvioReader;
 import org.lcsim.util.log.LogUtil;
 
+/**
+ * A utility for scanning EVIO files by run.
+ *
+ * @author <a href="mailto:[log in to unmask]">Jeremy McCormick</a>
+ */
 // TODO:
 //
-// Info from files:
-// -run number
-// -list of files
-// -start date (from PRESTART)
-// -end date (from END)
-// -total number of events
+// tasks per run:
 //
-// -get trigger config
-// -get SVT config
-//
-// Command line args:
-
-// -start and end run number filter (outside range will be excluded)
-// -list of run numbers (not in list will be excluded)
-// -output timestamp file (when dir walk ends)
-// -list of "tasks" to execute for each EVIO file (register run data, put into file catalog, etc.)
-// -caching to cache first/last files or all files for run
+// -get list of files in run
+// -cache all files from run to /cache/mss
+// -get start and end dates
+// -get total number of events
+// -dump trigger config
+// -dump SVT config
+// -check for missing or corrupt files in run
+// -update run database
 public class EvioFileScanner {
 
+    static class DateFilter implements FileFilter {
+
+        Date date;
+
+        DateFilter(final Date date) {
+            this.date = date;
+        }
+
+        @Override
+        public boolean accept(final File pathname) {
+            BasicFileAttributes attr = null;
+            try {
+                attr = Files.readAttributes(pathname.toPath(), BasicFileAttributes.class);
+            } catch (final IOException e) {
+                throw new RuntimeException("Error getting file attributes.", e);
+            }
+            return attr.creationTime().toMillis() > this.date.getTime();
+        }
+    }
+
     static class EvioFileList extends ArrayList<File> {
 
-        public File first() {
-            return this.get(0);
-        }
-
-        public int getTotalEvents() {
-            int totalEvents = 0;
+        int totalEvents = 0;
+
+        void computeTotalEvents() {
+            this.totalEvents = 0;
             for (final File file : this) {
                 EvioReader reader = null;
                 try {
-                    reader = new EvioReader(file, false);
-                    totalEvents += reader.getEventCount();
+                    reader = open(file);
+                    this.totalEvents += reader.getEventCount();
                 } catch (EvioException | IOException e) {
                     throw new RuntimeException(e);
                 } finally {
@@ -76,14 +100,21 @@
                     }
                 }
             }
-            return totalEvents;
-        }
-
-        public File last() {
+        }
+
+        File first() {
+            return this.get(0);
+        }
+
+        int getTotalEvents() {
+            return this.totalEvents;
+        }
+
+        File last() {
             return this.get(this.size() - 1);
         }
 
-        public void sort() {
+        void sort() {
             final List<File> fileList = new ArrayList<File>(this);
             Collections.sort(fileList, new EvioFileSequenceComparator());
             this.clear();
@@ -101,47 +132,102 @@
         }
     }
 
-    // hps_005077.evio.20
     static class EvioFileVisitor extends SimpleFileVisitor<Path> {
 
-        static SimpleEvioFileFilter FILTER = new SimpleEvioFileFilter();
-
-        FileRunMap runMap = new FileRunMap();
-
-        FileRunMap getRunMap() {
-            return this.runMap;
+        boolean cache = false;
+
+        List<FileFilter> filters = new ArrayList<FileFilter>();
+
+        RunLog runs = new RunLog();
+
+        EvioFileVisitor() {
+            addFilter(new EvioFilter());
+        }
+
+        boolean accept(final File file) {
+            boolean accept = true;
+            for (final FileFilter filter : this.filters) {
+                accept = filter.accept(file);
+                if (accept == false) {
+                    LOGGER.fine(filter.getClass().getSimpleName() + " rejected file: " + file.getPath());
+                    break;
+                }
+            }
+            return accept;
+        }
+
+        void addFilter(final FileFilter filter) {
+            this.filters.add(filter);
+            LOGGER.config("added filter: " + filter.getClass().getSimpleName());
+        }
+
+        RunLog getRunLog() {
+            return this.runs;
+        }
+
+        void setCache(final boolean cache) {
+            this.cache = cache;
         }
 
         @Override
         public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) {
 
             final File file = path.toFile();
-            if (FILTER.accept(file)) {
-                LOGGER.info("found EVIO file " + file.getPath());
-
-                final Integer runNumber = getRunNumber(file);
-                final Integer sequenceNumber = getSequenceNumber(file);
-
-                LOGGER.info("run number " + runNumber);
-                LOGGER.info("sequence number " + sequenceNumber);
-
-                this.runMap.get(runNumber).add(file);
+            if (accept(file)) {
+                LOGGER.info("accepted EVIO file: " + file.getPath());
+
+                final Integer run = getRunFromName(file);
+                final Integer sequence = getSequenceNumber(file);
+
+                LOGGER.info("run: " + run);
+                LOGGER.info("sequence: " + sequence);
+
+                this.runs.get(run).addFile(file);
+            } else {
+                LOGGER.fine("rejected file: " + file.getPath());
             }
             return FileVisitResult.CONTINUE;
         }
-
-    }
-
-    static class FileRunMap extends HashMap<Integer, EvioFileList> {
+    }
+
+    static class EvioFilter implements FileFilter {
 
         @Override
-        public EvioFileList get(final Object key) {
+        public boolean accept(final File pathname) {
+            return pathname.getName().contains(".evio");
+        }
+    }
+
+    static class RunFilter implements FileFilter {
+        Set<Integer> acceptRuns;
+
+        RunFilter(final Set<Integer> acceptRuns) {
+            this.acceptRuns = acceptRuns;
+        }
+
+        @Override
+        public boolean accept(final File file) {
+            return this.acceptRuns.contains(getRunFromName(file));
+        }
+    }
+
+    static class RunLog extends HashMap<Integer, RunSummary> {
+
+        void computeTotalEvents() {
+            for (final RunSummary runSummary : this.values()) {
+                runSummary.computeTotalEvents();
+            }
+        }
+
+        @Override
+        public RunSummary get(final Object key) {
             if (!this.containsKey(key)) {
                 if (!(key instanceof Integer)) {
                     throw new IllegalArgumentException("The key argument has bad type.");
                 }
                 if (super.get(key) == null) {
-                    this.put(Integer.class.cast(key), new EvioFileList());
+                    final int run = Integer.class.cast(key);
+                    this.put(Integer.class.cast(key), new RunSummary(run));
                 }
             }
             return super.get(key);
@@ -153,56 +239,125 @@
             return runList;
         }
 
+        void printRunSummaries() {
+            for (final int run : this.keySet()) {
+                this.get(run).printRunSummary(System.out);
+            }
+        }
+
+        void sortAllFiles() {
+            for (final Integer run : this.keySet()) {
+                this.get(run).sortFiles();
+            }
+        }
+    }
+
+    static class RunSummary {
+
+        EvioFileList files = new EvioFileList();
+        int run;
+
+        RunSummary(final int run) {
+            this.run = run;
+        }
+
+        void addFile(final File file) {
+            this.files.add(file);
+        }
+
+        void computeTotalEvents() {
+            this.files.computeTotalEvents();
+        }
+
+        EvioFileList getFiles() {
+            return this.files;
+        }
+
+        Date getRunEnd() {
+            return EvioFileScanner.getRunEnd(this.files.last());
+        }
+
+        Date getRunStart() {
+            return EvioFileScanner.getRunStart(this.files.first());
+        }
+
+        int getTotalEvents() {
+            return this.files.getTotalEvents();
+        }
+
+        boolean isEndOkay() {
+            LOGGER.info("checking is END okay ...");
+            boolean isEndOkay = false;
+            final File lastFile = this.files.last();
+            EvioReader reader = null;
+            try {
+                reader = open(lastFile);
+                reader.gotoEventNumber(reader.getEventCount() - 5);
+                EvioEvent event = null;
+                while ((event = reader.parseNextEvent()) != null) {
+                    if (event.getHeader().getTag() == EvioEventConstants.END_EVENT_TAG) {
+                        isEndOkay = true;
+                        break;
+                    }
+                }
+            } catch (EvioException | IOException e) {
+                throw new RuntimeException(e);
+            } finally {
+                if (reader != null) {
+                    try {
+                        reader.close();
+                    } catch (final IOException e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+            return isEndOkay;
+        }
+
+        void printRunSummary(final PrintStream ps) {
+            ps.println("--------------------------------------------");
+            ps.println("run: " + this.run);
+            ps.println("first file: " + this.files.first());
+            ps.println("last file: " + this.files.last());
+            ps.println("started: " + getRunStart());
+            ps.println("ended: " + getRunEnd());
+            ps.println("total events: " + this.files.getTotalEvents());
+            ps.println("files: " + this.files.size());
+            for (final File file : this.files) {
+                ps.println(file.getPath());
+            }
+        }
+
         void sortFiles() {
-            for (final Integer run : this.keySet()) {
-                this.get(run).sort();
-            }
-        }
-    }
-
-    static class SimpleEvioFileFilter implements FileFilter {
-
-        @Override
-        public boolean accept(final File pathname) {
-            return pathname.getName().contains(".evio");
+            this.files.sort();
         }
     }
 
     private static final Logger LOGGER = LogUtil.create(EvioFileVisitor.class);
-
-    static final long MILLISECONDS = 1000L;
+    private static final long MILLISECONDS = 1000L;
 
     private static final Options OPTIONS = new Options();
 
     static {
-        OPTIONS.addOption("t", "timestamp", true, "timestamp file");
-        OPTIONS.addOption("d", "dir", true, "starting directory");
-    }
-
-    static Integer getRunNumber(final File file) {
-        final String name = file.getName();
-        // FIXME: Better way would be opening the file and getting the PRESTART or head bank value of run.
-        return Integer.parseInt(name.substring(0, name.indexOf(".")).replace("hps_", "").replace("cosmic_", ""));
-    }
-
-    static Integer getSequenceNumber(final File file) {
-        final String name = file.getName();
-        return Integer.parseInt(name.substring(name.lastIndexOf(".") + 1));
-    }
-
-    public static void main(final String[] args) {
-        new EvioFileScanner().parse(args).run();
-    }
-
-    final PosixParser parser = new PosixParser();
-
-    File rootDir = new File(System.getProperty("user.dir"));
-
-    Date getDate(final File file, final int eventTag, final int gotoEvent) {
+        LOGGER.setLevel(Level.ALL);
+    }
+
+    static {
+        OPTIONS.addOption("t", "timestamp-file", true,
+                "timestamp file for date filtering; modified time will be set at end of job");
+        OPTIONS.addOption("d", "directory", true, "starting directory");
+        OPTIONS.addOption("r", "runs", true, "list of runs to accept (others will be excluded)");
+        OPTIONS.addOption("c", "cache", false, "cache files to /cache/mss from MSS (only works at JLAB)");
+        OPTIONS.addOption("p", "print", false, "print run summary at end of job");
+        OPTIONS.addOption("L", "log-level", true, "set log level (INFO, FINE, etc.)");
+        OPTIONS.addOption("u", "update", false, "update the run database");
+    }
+
+    static Date getDate(final File file, final int eventTag, final int gotoEvent) {
         Date date = null;
         EvioReader reader = null;
         try {
-            reader = new EvioReader(file.getPath(), false);
+            reader = open(file);
             EvioEvent event;
             if (gotoEvent > 0) {
                 reader.gotoEventNumber(gotoEvent);
@@ -213,7 +368,6 @@
                 if (event.getHeader().getTag() == eventTag) {
                     final int[] data = EvioEventUtilities.getControlEventData(event);
                     final long seconds = data[0];
-                    System.out.printf("control: %d %d %d %n", data[0], data[1], data[2]);
                     date = new Date(seconds * MILLISECONDS);
                     break;
                 }
@@ -232,32 +386,34 @@
         return date;
     }
 
-    Date getHeadBankDate(final EvioEvent event) {
+    static Date getHeadBankDate(final EvioEvent event) {
         Date date = null;
         final BaseStructure headBank = EvioEventUtilities.getHeadBank(event);
         if (headBank != null) {
             final int[] data = headBank.getIntData();
             final long time = data[3];
-            System.out.printf("head bank: %d %d %d %d %d%n", data[0], data[1], data[2], data[3], data[4]);
-            System.out.println("time from head bank: " + time);
-            date = new Date(time);
+            if (time != 0L) {
+                date = new Date(time * MILLISECONDS);
+            }
         }
         return date;
     }
 
-    Date getRunEnd(final File file) {
-        System.out.println("getRunEnd");
-        Date date = this.getDate(file, EvioEventConstants.END_EVENT_TAG, -10);
+    static Date getRunEnd(final File file) {
+        Date date = getDate(file, EvioEventConstants.END_EVENT_TAG, -10);
         if (date == null) {
-            System.out.println("END tag not found; looking at last event ...");
             EvioReader reader = null;
             try {
-                reader = new EvioReader(file.getPath(), false);
-                System.out.println("event count: " + reader.getEventCount());
-                final EvioEvent lastEvent = reader.getEvent(reader.getEventCount() - 1);
-                reader.parseEvent(lastEvent);
-                System.out.println("getting date from last event " + lastEvent.getEventNumber());
-                date = this.getHeadBankDate(lastEvent);
+                reader = open(file);
+                reader.gotoEventNumber(reader.getEventCount() - 11);
+                EvioEvent event = null;
+                while ((event = reader.parseNextEvent()) != null) {
+                    if (EvioEventUtilities.isPhysicsEvent(event)) {
+                        if ((date = getHeadBankDate(event)) != null) {
+                            break;
+                        }
+                    }
+                }
             } catch (EvioException | IOException e) {
                 throw new RuntimeException(e);
             } finally {
@@ -273,19 +429,27 @@
         return date;
     }
 
-    Date getRunStart(final File file) {
-        System.out.println("getRunStart");
-        Date date = this.getDate(file, EvioEventConstants.PRESTART_EVENT_TAG, 0);
+    static Integer getRunFromName(final File file) {
+        final String name = file.getName();
+        final int startIndex = name.lastIndexOf("_") + 1;
+        final int endIndex = name.indexOf(".");
+        return Integer.parseInt(name.substring(startIndex, endIndex));
+    }
+
+    static Date getRunStart(final File file) {
+        Date date = getDate(file, EvioEventConstants.PRESTART_EVENT_TAG, 0);
         if (date == null) {
-            System.out.println("PRESTART not found; looking at first event ...");
             EvioReader reader = null;
             try {
-                reader = new EvioReader(file.getPath(), false);
+                reader = open(file);
                 EvioEvent event = null;
-                while (!EvioEventUtilities.isPhysicsEvent(event = reader.parseNextEvent())) {
-                }
-                System.out.println("looking at head bank of event " + event.getEventNumber());
-                date = this.getHeadBankDate(event);
+                while ((event = reader.parseNextEvent()) != null) {
+                    if (EvioEventUtilities.isPhysicsEvent(event)) {
+                        if ((date = getHeadBankDate(event)) != null) {
+                            break;
+                        }
+                    }
+                }
             } catch (EvioException | IOException e) {
                 throw new RuntimeException(e);
             } finally {
@@ -301,10 +465,59 @@
         return date;
     }
 
-    public EvioFileScanner parse(final String args[]) {
-
+    static Integer getSequenceNumber(final File file) {
+        final String name = file.getName();
+        return Integer.parseInt(name.substring(name.lastIndexOf(".") + 1));
+    }
+
+    public static void main(final String[] args) {
+        new EvioFileScanner().parse(args).run();
+    }
+
+    static EvioReader open(final File file) throws IOException, EvioException {
+        final long start = System.currentTimeMillis();
+        // final EvioReader reader = new EvioReader(file, false, true);
+        final EvioReader reader = new EvioReader(file, false, false);
+        final long end = System.currentTimeMillis() - start;
+        LOGGER.info("opened " + file.getPath() + " in " + end / MILLISECONDS + " seconds");
+        return reader;
+    }
+
+    final Set<Integer> acceptRuns = new HashSet<Integer>();
+
+    final PosixParser parser = new PosixParser();
+
+    boolean printSummary = false;
+
+    File rootDir = new File(System.getProperty("user.dir"));
+
+    Date timestamp = null;
+
+    File timestampFile = null;
+
+    boolean update = false;
+
+    void cache(final File file) {
+        if (!file.getPath().startsWith("/mss")) {
+            throw new IllegalArgumentException("Only files on /mss can be cached.");
+        }
+        try {
+            new ProcessBuilder("jcache", "submit", "default", file.getPath()).start();
+        } catch (final IOException e) {
+            throw new RuntimeException(e);
+        }
+        LOGGER.info("process started to cache " + file.getPath());
+    }
+
+    EvioFileScanner parse(final String args[]) {
         try {
             final CommandLine cl = this.parser.parse(OPTIONS, args);
+
+            if (cl.hasOption("L")) {
+                final Level level = Level.parse(cl.getOptionValue("L"));
+                LOGGER.info("setting log level to " + level);
+                LOGGER.setLevel(level);
+            }
 
             if (cl.hasOption("d")) {
                 this.rootDir = new File(cl.getOptionValue("d"));
@@ -316,6 +529,37 @@
                 }
             }
 
+            if (cl.hasOption("t")) {
+                this.timestampFile = new File(cl.getOptionValue("t"));
+                if (!this.timestampFile.exists()) {
+                    throw new IllegalArgumentException("The timestamp file does not exist: "
+                            + this.timestampFile.getPath());
+                }
+                try {
+                    this.timestamp = new Date(Files
+                            .readAttributes(this.timestampFile.toPath(), BasicFileAttributes.class).lastModifiedTime()
+                            .toMillis());
+                } catch (final IOException e) {
+                    throw new RuntimeException("Error getting attributes of timestamp file.", e);
+                }
+            }
+
+            if (cl.hasOption("r")) {
+                for (final String runString : cl.getOptionValues("r")) {
+                    final Integer acceptRun = Integer.parseInt(runString);
+                    this.acceptRuns.add(acceptRun);
+                    LOGGER.config("added accept run " + acceptRun);
+                }
+            }
+
+            if (cl.hasOption("p")) {
+                this.printSummary = true;
+            }
+
+            if (cl.hasOption("u")) {
+                this.update = true;
+            }
+
         } catch (final ParseException e) {
             throw new RuntimeException("Error parsing options.", e);
         }
@@ -323,27 +567,92 @@
         return this;
     }
 
-    public void run() {
+    void run() {
         final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
         final EvioFileVisitor visitor = new EvioFileVisitor();
+        if (this.timestamp != null) {
+            visitor.addFilter(new DateFilter(this.timestamp));
+            LOGGER.config("added date filter with timestamp " + this.timestamp);
+        }
+        if (!this.acceptRuns.isEmpty()) {
+            visitor.addFilter(new RunFilter(this.acceptRuns));
+            LOGGER.config("added run filter");
+        }
         try {
             Files.walkFileTree(this.rootDir.toPath(), options, Integer.MAX_VALUE, visitor);
         } catch (final IOException e) {
             throw new RuntimeException(e);
         }
 
-        final FileRunMap runMap = visitor.getRunMap();
-        runMap.sortFiles();
-        System.out.println("found files...");
-        for (final Integer run : runMap.getSortedRunNumbers()) {
-            System.out.println();
-            System.out.println("run " + run + " has " + runMap.get(run).size() + " files");
-            final EvioFileList files = runMap.get(run);
-            System.out.println("first file " + files.first());
-            System.out.println("last file " + files.last());
-            System.out.println("started at " + this.getRunStart(files.first()));
-            System.out.println("ended at " + this.getRunEnd(files.last()));
-            System.out.println("total events: " + files.getTotalEvents());
-        }
+        final RunLog runs = visitor.getRunLog();
+
+        LOGGER.fine("sorting files ...");
+        runs.sortAllFiles();
+        LOGGER.fine("compute total events ...");
+        runs.computeTotalEvents();
+
+        if (this.printSummary) {
+            runs.printRunSummaries();
+        }
+
+        if (this.update) {
+            update(runs);
+        }
+
+        if (this.timestampFile == null) {
+            this.timestampFile = new File("timestamp");
+            try {
+                this.timestampFile.createNewFile();
+            } catch (final IOException e) {
+                throw new RuntimeException(e);
+            }
+            LOGGER.info("created new timestamp file: " + this.timestampFile.getPath());
+        }
+        this.timestampFile.setLastModified(System.currentTimeMillis());
+        LOGGER.info("set modified on timestamp file: " + new Date(this.timestampFile.lastModified()));
+    }
+
+    void update(final RunLog runLog) {
+        LOGGER.info("updating database from run log ...");
+        final ConnectionParameters cp = new ConnectionParameters("root", "derp", "hps_run_db", "localhost");
+        Connection connection = null;
+        PreparedStatement runLogStatement = null;
+        try {
+            connection = cp.createConnection();
+            connection.setAutoCommit(false);
+            runLogStatement = connection
+                    .prepareStatement("INSERT INTO run_log (run, start_date, end_date, nevents, nfiles, end_ok, last_updated) VALUES(?, ?, ?, ?, ?, ?, NOW())");
+            for (final Integer run : runLog.getSortedRunNumbers()) {
+                LOGGER.info("inserting run " + run + " into database");
+                final RunSummary runSummary = runLog.get(run);
+                runLogStatement.setInt(1, run);
+                runLogStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getRunStart().getTime()));
+                runLogStatement.setTimestamp(3, new java.sql.Timestamp(runSummary.getRunEnd().getTime()));
+                runLogStatement.setInt(4, runSummary.getTotalEvents());
+                runLogStatement.setInt(5, runSummary.getFiles().size());
+                runLogStatement.setBoolean(6, runSummary.isEndOkay());
+                runLogStatement.executeUpdate();
+                connection.commit();
+            }
+        } catch (final SQLException e) {
+            LOGGER.log(Level.SEVERE, "rolling back transaction", e);
+            try {
+                connection.rollback();
+            } catch (final SQLException e2) {
+                throw new RuntimeException(e);
+            }
+        } finally {
+            if (connection != null) {
+                try {
+                    connection.setAutoCommit(true);
+                    if (!connection.isClosed()) {
+                        connection.close();
+                    }
+                } catch (final SQLException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+        LOGGER.info("database was updated!");
     }
 }