Author: [log in to unmask]
Date: Fri May 1 17:54:41 2015
New Revision: 2880
Log:
More work on EVIO file crawler.
Modified:
java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java
Modified: java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java
=============================================================================
--- java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java (original)
+++ java/trunk/users/src/main/java/org/hps/users/jeremym/EvioFileScanner.java Fri May 1 17:54:41 2015
@@ -3,25 +3,33 @@
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
+import java.io.PrintStream;
import java.nio.file.FileVisitOption;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
+import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
+import org.hps.conditions.database.ConnectionParameters;
import org.hps.record.evio.EvioEventConstants;
import org.hps.record.evio.EvioEventUtilities;
import org.jlab.coda.jevio.BaseStructure;
@@ -30,40 +38,56 @@
import org.jlab.coda.jevio.EvioReader;
import org.lcsim.util.log.LogUtil;
+/**
+ * A utility for scanning EVIO files by run.
+ *
+ * @author <a href="mailto:[log in to unmask]">Jeremy McCormick</a>
+ */
// TODO:
//
-// Info from files:
-// -run number
-// -list of files
-// -start date (from PRESTART)
-// -end date (from END)
-// -total number of events
+// tasks per run:
//
-// -get trigger config
-// -get SVT config
-//
-// Command line args:
-
-// -start and end run number filter (outside range will be excluded)
-// -list of run numbers (not in list will be excluded)
-// -output timestamp file (when dir walk ends)
-// -list of "tasks" to execute for each EVIO file (register run data, put into file catalog, etc.)
-// -caching to cache first/last files or all files for run
+// -get list of files in run
+// -cache all files from run to /cache/mss
+// -get start and end dates
+// -get total number of events
+// -dump trigger config
+// -dump SVT config
+// -check for missing or corrupt files in run
+// -update run database
public class EvioFileScanner {
+ /**
+  * File filter that accepts only files whose creation time is strictly later than a reference date.
+  */
+ static class DateFilter implements FileFilter {
+
+     Date date;
+
+     DateFilter(final Date date) {
+         this.date = date;
+     }
+
+     /**
+      * Compare the creation time of a file with the reference date.
+      *
+      * @param pathname the file to check
+      * @return <code>true</code> if the file was created after the reference date
+      * @throws RuntimeException if the file attributes cannot be read
+      */
+     @Override
+     public boolean accept(final File pathname) {
+         final BasicFileAttributes attributes;
+         try {
+             attributes = Files.readAttributes(pathname.toPath(), BasicFileAttributes.class);
+         } catch (final IOException e) {
+             throw new RuntimeException("Error getting file attributes.", e);
+         }
+         final long createdMillis = attributes.creationTime().toMillis();
+         final long referenceMillis = this.date.getTime();
+         return createdMillis > referenceMillis;
+     }
+ }
+
static class EvioFileList extends ArrayList<File> {
- public File first() {
- return this.get(0);
- }
-
- public int getTotalEvents() {
- int totalEvents = 0;
+ int totalEvents = 0;
+
+ void computeTotalEvents() {
+ this.totalEvents = 0;
for (final File file : this) {
EvioReader reader = null;
try {
- reader = new EvioReader(file, false);
- totalEvents += reader.getEventCount();
+ reader = open(file);
+ this.totalEvents += reader.getEventCount();
} catch (EvioException | IOException e) {
throw new RuntimeException(e);
} finally {
@@ -76,14 +100,21 @@
}
}
}
- return totalEvents;
- }
-
- public File last() {
+ }
+
+ File first() {
+ return this.get(0);
+ }
+
+ int getTotalEvents() {
+ return this.totalEvents;
+ }
+
+ File last() {
return this.get(this.size() - 1);
}
- public void sort() {
+ void sort() {
final List<File> fileList = new ArrayList<File>(this);
Collections.sort(fileList, new EvioFileSequenceComparator());
this.clear();
@@ -101,47 +132,102 @@
}
}
- // hps_005077.evio.20
static class EvioFileVisitor extends SimpleFileVisitor<Path> {
- static SimpleEvioFileFilter FILTER = new SimpleEvioFileFilter();
-
- FileRunMap runMap = new FileRunMap();
-
- FileRunMap getRunMap() {
- return this.runMap;
+ boolean cache = false;
+
+ List<FileFilter> filters = new ArrayList<FileFilter>();
+
+ RunLog runs = new RunLog();
+
+ EvioFileVisitor() {
+ addFilter(new EvioFilter());
+ }
+
+ boolean accept(final File file) {
+ boolean accept = true;
+ for (final FileFilter filter : this.filters) {
+ accept = filter.accept(file);
+ if (accept == false) {
+ LOGGER.fine(filter.getClass().getSimpleName() + " rejected file: " + file.getPath());
+ break;
+ }
+ }
+ return accept;
+ }
+
+ void addFilter(final FileFilter filter) {
+ this.filters.add(filter);
+ LOGGER.config("added filter: " + filter.getClass().getSimpleName());
+ }
+
+ RunLog getRunLog() {
+ return this.runs;
+ }
+
+ void setCache(final boolean cache) {
+ this.cache = cache;
}
@Override
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) {
final File file = path.toFile();
- if (FILTER.accept(file)) {
- LOGGER.info("found EVIO file " + file.getPath());
-
- final Integer runNumber = getRunNumber(file);
- final Integer sequenceNumber = getSequenceNumber(file);
-
- LOGGER.info("run number " + runNumber);
- LOGGER.info("sequence number " + sequenceNumber);
-
- this.runMap.get(runNumber).add(file);
+ if (accept(file)) {
+ LOGGER.info("accepted EVIO file: " + file.getPath());
+
+ final Integer run = getRunFromName(file);
+ final Integer sequence = getSequenceNumber(file);
+
+ LOGGER.info("run: " + run);
+ LOGGER.info("sequence: " + sequence);
+
+ this.runs.get(run).addFile(file);
+ } else {
+ LOGGER.fine("rejected file: " + file.getPath());
}
return FileVisitResult.CONTINUE;
}
-
- }
-
- static class FileRunMap extends HashMap<Integer, EvioFileList> {
+ }
+
+ static class EvioFilter implements FileFilter {
@Override
- public EvioFileList get(final Object key) {
+ public boolean accept(final File pathname) {
+ return pathname.getName().contains(".evio");
+ }
+ }
+
+ /**
+  * File filter that accepts only files whose run number, as parsed from the file name, is in a given set.
+  */
+ static class RunFilter implements FileFilter {
+
+     Set<Integer> acceptRuns;
+
+     RunFilter(final Set<Integer> acceptRuns) {
+         this.acceptRuns = acceptRuns;
+     }
+
+     /**
+      * Check whether the run number of a file is one of the accepted runs.
+      *
+      * @param file the file to check
+      * @return <code>true</code> if the run number parsed from the file name is in the accepted set
+      */
+     @Override
+     public boolean accept(final File file) {
+         final Integer run = getRunFromName(file);
+         return this.acceptRuns.contains(run);
+     }
+ }
+
+ static class RunLog extends HashMap<Integer, RunSummary> {
+
+ void computeTotalEvents() {
+ for (final RunSummary runSummary : this.values()) {
+ runSummary.computeTotalEvents();
+ }
+ }
+
+ @Override
+ public RunSummary get(final Object key) {
if (!this.containsKey(key)) {
if (!(key instanceof Integer)) {
throw new IllegalArgumentException("The key argument has bad type.");
}
if (super.get(key) == null) {
- this.put(Integer.class.cast(key), new EvioFileList());
+ final int run = Integer.class.cast(key);
+ this.put(Integer.class.cast(key), new RunSummary(run));
}
}
return super.get(key);
@@ -153,56 +239,125 @@
return runList;
}
+ void printRunSummaries() {
+ for (final int run : this.keySet()) {
+ this.get(run).printRunSummary(System.out);
+ }
+ }
+
+ void sortAllFiles() {
+ for (final Integer run : this.keySet()) {
+ this.get(run).sortFiles();
+ }
+ }
+ }
+
+ static class RunSummary {
+
+ EvioFileList files = new EvioFileList();
+ int run;
+
+ RunSummary(final int run) {
+ this.run = run;
+ }
+
+ void addFile(final File file) {
+ this.files.add(file);
+ }
+
+ void computeTotalEvents() {
+ this.files.computeTotalEvents();
+ }
+
+ EvioFileList getFiles() {
+ return this.files;
+ }
+
+ Date getRunEnd() {
+ return EvioFileScanner.getRunEnd(this.files.last());
+ }
+
+ Date getRunStart() {
+ return EvioFileScanner.getRunStart(this.files.first());
+ }
+
+ int getTotalEvents() {
+ return this.files.getTotalEvents();
+ }
+
+ boolean isEndOkay() {
+ LOGGER.info("checking is END okay ...");
+ boolean isEndOkay = false;
+ final File lastFile = this.files.last();
+ EvioReader reader = null;
+ try {
+ reader = open(lastFile);
+ reader.gotoEventNumber(reader.getEventCount() - 5);
+ EvioEvent event = null;
+ while ((event = reader.parseNextEvent()) != null) {
+ if (event.getHeader().getTag() == EvioEventConstants.END_EVENT_TAG) {
+ isEndOkay = true;
+ break;
+ }
+ }
+ } catch (EvioException | IOException e) {
+ throw new RuntimeException(e);
+ } finally {
+ if (reader != null) {
+ try {
+ reader.close();
+ } catch (final IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ return isEndOkay;
+ }
+
+ void printRunSummary(final PrintStream ps) {
+ ps.println("--------------------------------------------");
+ ps.println("run: " + this.run);
+ ps.println("first file: " + this.files.first());
+ ps.println("last file: " + this.files.last());
+ ps.println("started: " + getRunStart());
+ ps.println("ended: " + getRunEnd());
+ ps.println("total events: " + this.files.getTotalEvents());
+ ps.println("files: " + this.files.size());
+ for (final File file : this.files) {
+ ps.println(file.getPath());
+ }
+ }
+
void sortFiles() {
- for (final Integer run : this.keySet()) {
- this.get(run).sort();
- }
- }
- }
-
- static class SimpleEvioFileFilter implements FileFilter {
-
- @Override
- public boolean accept(final File pathname) {
- return pathname.getName().contains(".evio");
+ this.files.sort();
}
}
private static final Logger LOGGER = LogUtil.create(EvioFileVisitor.class);
-
- static final long MILLISECONDS = 1000L;
+ private static final long MILLISECONDS = 1000L;
private static final Options OPTIONS = new Options();
static {
- OPTIONS.addOption("t", "timestamp", true, "timestamp file");
- OPTIONS.addOption("d", "dir", true, "starting directory");
- }
-
- static Integer getRunNumber(final File file) {
- final String name = file.getName();
- // FIXME: Better way would be opening the file and getting the PRESTART or head bank value of run.
- return Integer.parseInt(name.substring(0, name.indexOf(".")).replace("hps_", "").replace("cosmic_", ""));
- }
-
- static Integer getSequenceNumber(final File file) {
- final String name = file.getName();
- return Integer.parseInt(name.substring(name.lastIndexOf(".") + 1));
- }
-
- public static void main(final String[] args) {
- new EvioFileScanner().parse(args).run();
- }
-
- final PosixParser parser = new PosixParser();
-
- File rootDir = new File(System.getProperty("user.dir"));
-
- Date getDate(final File file, final int eventTag, final int gotoEvent) {
+ LOGGER.setLevel(Level.ALL);
+ }
+
+ static {
+ OPTIONS.addOption("t", "timestamp-file", true,
+ "timestamp file for date filtering; modified time will be set at end of job");
+ OPTIONS.addOption("d", "directory", true, "starting directory");
+ OPTIONS.addOption("r", "runs", true, "list of runs to accept (others will be excluded)");
+ OPTIONS.addOption("c", "cache", false, "cache files to /cache/mss from MSS (only works at JLAB)");
+ OPTIONS.addOption("p", "print", false, "print run summary at end of job");
+ OPTIONS.addOption("L", "log-level", true, "set log level (INFO, FINE, etc.)");
+ OPTIONS.addOption("u", "update", false, "update the run database");
+ }
+
+ static Date getDate(final File file, final int eventTag, final int gotoEvent) {
Date date = null;
EvioReader reader = null;
try {
- reader = new EvioReader(file.getPath(), false);
+ reader = open(file);
EvioEvent event;
if (gotoEvent > 0) {
reader.gotoEventNumber(gotoEvent);
@@ -213,7 +368,6 @@
if (event.getHeader().getTag() == eventTag) {
final int[] data = EvioEventUtilities.getControlEventData(event);
final long seconds = data[0];
- System.out.printf("control: %d %d %d %n", data[0], data[1], data[2]);
date = new Date(seconds * MILLISECONDS);
break;
}
@@ -232,32 +386,34 @@
return date;
}
- Date getHeadBankDate(final EvioEvent event) {
+ static Date getHeadBankDate(final EvioEvent event) {
Date date = null;
final BaseStructure headBank = EvioEventUtilities.getHeadBank(event);
if (headBank != null) {
final int[] data = headBank.getIntData();
final long time = data[3];
- System.out.printf("head bank: %d %d %d %d %d%n", data[0], data[1], data[2], data[3], data[4]);
- System.out.println("time from head bank: " + time);
- date = new Date(time);
+ if (time != 0L) {
+ date = new Date(time * MILLISECONDS);
+ }
}
return date;
}
- Date getRunEnd(final File file) {
- System.out.println("getRunEnd");
- Date date = this.getDate(file, EvioEventConstants.END_EVENT_TAG, -10);
+ static Date getRunEnd(final File file) {
+ Date date = getDate(file, EvioEventConstants.END_EVENT_TAG, -10);
if (date == null) {
- System.out.println("END tag not found; looking at last event ...");
EvioReader reader = null;
try {
- reader = new EvioReader(file.getPath(), false);
- System.out.println("event count: " + reader.getEventCount());
- final EvioEvent lastEvent = reader.getEvent(reader.getEventCount() - 1);
- reader.parseEvent(lastEvent);
- System.out.println("getting date from last event " + lastEvent.getEventNumber());
- date = this.getHeadBankDate(lastEvent);
+ reader = open(file);
+ reader.gotoEventNumber(reader.getEventCount() - 11);
+ EvioEvent event = null;
+ while ((event = reader.parseNextEvent()) != null) {
+ if (EvioEventUtilities.isPhysicsEvent(event)) {
+ if ((date = getHeadBankDate(event)) != null) {
+ break;
+ }
+ }
+ }
} catch (EvioException | IOException e) {
throw new RuntimeException(e);
} finally {
@@ -273,19 +429,27 @@
return date;
}
- Date getRunStart(final File file) {
- System.out.println("getRunStart");
- Date date = this.getDate(file, EvioEventConstants.PRESTART_EVENT_TAG, 0);
+ /**
+  * Get the run number from a file name by parsing the text between the last '_' and the first '.'
+  * that follows it (e.g. <code>hps_005077.evio.20</code> gives 5077).
+  *
+  * @param file the file whose name contains the run number
+  * @return the run number
+  * @throws IllegalArgumentException if no run number can be extracted from the file name
+  */
+ static Integer getRunFromName(final File file) {
+     final String name = file.getName();
+     final int startIndex = name.lastIndexOf('_') + 1;
+     // Search for the dot after the underscore so that a dot earlier in the name cannot
+     // produce an end index that lies before the start index.
+     final int endIndex = name.indexOf('.', startIndex);
+     if (endIndex < 0) {
+         throw new IllegalArgumentException("Cannot extract run number from file name: " + name);
+     }
+     try {
+         return Integer.parseInt(name.substring(startIndex, endIndex));
+     } catch (final NumberFormatException e) {
+         throw new IllegalArgumentException("Cannot extract run number from file name: " + name, e);
+     }
+ }
+
+ static Date getRunStart(final File file) {
+ Date date = getDate(file, EvioEventConstants.PRESTART_EVENT_TAG, 0);
if (date == null) {
- System.out.println("PRESTART not found; looking at first event ...");
EvioReader reader = null;
try {
- reader = new EvioReader(file.getPath(), false);
+ reader = open(file);
EvioEvent event = null;
- while (!EvioEventUtilities.isPhysicsEvent(event = reader.parseNextEvent())) {
- }
- System.out.println("looking at head bank of event " + event.getEventNumber());
- date = this.getHeadBankDate(event);
+ while ((event = reader.parseNextEvent()) != null) {
+ if (EvioEventUtilities.isPhysicsEvent(event)) {
+ if ((date = getHeadBankDate(event)) != null) {
+ break;
+ }
+ }
+ }
} catch (EvioException | IOException e) {
throw new RuntimeException(e);
} finally {
@@ -301,10 +465,59 @@
return date;
}
- public EvioFileScanner parse(final String args[]) {
-
+ /**
+  * Get the file sequence number by parsing the text after the last '.' in the file name.
+  *
+  * @param file the file whose name ends with the sequence number
+  * @return the sequence number
+  */
+ static Integer getSequenceNumber(final File file) {
+     final String fileName = file.getName();
+     final int lastDot = fileName.lastIndexOf(".");
+     final String sequenceText = fileName.substring(lastDot + 1);
+     return Integer.parseInt(sequenceText);
+ }
+
+ /**
+  * Run the scanner from the command line.
+  *
+  * @param args the command line arguments
+  */
+ public static void main(final String[] args) {
+     final EvioFileScanner scanner = new EvioFileScanner();
+     scanner.parse(args).run();
+ }
+
+ /**
+  * Open an EVIO file with a new reader and log how long the open took.
+  *
+  * @param file the EVIO file to open
+  * @return the reader for the file; the caller is responsible for closing it
+  * @throws IOException if thrown by the reader constructor
+  * @throws EvioException if thrown by the reader constructor
+  */
+ static EvioReader open(final File file) throws IOException, EvioException {
+     final long start = System.currentTimeMillis();
+     // NOTE(review): an earlier variant passed true as the third constructor argument;
+     // confirm against the jevio documentation which setting is intended.
+     final EvioReader reader = new EvioReader(file, false, false);
+     final long elapsedMillis = System.currentTimeMillis() - start;
+     // Floating-point division so that sub-second opens are not reported as "0 seconds".
+     final double elapsedSeconds = (double) elapsedMillis / MILLISECONDS;
+     LOGGER.info("opened " + file.getPath() + " in " + elapsedSeconds + " seconds");
+     return reader;
+ }
+
+ final Set<Integer> acceptRuns = new HashSet<Integer>();
+
+ final PosixParser parser = new PosixParser();
+
+ boolean printSummary = false;
+
+ File rootDir = new File(System.getProperty("user.dir"));
+
+ Date timestamp = null;
+
+ File timestampFile = null;
+
+ boolean update = false;
+
+ /**
+  * Start an external <code>jcache submit default</code> process for a file.
+  * <p>
+  * The process is started but not waited for.
+  *
+  * @param file the file to cache; its path must start with <code>/mss</code>
+  * @throws IllegalArgumentException if the file path does not start with <code>/mss</code>
+  * @throws RuntimeException if the process cannot be started
+  */
+ void cache(final File file) {
+     final String path = file.getPath();
+     if (!path.startsWith("/mss")) {
+         throw new IllegalArgumentException("Only files on /mss can be cached.");
+     }
+     final ProcessBuilder jcache = new ProcessBuilder("jcache", "submit", "default", path);
+     try {
+         jcache.start();
+     } catch (final IOException e) {
+         throw new RuntimeException(e);
+     }
+     LOGGER.info("process started to cache " + path);
+ }
+
+ EvioFileScanner parse(final String args[]) {
try {
final CommandLine cl = this.parser.parse(OPTIONS, args);
+
+ if (cl.hasOption("L")) {
+ final Level level = Level.parse(cl.getOptionValue("L"));
+ LOGGER.info("setting log level to " + level);
+ LOGGER.setLevel(level);
+ }
if (cl.hasOption("d")) {
this.rootDir = new File(cl.getOptionValue("d"));
@@ -316,6 +529,37 @@
}
}
+ if (cl.hasOption("t")) {
+ this.timestampFile = new File(cl.getOptionValue("t"));
+ if (!this.timestampFile.exists()) {
+ throw new IllegalArgumentException("The timestamp file does not exist: "
+ + this.timestampFile.getPath());
+ }
+ try {
+ this.timestamp = new Date(Files
+ .readAttributes(this.timestampFile.toPath(), BasicFileAttributes.class).lastModifiedTime()
+ .toMillis());
+ } catch (final IOException e) {
+ throw new RuntimeException("Error getting attributes of timestamp file.", e);
+ }
+ }
+
+ if (cl.hasOption("r")) {
+ for (final String runString : cl.getOptionValues("r")) {
+ final Integer acceptRun = Integer.parseInt(runString);
+ this.acceptRuns.add(acceptRun);
+ LOGGER.config("added accept run " + acceptRun);
+ }
+ }
+
+ if (cl.hasOption("p")) {
+ this.printSummary = true;
+ }
+
+ if (cl.hasOption("u")) {
+ this.update = true;
+ }
+
} catch (final ParseException e) {
throw new RuntimeException("Error parsing options.", e);
}
@@ -323,27 +567,92 @@
return this;
}
- public void run() {
+ void run() {
final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
final EvioFileVisitor visitor = new EvioFileVisitor();
+ if (this.timestamp != null) {
+ visitor.addFilter(new DateFilter(this.timestamp));
+ LOGGER.config("added date filter with timestamp " + this.timestamp);
+ }
+ if (!this.acceptRuns.isEmpty()) {
+ visitor.addFilter(new RunFilter(this.acceptRuns));
+ LOGGER.config("added run filter");
+ }
try {
Files.walkFileTree(this.rootDir.toPath(), options, Integer.MAX_VALUE, visitor);
} catch (final IOException e) {
throw new RuntimeException(e);
}
- final FileRunMap runMap = visitor.getRunMap();
- runMap.sortFiles();
- System.out.println("found files...");
- for (final Integer run : runMap.getSortedRunNumbers()) {
- System.out.println();
- System.out.println("run " + run + " has " + runMap.get(run).size() + " files");
- final EvioFileList files = runMap.get(run);
- System.out.println("first file " + files.first());
- System.out.println("last file " + files.last());
- System.out.println("started at " + this.getRunStart(files.first()));
- System.out.println("ended at " + this.getRunEnd(files.last()));
- System.out.println("total events: " + files.getTotalEvents());
- }
+ final RunLog runs = visitor.getRunLog();
+
+ LOGGER.fine("sorting files ...");
+ runs.sortAllFiles();
+ LOGGER.fine("compute total events ...");
+ runs.computeTotalEvents();
+
+ if (this.printSummary) {
+ runs.printRunSummaries();
+ }
+
+ if (this.update) {
+ update(runs);
+ }
+
+ if (this.timestampFile == null) {
+ this.timestampFile = new File("timestamp");
+ try {
+ this.timestampFile.createNewFile();
+ } catch (final IOException e) {
+ throw new RuntimeException(e);
+ }
+ LOGGER.info("created new timestamp file: " + this.timestampFile.getPath());
+ }
+ this.timestampFile.setLastModified(System.currentTimeMillis());
+ LOGGER.info("set modified on timestamp file: " + new Date(this.timestampFile.lastModified()));
+ }
+
+ /**
+  * Insert one row per run from the run log into the <code>run_log</code> table.
+  * <p>
+  * Each run is committed individually. If an SQL error occurs it is logged, the open transaction
+  * is rolled back and the remaining runs are not processed.
+  *
+  * @param runLog the run log containing the run summaries to insert
+  * @throws RuntimeException if rolling back the transaction fails
+  */
+ void update(final RunLog runLog) {
+     LOGGER.info("updating database from run log ...");
+     // FIXME: Connection settings, including the credentials, are hard-coded here and should
+     // be read from external configuration instead.
+     final ConnectionParameters cp = new ConnectionParameters("root", "derp", "hps_run_db", "localhost");
+     Connection connection = null;
+     try {
+         connection = cp.createConnection();
+         connection.setAutoCommit(false);
+         // try-with-resources makes sure the statement is closed on every path.
+         try (PreparedStatement runLogStatement = connection
+                 .prepareStatement("INSERT INTO run_log (run, start_date, end_date, nevents, nfiles, end_ok, last_updated) VALUES(?, ?, ?, ?, ?, ?, NOW())")) {
+             for (final Integer run : runLog.getSortedRunNumbers()) {
+                 LOGGER.info("inserting run " + run + " into database");
+                 final RunSummary runSummary = runLog.get(run);
+                 runLogStatement.setInt(1, run);
+                 // The start and end dates are null when no usable time stamp was found in the
+                 // files; store SQL NULL in that case rather than failing with a null pointer.
+                 final Date runStart = runSummary.getRunStart();
+                 if (runStart != null) {
+                     runLogStatement.setTimestamp(2, new java.sql.Timestamp(runStart.getTime()));
+                 } else {
+                     runLogStatement.setNull(2, java.sql.Types.TIMESTAMP);
+                 }
+                 final Date runEnd = runSummary.getRunEnd();
+                 if (runEnd != null) {
+                     runLogStatement.setTimestamp(3, new java.sql.Timestamp(runEnd.getTime()));
+                 } else {
+                     runLogStatement.setNull(3, java.sql.Types.TIMESTAMP);
+                 }
+                 runLogStatement.setInt(4, runSummary.getTotalEvents());
+                 runLogStatement.setInt(5, runSummary.getFiles().size());
+                 runLogStatement.setBoolean(6, runSummary.isEndOkay());
+                 runLogStatement.executeUpdate();
+                 connection.commit();
+             }
+         }
+         // Only report success when every insert went through.
+         LOGGER.info("database was updated!");
+     } catch (final SQLException e) {
+         LOGGER.log(Level.SEVERE, "rolling back transaction", e);
+         // The connection is still null if creating it was what failed.
+         if (connection != null) {
+             try {
+                 connection.rollback();
+             } catch (final SQLException e2) {
+                 // Keep the rollback failure attached to the original error instead of dropping it.
+                 e.addSuppressed(e2);
+                 throw new RuntimeException(e);
+             }
+         }
+     } finally {
+         if (connection != null) {
+             try {
+                 // Check for a closed connection first; changing auto-commit on a closed
+                 // connection would itself raise an error.
+                 if (!connection.isClosed()) {
+                     connection.setAutoCommit(true);
+                     connection.close();
+                 }
+             } catch (final SQLException e) {
+                 LOGGER.log(Level.WARNING, "error closing database connection", e);
+             }
+         }
+     }
}
}
|