Author: [log in to unmask]
Date: Wed Sep 23 07:49:16 2015
New Revision: 3677
Log:
Updates to crawler and run database.
Added:
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java
- copied, changed from r3655, java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java
java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java
java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java
java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
Removed:
java/trunk/crawler/src/main/java/org/hps/crawler/EvioDatacatUtilities.java
java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java
java/trunk/crawler/src/main/java/org/hps/crawler/RunProcessor.java
Modified:
java/trunk/crawler/pom.xml
java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java
Modified: java/trunk/crawler/pom.xml
=============================================================================
--- java/trunk/crawler/pom.xml (original)
+++ java/trunk/crawler/pom.xml Wed Sep 23 07:49:16 2015
@@ -19,5 +19,10 @@
<groupId>org.hps</groupId>
<artifactId>hps-run-database</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.freehep</groupId>
+ <artifactId>freehep-rootio</artifactId>
+ <version>2.2.1</version>
+ </dependency>
</dependencies>
</project>
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java Wed Sep 23 07:49:16 2015
@@ -10,7 +10,6 @@
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
-import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -21,9 +20,12 @@
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.hps.conditions.database.ConnectionParameters;
-import org.hps.record.evio.EvioFileMetadata;
+import org.hps.datacat.client.DatacatClient;
+import org.hps.datacat.client.DatacatClientFactory;
+import org.hps.datacat.client.DatasetFileFormat;
import org.hps.run.database.RunDatabaseDaoFactory;
import org.hps.run.database.RunManager;
+import org.hps.run.database.RunProcessor;
import org.hps.run.database.RunSummary;
import org.hps.run.database.RunSummaryDao;
import org.hps.run.database.RunSummaryImpl;
@@ -31,45 +33,75 @@
import org.lcsim.util.log.LogUtil;
/**
- * Search for EVIO files in a directory tree, group the files that are found by run, extract meta data from these files,
- * and optionally update a run database with the information that was found.
+ * Crawls a directory tree for data files and performs tasks related to this information.
+ * <p>
+ * The crawler can find EVIO, LCIO, or ROOT files in a directory tree and then perform various tasks based on
+ * information extracted from them.
*
* @author Jeremy McCormick, SLAC
*/
public final class Crawler {
/**
+ * Make a list of available features for printing help.
+ */
+ private static String AVAILABLE_FEATURES;
+
+ /**
+ * Make a list of available file formats for printing help.
+ */
+ private static String AVAILABLE_FORMATS;
+
+ /**
* Setup the logger.
*/
- private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.ALL);
+ private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.CONFIG);
/**
* Constant for milliseconds conversion.
*/
private static final long MILLISECONDS = 1000L;
-
/**
* Command line options for the crawler.
*/
private static final Options OPTIONS = new Options();
+
+ static {
+ final StringBuffer buffer = new StringBuffer();
+ for (final CrawlerFeature feature : CrawlerFeature.values()) {
+ buffer.append(feature.name() + " ");
+ }
+ buffer.setLength(buffer.length() - 1);
+ AVAILABLE_FEATURES = buffer.toString();
+ }
+ static {
+ final StringBuffer buffer = new StringBuffer();
+ for (final DatasetFileFormat format : DatasetFileFormat.values()) {
+ buffer.append(format.name() + " ");
+ }
+ buffer.setLength(buffer.length() - 1);
+ AVAILABLE_FORMATS = buffer.toString();
+ }
/**
* Statically define the command options.
*/
static {
OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")");
- OPTIONS.addOption("c", "datacat", true, "update the data catalog using the specified folder (off by default)");
+ OPTIONS.addOption("c", "datacat", true, "use the specified datacat folder");
OPTIONS.addOption("C", "cache", false, "cache files from MSS (JLAB only and not for batch farm use!)");
+ OPTIONS.addOption("e", "enable", true, "enable a feature: " + AVAILABLE_FEATURES);
+ OPTIONS.addOption("D", "default-features", false, "enable default features");
+ OPTIONS.addOption("F", "default-formats", false, "enable default file filters");
+ OPTIONS.addOption("f", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS);
OPTIONS.addOption("p", "connection-properties", true, "database connection properties file (required)");
OPTIONS.addOption("d", "directory", true, "root directory to start crawling (default is current dir)");
OPTIONS.addOption("E", "evio-processor", true, "class name of an EvioEventProcessor to execute");
OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
- OPTIONS.addOption("i", "insert", false, "insert information into the run database (not done by default)");
OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)");
OPTIONS.addOption("r", "run", true, "add a run number to accept (others will be excluded)");
OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
OPTIONS.addOption("w", "max-cache-wait", true, "time per run allowed for file caching in seconds");
- OPTIONS.addOption("u", "update", false, "allow replacement of existing data in the run db (off by default)");
OPTIONS.addOption("x", "max-depth", true, "max depth to crawl");
}
@@ -87,29 +119,6 @@
}
/**
- * Process all the runs that were found.
- *
- * @param runs the run log containing the list of run summaries
- * @throws Exception if there is an error processing one of the runs
- */
- static RunProcessor processRun(final RunSummary runSummary) throws Exception {
-
- LOGGER.info("processing run" + runSummary.getRun());
-
- // Create a processor to process all the EVIO events in the run.
- LOGGER.info("creating run processor for " + runSummary.getRun());
- final RunProcessor runProcessor = new RunProcessor((RunSummaryImpl) runSummary);
-
- // Process all of the files from the run.
- LOGGER.info("processing run " + runSummary.getRun());
- runProcessor.processRun();
-
- LOGGER.getHandlers()[0].flush();
-
- return runProcessor;
- }
-
- /**
* The class for managing the file caching using the 'jcache' command.
*/
private final JCacheManager cacheManager = new JCacheManager();
@@ -154,6 +163,30 @@
}
/**
+ * Create a run processor from the current configuration.
+ *
+ * @return the run processor
+ */
+ private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary) {
+
+ final RunProcessor runProcessor = new RunProcessor(runSummary);
+
+ final Set<CrawlerFeature> features = config.getFeatures();
+
+ if (features.contains(CrawlerFeature.EPICS)) {
+ runProcessor.addEpicsProcessor();
+ }
+ if (features.contains(CrawlerFeature.SCALERS)) {
+ runProcessor.addScalerProcessor();
+ }
+ if (features.contains(CrawlerFeature.TRIGGER)) {
+ runProcessor.addTriggerTimeProcessor();
+ }
+
+ return runProcessor;
+ }
+
+ /**
* Parse command line options and create a new {@link Crawler} object from the configuration.
*
* @param args the command line arguments
@@ -161,9 +194,9 @@
*/
private Crawler parse(final String args[]) {
- LOGGER.info("parsing command line options");
-
- config = new CrawlerConfig();
+ LOGGER.config("parsing command line options");
+
+ this.config = new CrawlerConfig();
try {
final CommandLine cl = this.parser.parse(OPTIONS, args);
@@ -176,11 +209,37 @@
// Log level.
if (cl.hasOption("L")) {
final Level level = Level.parse(cl.getOptionValue("L"));
- LOGGER.info("setting log level to " + level);
+ LOGGER.config("setting log level to " + level);
LOGGER.setLevel(level);
}
+ // Enable default features.
+ if (cl.hasOption("D")) {
+ LOGGER.config("enabling default features");
+ this.config.addDefaultFeatures();
+ }
+
+ // Enable default file formats.
+ if (cl.hasOption("F")) {
+ LOGGER.config("enabling default file formats");
+ this.config.addDefaultFileFormats();
+ }
+
+ // Root directory for file crawling.
+ if (cl.hasOption("d")) {
+ final File rootDir = new File(cl.getOptionValue("d"));
+ if (!rootDir.exists()) {
+ throw new IllegalArgumentException("The directory does not exist.");
+ }
+ if (!rootDir.isDirectory()) {
+ throw new IllegalArgumentException("The specified path is not a directory.");
+ }
+ config.setRootDir(rootDir);
+ LOGGER.config("root dir set to " + config.rootDir());
+ }
+
// Database connection properties file (this is not optional).
+ // FIXME: This only needs to be set for updating the run database.
if (cl.hasOption("p")) {
final String dbPropPath = cl.getOptionValue("p");
final File dbPropFile = new File(dbPropPath);
@@ -194,19 +253,6 @@
} else {
throw new RuntimeException(
"The -p switch providing the database connection properties file is a required argument.");
- }
-
- // Root directory for file crawling.
- if (cl.hasOption("d")) {
- final File rootDir = new File(cl.getOptionValue("d"));
- if (!rootDir.exists()) {
- throw new IllegalArgumentException("The directory does not exist.");
- }
- if (!rootDir.isDirectory()) {
- throw new IllegalArgumentException("The specified path is not a directory.");
- }
- config.setRootDir(rootDir);
- LOGGER.config("root dir for crawling set to " + config.rootDir());
}
// Timestamp file for date filtering.
@@ -247,12 +293,6 @@
config.setAcceptRuns(acceptRuns);
}
- // Enable updating of run database.
- if (cl.hasOption("i")) {
- config.setUpdateRunLog(true);
- LOGGER.config("inserting into run database is enabled");
- }
-
// Enable file cache usage for running at JLAB.
if (cl.hasOption("C")) {
config.setUseFileCache(true);
@@ -264,12 +304,6 @@
final Long waitTime = Long.parseLong(cl.getOptionValue("w")) * MILLISECONDS;
config.setWaitTime(waitTime);
LOGGER.config("max time for file caching set to " + config.waitTime());
- }
-
- // Allow deletion and replacement of records in run database.
- if (cl.hasOption("u")) {
- config.setAllowUpdates(true);
- LOGGER.config("deletion and replacement of existing runs in the database is enabled");
}
// User supplied timestamp string that is converted to a date for file filtering.
@@ -315,13 +349,62 @@
if (datacatFolder == null) {
throw new IllegalArgumentException("missing -c argument with data catalog folder");
}
+
+ // Set datacat folder.
LOGGER.config("using data catalog folder " + datacatFolder);
config.setDatacatFolder(datacatFolder);
- config.setUpdateDatacat(true);
- }
-
+
+ // Assume datacat should be enabled if folder name was given.
+ config.getFeatures().add(CrawlerFeature.DATACAT);
+ LOGGER.config(CrawlerFeature.DATACAT + " is enabled");
+ }
+
+ // Configure enabled features.
+ if (cl.hasOption("e")) {
+ for (final String arg : cl.getOptionValues("e")) {
+ CrawlerFeature feature = null;
+ try {
+ feature = CrawlerFeature.valueOf(arg);
+ } catch (IllegalArgumentException | NullPointerException e) {
+ throw new IllegalArgumentException("The feature " + arg + " is not valid.", e);
+ }
+ this.config.addFeature(feature);
+ }
+ }
+
+ // Configure enabled file formats.
+ if (cl.hasOption("f")) {
+ for (final String arg : cl.getOptionValues("f")) {
+ DatasetFileFormat format = null;
+ try {
+ format = DatasetFileFormat.valueOf(arg);
+ } catch (IllegalArgumentException | NullPointerException e) {
+ throw new IllegalArgumentException("The format " + arg + " is not valid.", e);
+ }
+ LOGGER.config("adding format " + format.name());
+ this.config.addFileFormat(format);
+ }
+ }
} catch (final ParseException e) {
throw new RuntimeException("Error parsing options.", e);
+ }
+
+ // Check that there is at least one file format enabled for filtering.
+ if (this.config.getFileFormats().isEmpty()) {
+ throw new IllegalStateException(
+ "There are no file formats enabled. Enable defaults using -F or add a format using the -f switch.");
+ }
+
+ // Print a message if no features are enabled; this is not a fatal error but the job won't update anything.
+ if (this.config.getFeatures().isEmpty()) {
+ LOGGER.warning("no features are enabled");
+ }
+
+ // Check that EVIO file filter is active if run database is being updated.
+ // Don't add by default because the user may have made a mistake in the options they provided.
+ if (this.config.getFeatures().contains(CrawlerFeature.RUNDB_INSERT)
+ && !this.config.getFileFormats().contains(DatasetFileFormat.EVIO)) {
+ throw new IllegalStateException("Run database is enabled without EVIO file filter active.");
}
// Configure the max wait time for file caching operations.
@@ -340,11 +423,35 @@
*/
private void printUsage() {
final HelpFormatter help = new HelpFormatter();
- help.printHelp("EvioFileCrawler", "", OPTIONS, "");
+ // FIXME: include more info here and improve the way this looks (line width should be increased)
+ help.printHelp("Crawler [options]", "", OPTIONS, "");
System.exit(0);
}
/**
+ * Process a run using its run summary.
+ *
+ * @param runs the run log containing the list of run summaries
+ * @throws Exception if there is an error processing one of the runs
+ */
+ private RunProcessor processRun(final RunSummary runSummary) throws Exception {
+
+ LOGGER.info("processing run " + runSummary.getRun());
+
+ // Create a processor to process all the EVIO events in the run.
+ LOGGER.info("creating run processor for " + runSummary.getRun());
+ final RunProcessor runProcessor = this.createEvioRunProcessor((RunSummaryImpl) runSummary);
+
+ // Process all of the files from the run.
+ LOGGER.info("processing run " + runSummary.getRun());
+ runProcessor.processRun();
+
+ LOGGER.getHandlers()[0].flush();
+
+ return runProcessor;
+ }
+
+ /**
* Run the full crawler job.
*
* @throws Exception if there is some error during the job
@@ -354,7 +461,18 @@
LOGGER.info("starting Crawler job");
// Create the file visitor for crawling the root directory with the given date filter.
- final EvioFileVisitor visitor = new EvioFileVisitor(config.timestamp());
+ final CrawlerFileVisitor visitor = new CrawlerFileVisitor();
+
+ if (config.timestamp() != null) {
+ // Add date filter if timestamp is supplied.
+ visitor.addFilter(new DateFileFilter(config.timestamp()));
+ }
+
+ // Add file format filter.
+ for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+ LOGGER.info("adding file format filter for " + fileFormat.name());
+ }
+ visitor.addFilter(new FileFormatFilter(config.getFileFormats()));
// Walk the file tree using the visitor.
this.walk(visitor);
@@ -362,33 +480,45 @@
// Get the list of run data created by the visitor.
final RunSummaryMap runMap = visitor.getRunMap();
- // Process all runs that were found.
- for (RunSummary runSummary : runMap.getRunSummaries()) {
-
+ LOGGER.info("found " + runMap.size() + " runs from crawl job");
+
+ // Process all runs that were found.
+ for (final RunSummary runSummary : runMap.getRunSummaries()) {
+
if (runSummary == null) {
throw new IllegalArgumentException("The run summary is null for some weird reason.");
}
-
- LOGGER.info("starting full processing of run " + runSummary.getRun());
-
+
+ LOGGER.info("starting processing of run " + runSummary.getRun());
+
// Cache files from MSS.
this.cacheFiles(runSummary);
- // Process the run's files.
- RunProcessor runProcessor = processRun(runSummary);
-
- // Execute the run database update.
- this.updateRunDatabase(runSummary);
+ // Process the run's EVIO files.
+ if (!runSummary.getFiles(DatasetFileFormat.EVIO).isEmpty()) {
+ final RunProcessor runProcessor = this.processRun(runSummary);
+ }
+
+ if (config.getFeatures().contains(CrawlerFeature.RUNDB_INSERT)) {
+ // Execute the run database update.
+ this.updateRunDatabase(runSummary);
+ } else {
+ LOGGER.info("updating run database is not enabled");
+ }
// Update the data catalog.
- this.updateDatacat(runProcessor.getEvioFileMetaData());
-
+ if (this.config.getFeatures().contains(CrawlerFeature.DATACAT)) {
+ this.updateDatacat(runSummary);
+ }
+
LOGGER.info("completed full processing of run " + runSummary);
- }
+ }
// Update the timestamp output file.
this.updateTimestamp();
+ LOGGER.getHandlers()[0].flush();
+
LOGGER.info("Crawler job is done!");
}
@@ -397,12 +527,20 @@
*
* @param runMap the map of run information including the EVIO file list
*/
- private void updateDatacat(List<EvioFileMetadata> metadataList) {
- if (this.config.updateDatacat()) {
- EvioDatacatUtilities.addEvioFiles(metadataList, config.datacatFolder());
- LOGGER.info("done updating data catalog");
- } else {
- LOGGER.info("updating data catalog is disabled");
+ private void updateDatacat(final RunSummary runSummary) {
+ final DatacatClient datacatClient = new DatacatClientFactory().createClient();
+ for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+ LOGGER.info("adding files to datacat with format " + fileFormat.name());
+ for (final File file : runSummary.getFiles(fileFormat)) {
+
+ LOGGER.info("adding file " + file.getPath() + " to datacat");
+
+ // Get folder for file by stripping out root directory.
+ final String folder = DatacatUtilities.getFolder(config.rootDir().getPath(), file);
+
+ // Register file in the catalog.
+ // DatacatUtilities.addFile(datacatClient, folder, file);
+ }
}
}
@@ -413,43 +551,38 @@
* @throws SQLException if there is a database query error
*/
private void updateRunDatabase(final RunSummary runSummary) throws SQLException {
- // Insert the run information into the database.
- if (config.updateRunDatabase()) {
-
- LOGGER.info("updating run database for run " + runSummary.getRun());
-
- // Open a DB connection.
- final Connection connection = config.connectionParameters().createConnection();
-
- // Create factory for interfacing to run database.
- RunManager runManager = new RunManager();
- runManager.setConnection(connection);
- final RunDatabaseDaoFactory dbFactory = runManager.createDaoFactory();
-
- // Create object for updating run info in the database.
- final RunSummaryDao runSummaryDao = dbFactory.createRunSummaryDao();
-
- // Delete existing run summary if necessary.
- if (runSummaryDao.runSummaryExists(runSummary.getRun())) {
- if (this.config.allowUpdates()) {
- LOGGER.info("deleting existing information for run " + runSummary.getRun());
- runSummaryDao.deleteFullRunSummary(runSummary);
- } else {
- throw new RuntimeException("Run " + runSummary.getRun() + " exists in database and deletion is not enabled.");
- }
- }
-
- // Insert run summary into database.
- runSummaryDao.insertFullRunSummary(runSummary);
-
- // Close the DB connection.
- connection.close();
-
- LOGGER.info("done updating run database");
-
- } else {
- LOGGER.info("updating run database is disabled");
- }
+
+ LOGGER.info("updating run database for run " + runSummary.getRun());
+
+ // Open a DB connection.
+ final Connection connection = config.connectionParameters().createConnection();
+
+ // Create factory for interfacing to run database.
+ final RunManager runManager = new RunManager();
+ runManager.setConnection(connection);
+ final RunDatabaseDaoFactory dbFactory = runManager.createDaoFactory();
+
+ // Create object for updating run info in the database.
+ final RunSummaryDao runSummaryDao = dbFactory.createRunSummaryDao();
+
+ // Delete existing run summary if necessary.
+ if (runSummaryDao.runSummaryExists(runSummary.getRun())) {
+ if (this.config.features.contains(CrawlerFeature.RUNDB_UPDATE)) {
+ LOGGER.info("deleting existing information for run " + runSummary.getRun());
+ runSummaryDao.deleteFullRunSummary(runSummary);
+ } else {
+ throw new RuntimeException("Run " + runSummary.getRun()
+ + " exists in database and deletion is not enabled.");
+ }
+ }
+
+ // Insert run summary into database.
+ runSummaryDao.insertFullRunSummary(runSummary);
+
+ // Close the DB connection.
+ connection.close();
+
+ LOGGER.info("done updating run database");
LOGGER.getHandlers()[0].flush();
}
@@ -481,7 +614,7 @@
*
* @param visitor the file visitor
*/
- private void walk(final EvioFileVisitor visitor) {
+ private void walk(final CrawlerFileVisitor visitor) {
if (config.timestamp() != null) {
// Date filter from timestamp.
visitor.addFilter(new DateFileFilter(config.timestamp()));
@@ -494,7 +627,7 @@
visitor.addFilter(new RunFilter(config.acceptRuns()));
LOGGER.config("added run number filter");
} else {
- LOGGER.config("no run number filter used");
+ LOGGER.config("no run number filter will be used");
}
try {
@@ -505,5 +638,4 @@
throw new RuntimeException("Error while walking the directory tree.", e);
}
}
-
}
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java Wed Sep 23 07:49:16 2015
@@ -4,12 +4,16 @@
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Date;
+import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.hps.conditions.database.ConnectionParameters;
+import org.hps.datacat.client.DatasetFileFormat;
+import org.hps.datacat.client.DatasetSite;
import org.hps.record.evio.EvioEventProcessor;
/**
@@ -50,6 +54,21 @@
private String datacatFolder = null;
/**
+ * Set whether extraction of metadata from files is enabled.
+ */
+ private boolean enableMetadata;
+
+ /**
+ * Set of features enabled in this configuration.
+ */
+ Set<CrawlerFeature> features = new HashSet<CrawlerFeature>();
+
+ /**
+ * Set of file formats for filtering files.
+ */
+ Set<DatasetFileFormat> formats = new HashSet<DatasetFileFormat>();
+
+ /**
* The maximum depth to crawl.
*/
private Integer maxDepth = Integer.MAX_VALUE;
@@ -68,6 +87,11 @@
* The root directory to search for files, which defaults to the current directory.
*/
private File rootDir = new File(System.getProperty("user.dir"));
+
+ /**
+ * The dataset site for the datacat.
+ */
+ private DatasetSite site;
/**
* A timestamp to use for filtering input files on their creation date.
@@ -106,6 +130,44 @@
*/
Set<Integer> acceptRuns() {
return acceptRuns;
+ }
+
+ /**
+ * Add the default set of features.
+ */
+ CrawlerConfig addDefaultFeatures() {
+ final List<CrawlerFeature> defaultFeatures = Arrays.asList(CrawlerFeature.values());
+ this.features.addAll(defaultFeatures);
+ return this;
+ }
+
+ /**
+ * Add the default file formats.
+ */
+ CrawlerConfig addDefaultFileFormats() {
+ final List<DatasetFileFormat> defaultFormats = Arrays.asList(DatasetFileFormat.values());
+ this.formats.addAll(defaultFormats);
+ return this;
+ }
+
+ /**
+ * Add a feature to enable it.
+ *
+ * @return this object
+ */
+ CrawlerConfig addFeature(final CrawlerFeature feature) {
+ this.features.add(feature);
+ return this;
+ }
+
+ /**
+ * Add a file format for filtering.
+ *
+ * @param format the file format
+ */
+ CrawlerConfig addFileFormat(final DatasetFileFormat format) {
+ this.formats.add(format);
+ return this;
}
/**
@@ -162,6 +224,42 @@
}
/**
+ * Get the dataset site.
+ *
+ * @return the dataset site
+ */
+ DatasetSite datasetSite() {
+ return this.site;
+ }
+
+ /**
+ * Return <code>true</code> if metadata extraction from files is enabled.
+ *
+ * @return <code>true</code> if metadata extraction is enabled
+ */
+ boolean enableMetaData() {
+ return this.enableMetadata;
+ }
+
+ /**
+ * Get the set of enabled features.
+ *
+ * @return the set of enabled features
+ */
+ Set<CrawlerFeature> getFeatures() {
+ return this.features;
+ }
+
+ /**
+ * Get the file formats for filtering.
+ *
+ * @return the file formats for filtering
+ */
+ Set<DatasetFileFormat> getFileFormats() {
+ return this.formats;
+ }
+
+ /**
* Get the max depth in the directory tree to crawl.
*
* @return the max depth
@@ -191,6 +289,16 @@
}
/**
+ * Remove a feature to disable it.
+ *
+ * @return this object
+ */
+ CrawlerConfig removeFeature(final CrawlerFeature feature) {
+ this.features.remove(feature);
+ return this;
+ }
+
+ /**
* Get the root directory for the file search.
*
* @return the root directory for the file search
@@ -243,6 +351,26 @@
}
/**
+ * Set the dataset site.
+ *
+ * @return this object
+ */
+ void setDatasetSite(final DatasetSite site) {
+ this.site = site;
+ }
+
+ /**
+ * Set whether metadata extraction is enabled.
+ *
+ * @param enableMetadata <code>true</code> to enable metadata
+ * @return this object
+ */
+ CrawlerConfig setEnableMetadata(final boolean enableMetadata) {
+ this.enableMetadata = enableMetadata;
+ return this;
+ }
+
+ /**
* Set the max depth.
*
* @param maxDepth the max depth
Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,32 @@
+package org.hps.crawler;
+
+/**
+ * Enum for enabling or disabling features in the file crawler.
+ */
+enum CrawlerFeature {
+ /**
+ * Allow inserts into run database.
+ */
+ RUNDB_INSERT,
+ /**
+ * Allow updating the run database if the run exists already.
+ */
+ RUNDB_UPDATE,
+ /**
+ * Create list of EPICS data for inserting into run database.
+ */
+ EPICS,
+ /**
+ * Create list of scaler data for inserting into run database.
+ */
+ SCALERS,
+ /**
+ * Extract trigger config for inserting into run database.
+ */
+ TRIGGER,
+ /**
+ * Populate the data catalog with files that are found when crawling.
+ */
+ DATACAT
+}
+
Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,15 @@
+package org.hps.crawler;
+
+import java.io.File;
+
+public class CrawlerFileUtilities {
+
+ static boolean isHpsFile(File file) {
+ return file.getName().startsWith("hps");
+ }
+
+ static int getRunFromFileName(File file) {
+ String name = file.getName();
+ return Integer.parseInt(name.substring(4, 8));
+ }
+}
Copied: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java (from r3655, java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java)
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java Wed Sep 23 07:49:16 2015
@@ -7,27 +7,28 @@
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
-import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.hps.record.evio.EvioFileFilter;
-import org.hps.record.evio.EvioFileUtilities;
+import org.hps.datacat.client.DatasetFileFormat;
import org.lcsim.util.log.DefaultLogFormatter;
import org.lcsim.util.log.LogUtil;
/**
* A file visitor that crawls directories for EVIO files and returns the information as a {@link RunSummaryMap}.
+ * <p>
+ * The {@link #addFilter(FileFilter)} method can be used to add a file filter. Paths must pass all filters to
+ * be accepted.
*
* @author Jeremy McCormick, SLAC
*/
-final class EvioFileVisitor extends SimpleFileVisitor<Path> {
+final class CrawlerFileVisitor extends SimpleFileVisitor<Path> {
/**
* Setup logger.
*/
- private static final Logger LOGGER = LogUtil.create(EvioFileVisitor.class, new DefaultLogFormatter(), Level.FINE);
+ private static final Logger LOGGER = LogUtil.create(CrawlerFileVisitor.class, new DefaultLogFormatter(), Level.FINE);
/**
* A list of file filters to apply.
@@ -37,6 +38,7 @@
/**
* The run log containing information about files from each run.
*/
+ // FIXME: This should be replaced by a map of run summary to file set.
private final RunSummaryMap runs = new RunSummaryMap();
/**
@@ -44,12 +46,7 @@
*
* @param timestamp the timestamp which is used for date filtering
*/
- EvioFileVisitor(final Date timestamp) {
- this.addFilter(new EvioFileFilter());
- if (timestamp != null) {
- // Add date filter if timestamp is supplied.
- this.addFilter(new DateFileFilter(timestamp));
- }
+ CrawlerFileVisitor() {
}
/**
@@ -97,23 +94,25 @@
*/
@Override
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) {
+
final File file = path.toFile();
- if (this.accept(file)) {
+
+ if (this.accept(file)) {
// Get the run number from the file name.
- final Integer run = EvioFileUtilities.getRunFromName(file);
+ final Integer run = CrawlerFileUtilities.getRunFromFileName(file);
- // Get the sequence number from the file name.
- final Integer seq = EvioFileUtilities.getSequenceFromName(file);
+ // Get the file format.
+ DatasetFileFormat format = DatacatUtilities.getFileFormat(file);
- LOGGER.info("accepted file " + file.getPath() + " with run " + run + " and seq " + seq);
-
- // Add this file to the file list for the run.
- this.runs.getRunSummary(run).addFile(file);
+ LOGGER.info("accepted file " + file.getPath() + " with run " + run);
+
+ // Add file to run summary.
+ this.runs.getRunSummary(run).addFile(format, file);
} else {
// File was rejected by one of the filters.
- LOGGER.finer("rejected file " + file.getPath());
+ LOGGER.info("file " + file.getPath() + " was rejected");
}
// Always continue crawling.
return FileVisitResult.CONTINUE;
Added: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,414 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.nio.file.FileVisitOption;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.hps.datacat.client.DatacatClient;
+import org.hps.datacat.client.DatacatClientFactory;
+import org.hps.datacat.client.DatasetFileFormat;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Command line file crawler for populating the data catalog.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class DatacatCrawler {
+
+    /**
+     * Visitor which creates a {@link FileSet} from walking a directory tree.
+     * <p>
+     * Any number of {@link java.io.FileFilter} objects can be registered with this visitor to restrict which files are
+     * accepted.
+     *
+     * @author Jeremy McCormick, SLAC
+     */
+    final class DatacatFileVisitor extends SimpleFileVisitor<Path> {
+
+        /**
+         * The set of accepted files found while walking the tree, keyed by file format.
+         */
+        private final FileSet fileSet = new FileSet();
+
+        /**
+         * A list of file filters to apply.
+         */
+        private final List<FileFilter> filters = new ArrayList<FileFilter>();
+
+        /**
+         * Run the filters on the file to tell whether it should be accepted or not.
+         * <p>
+         * Every registered filter must accept the file; the first rejection wins.
+         *
+         * @param file the file to check
+         * @return <code>true</code> if file should be accepted
+         */
+        private boolean accept(final File file) {
+            boolean accept = true;
+            for (final FileFilter filter : this.filters) {
+                accept = filter.accept(file);
+                if (!accept) {
+                    break;
+                }
+            }
+            return accept;
+        }
+
+        /**
+         * Add a file filter.
+         *
+         * @param filter the file filter
+         */
+        void addFilter(final FileFilter filter) {
+            this.filters.add(filter);
+        }
+
+        /**
+         * Get the file set created by visiting the directory tree.
+         *
+         * @return the file set from visiting the directory tree
+         */
+        FileSet getFileSet() {
+            return this.fileSet;
+        }
+
+        /**
+         * Visit a single file; accepted files are added to the file set under their detected format.
+         *
+         * @param path the file to visit
+         * @param attrs the file attributes
+         * @return always {@link FileVisitResult#CONTINUE} so the whole tree is crawled
+         */
+        @Override
+        public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) {
+            final File file = path.toFile();
+            if (this.accept(file)) {
+                final DatasetFileFormat format = DatacatUtilities.getFileFormat(file);
+                fileSet.addFile(format, file);
+            }
+            return FileVisitResult.CONTINUE;
+        }
+    }
+
+    /**
+     * List of available file format names, built for the help/usage text.
+     */
+    private static String AVAILABLE_FORMATS;
+
+    /**
+     * Setup the logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(DatacatCrawler.class, new DefaultLogFormatter(), Level.CONFIG);
+
+    /**
+     * Command line options for the crawler.
+     */
+    private static final Options OPTIONS = new Options();
+
+    /*
+     * Build the format list first, because the "-o" option description below uses it.
+     */
+    static {
+        final StringBuffer buffer = new StringBuffer();
+        for (final DatasetFileFormat format : DatasetFileFormat.values()) {
+            buffer.append(format.name() + " ");
+        }
+        buffer.setLength(buffer.length() - 1);
+        AVAILABLE_FORMATS = buffer.toString();
+    }
+
+    /**
+     * Statically define the command options.
+     */
+    static {
+        OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)");
+        OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")");
+        OPTIONS.addOption("d", "directory", true, "root directory to crawl");
+        OPTIONS.addOption("f", "folder", true, "datacat folder");
+        OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
+        OPTIONS.addOption("o", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS);
+        OPTIONS.addOption("m", "metadata", false, "create metadata for datasets");
+        OPTIONS.addOption("s", "site", true, "datacat site");
+        OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
+        OPTIONS.addOption("x", "max-depth", true, "max depth to crawl");
+    }
+
+    /**
+     * Main method: parse the command line arguments and then run the crawler job.
+     *
+     * @param args the command line arguments
+     */
+    public static void main(final String[] args) {
+        final DatacatCrawler crawler = new DatacatCrawler();
+        crawler.parse(args);
+        crawler.run();
+    }
+
+    /**
+     * The crawler configuration; instantiated in {@link #parse(String[])}.
+     */
+    private CrawlerConfig config;
+
+    /**
+     * The command line options parser (POSIX style).
+     */
+    private final PosixParser parser = new PosixParser();
+
+    /**
+     * Throw an exception if the path doesn't exist in the data catalog or it is not a folder.
+     *
+     * @param folder the folder in the datacat
+     * @throws RuntimeException if the given path does not exist or it is not a folder
+     */
+    void checkFolder(final String folder) {
+        final DatacatClient client = new DatacatClientFactory().createClient();
+        if (!client.exists(folder)) {
+            throw new RuntimeException("The folder " + folder + " does not exist in the data catalog.");
+        }
+        if (!client.isFolder(folder)) {
+            throw new RuntimeException("The path " + folder + " is not a folder.");
+        }
+    }
+
+    /**
+     * Parse command line options and load them into the crawler configuration.
+     *
+     * @param args the command line arguments
+     * @return this object (for method chaining)
+     * @throws RuntimeException if a required argument is missing or parsing fails
+     */
+    public DatacatCrawler parse(final String[] args) {
+
+        LOGGER.config("parsing command line options");
+
+        // Create the configuration exactly once (it was previously assigned twice).
+        this.config = new CrawlerConfig();
+
+        try {
+            final CommandLine cl = this.parser.parse(OPTIONS, args);
+
+            // Print help and exit.
+            if (cl.hasOption("h") || args.length == 0) {
+                this.printUsage();
+            }
+
+            // Log level.
+            if (cl.hasOption("L")) {
+                final Level level = Level.parse(cl.getOptionValue("L"));
+                LOGGER.config("setting log level to " + level);
+                LOGGER.setLevel(level);
+            }
+
+            // Root directory for file crawling.
+            if (cl.hasOption("d")) {
+                final File rootDir = new File(cl.getOptionValue("d"));
+                if (!rootDir.exists()) {
+                    throw new IllegalArgumentException("The directory does not exist.");
+                }
+                if (!rootDir.isDirectory()) {
+                    throw new IllegalArgumentException("The specified path is not a directory.");
+                }
+                config.setRootDir(rootDir);
+                LOGGER.config("root dir set to " + config.rootDir());
+            }
+
+            // Timestamp file for date filtering.
+            if (cl.hasOption("t")) {
+                final File timestampFile = new File(cl.getOptionValue("t"));
+                config.setTimestampFile(timestampFile);
+                if (!timestampFile.exists()) {
+                    try {
+                        // Create new time stamp file which will have its date updated at the end of the job.
+                        LOGGER.config("creating new timestamp file " + timestampFile.getPath());
+                        timestampFile.createNewFile();
+                    } catch (final IOException e) {
+                        throw new IllegalArgumentException("Error creating timestamp file: " + timestampFile.getPath());
+                    }
+                } else {
+                    try {
+                        // Get the date filter for files from an existing time stamp file provided by the user.
+                        final Date timestamp = new Date(Files
+                                .readAttributes(config.timestampFile().toPath(), BasicFileAttributes.class)
+                                .lastModifiedTime().toMillis());
+                        config.setTimestamp(timestamp);
+                        LOGGER.config("got timestamp " + timestamp + " from existing file "
+                                + config.timestampFile().getPath());
+                    } catch (final IOException e) {
+                        throw new RuntimeException("Error getting attributes of timestamp file.", e);
+                    }
+                }
+            }
+
+            // User supplied timestamp string that is converted to a date for file filtering.
+            if (cl.hasOption("b")) {
+                try {
+                    if (config.timestamp() != null) {
+                        LOGGER.warning("existing timestamp from file " + config.timestamp()
+                                + " will be overridden by date from -b argument");
+                    }
+                    config.setTimestamp(cl.getOptionValue("b"));
+                    LOGGER.config("set timestamp to " + config.timestamp() + " from -b argument");
+                } catch (final java.text.ParseException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+
+            // Max depth to crawl.
+            if (cl.hasOption("x")) {
+                final Integer maxDepth = Integer.parseInt(cl.getOptionValue("x"));
+                if (maxDepth < 1) {
+                    throw new IllegalArgumentException("invalid -x argument for maxDepth: " + maxDepth);
+                }
+                config.setMaxDepth(maxDepth);
+                LOGGER.config("set max depth to " + maxDepth);
+            }
+
+            // Configure enabled file formats.
+            if (cl.hasOption("o")) {
+                for (final String arg : cl.getOptionValues("o")) {
+                    DatasetFileFormat format = null;
+                    try {
+                        format = DatasetFileFormat.valueOf(arg);
+                    } catch (IllegalArgumentException | NullPointerException e) {
+                        throw new IllegalArgumentException("The format " + arg + " is not valid.", e);
+                    }
+                    LOGGER.config("adding format " + format.name());
+                    this.config.addFileFormat(format);
+                }
+            } else {
+                throw new RuntimeException("The -o argument with data format must be supplied at least once.");
+            }
+
+            // Enable metadata extraction from files.
+            if (cl.hasOption("m")) {
+                config.setEnableMetadata(true);
+                LOGGER.config("metadata extraction enabled");
+            }
+
+            // Datacat folder.
+            if (cl.hasOption("f")) {
+                config.setDatacatFolder(cl.getOptionValue("f"));
+                LOGGER.config("set datacat folder to " + config.datacatFolder());
+            } else {
+                throw new RuntimeException("The -f argument with the datacat folder is required.");
+            }
+
+        } catch (final ParseException e) {
+            throw new RuntimeException("Error parsing options.", e);
+        }
+
+        // Check the datacat folder which must already exist.
+        this.checkFolder(config.datacatFolder());
+
+        // Defensive check; the -o handling above already guarantees at least one format.
+        if (this.config.getFileFormats().isEmpty()) {
+            throw new IllegalStateException("At least one file format must be provided with the -o switch.");
+        }
+
+        LOGGER.info("done parsing command line options");
+        LOGGER.getHandlers()[0].flush();
+
+        return this;
+    }
+
+    /**
+     * Print the usage statement for this tool to the console and then exit the program.
+     */
+    private void printUsage() {
+        new HelpFormatter().printHelp(70, "DatacatCrawler [options]", "", OPTIONS, "");
+        System.exit(0);
+    }
+
+    /**
+     * Run the crawler job: walk the directory tree and then update the data catalog
+     * with the files that were found.
+     */
+    void run() {
+
+        // Create the file visitor for crawling the root directory.
+        final DatacatFileVisitor visitor = new DatacatFileVisitor();
+
+        // Add the file format filter. The date and run-number filters are added
+        // by walk(); adding the date filter here as well registered it twice.
+        for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+            LOGGER.info("adding file format filter for " + fileFormat.name());
+        }
+        visitor.addFilter(new FileFormatFilter(config.getFileFormats()));
+
+        // Walk the file tree using the visitor.
+        this.walk(visitor);
+
+        // Update the data catalog.
+        this.updateDatacat(visitor.getFileSet());
+    }
+
+    /**
+     * Update the data catalog by registering every file in the set, grouped by format.
+     *
+     * @param fileSet the set of accepted files, keyed by file format
+     */
+    private void updateDatacat(final FileSet fileSet) {
+        final DatacatClient datacatClient = new DatacatClientFactory().createClient();
+        for (final DatasetFileFormat fileFormat : config.getFileFormats()) {
+            LOGGER.info("adding files to datacat with format " + fileFormat.name());
+            for (final File file : fileSet.get(fileFormat)) {
+
+                LOGGER.info("adding file " + file.getAbsolutePath() + " to datacat");
+
+                // Create metadata if this is enabled (takes awhile).
+                Map<String, Object> metadata = new HashMap<String, Object>();
+                if (config.enableMetaData()) {
+                    metadata = DatacatUtilities.createMetadata(file);
+                }
+
+                // Register file in the catalog.
+                DatacatUtilities.addFile(datacatClient, config.datacatFolder(), file, metadata);
+            }
+        }
+    }
+
+    /**
+     * Walk the directory tree to find files matching the configured filters.
+     *
+     * @param visitor the file visitor
+     */
+    private void walk(final DatacatFileVisitor visitor) {
+        if (config.timestamp() != null) {
+            // Date filter from timestamp.
+            visitor.addFilter(new DateFileFilter(config.timestamp()));
+            LOGGER.config("added date filter with time stamp " + config.timestamp());
+        }
+
+        // Is the accept run list not empty? (Empty means accept all runs.)
+        if (!config.acceptRuns().isEmpty()) {
+            // List of run numbers to accept.
+            visitor.addFilter(new RunFilter(config.acceptRuns()));
+            LOGGER.config("added run number filter");
+        } else {
+            LOGGER.config("no run number filter will be used");
+        }
+
+        try {
+            // Walk the file tree from the root directory.
+            // NOTE(review): empty options means symbolic links are not followed.
+            final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
+            Files.walkFileTree(config.rootDir().toPath(), options, config.maxDepth(), visitor);
+        } catch (final IOException e) {
+            throw new RuntimeException("Error while walking the directory tree.", e);
+        }
+    }
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,134 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.hps.datacat.client.DatacatClient;
+import org.hps.datacat.client.DatasetDataType;
+import org.hps.datacat.client.DatasetFileFormat;
+import org.hps.datacat.client.DatasetSite;
+
+/**
+ * Datacat utilities for the crawler.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+class DatacatUtilities {
+
+    /**
+     * Map from file extension to file format, built once from the {@link DatasetFileFormat} values.
+     */
+    static final Map<String, DatasetFileFormat> formatMap = new HashMap<String, DatasetFileFormat>();
+    static {
+        for (final DatasetFileFormat format : DatasetFileFormat.values()) {
+            formatMap.put(format.extension(), format);
+        }
+    }
+
+    /**
+     * Add a file to the data catalog at the SLAC site, deriving its format and data type from the file name.
+     *
+     * @param datacatClient the data catalog client
+     * @param folder the folder name e.g. "data/raw"
+     * @param file the file to register
+     * @param metadata the file's metadata as key-value pairs
+     */
+    static void addFile(final DatacatClient datacatClient, final String folder, final File file,
+            final Map<String, Object> metadata) {
+        final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
+        final DatasetDataType dataType = DatacatUtilities.getDataType(file);
+        DatacatUtilities.addFile(datacatClient, folder, file, metadata, fileFormat, dataType, DatasetSite.SLAC);
+    }
+
+    /**
+     * Add a file to the data catalog.
+     *
+     * @param client the data catalog client
+     * @param folder the folder name e.g. "data/raw"
+     * @param file the file to register
+     * @param metadata the file's meta data including the path
+     * @param fileFormat the file's format (EVIO, LCIO etc.)
+     * @param dataType the file's data type (RAW, RECON, etc.)
+     * @param site the dataset site
+     * @return the HTTP response code
+     */
+    static int addFile(final DatacatClient client, final String folder, final File file,
+            final Map<String, Object> metadata, final DatasetFileFormat fileFormat, final DatasetDataType dataType,
+            final DatasetSite site) {
+
+        // Add the dataset to the data catalog using the REST API.
+        final int response = client.addDataset(folder, dataType, file.getAbsolutePath(), site, fileFormat,
+                file.getName(), metadata);
+
+        return response;
+    }
+
+    /**
+     * Create metadata for a file using the reader that matches its format and data type.
+     *
+     * @param file the file
+     * @return the metadata key-value pairs
+     * @throws RuntimeException if no reader exists for the file's format and type, or reading fails
+     */
+    static Map<String, Object> createMetadata(final File file) {
+        final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
+        final DatasetDataType dataType = DatacatUtilities.getDataType(file);
+        final FileMetadataReader reader = DatacatUtilities.getFileMetaDataReader(fileFormat, dataType);
+        if (reader == null) {
+            throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + dataType.name() + ".");
+        }
+        Map<String, Object> metadata;
+        try {
+            metadata = reader.getMetadata(file);
+        } catch (final IOException e) {
+            throw new RuntimeException(e);
+        }
+        return metadata;
+    }
+
+    /**
+     * Get the dataset data type for a file, based on its format and file name.
+     *
+     * @param file the file
+     * @return the data type (never <code>null</code>)
+     * @throws IllegalArgumentException if the format is unknown or the type cannot be determined
+     */
+    static DatasetDataType getDataType(final File file) {
+        final DatasetFileFormat fileFormat = getFileFormat(file);
+        DatasetDataType dataType = null;
+        if (fileFormat == null) {
+            throw new IllegalArgumentException("File has unknown format: " + file.getAbsolutePath());
+        }
+        if (fileFormat.equals(DatasetFileFormat.EVIO)) {
+            dataType = DatasetDataType.RAW;
+        } else if (fileFormat.equals(DatasetFileFormat.LCIO)) {
+            dataType = DatasetDataType.RECON;
+        } else if (fileFormat.equals(DatasetFileFormat.ROOT)) {
+            // FIXME: This should probably open the file and determine what it contains.
+            if (file.getName().contains("_dqm")) {
+                dataType = DatasetDataType.DQM;
+            } else if (file.getName().contains("_dst")) {
+                dataType = DatasetDataType.DST;
+            }
+        } else if (fileFormat.equals(DatasetFileFormat.AIDA)) {
+            dataType = DatasetDataType.DQM;
+        }
+        if (dataType == null) {
+            throw new IllegalArgumentException("Could not determine data type for format: " + fileFormat.name());
+        }
+        return dataType;
+    }
+
+    /**
+     * Get the file format from a file's extension, first stripping an EVIO sequence
+     * number suffix (e.g. ".evio.123") if present.
+     *
+     * @param pathname the file
+     * @return the file format or <code>null</code> if the extension is not recognized
+     */
+    static DatasetFileFormat getFileFormat(final File pathname) {
+        String name = pathname.getName();
+        if (name.contains(DatasetFileFormat.EVIO.extension()) && !name.endsWith(DatasetFileFormat.EVIO.extension())) {
+            name = stripEvioFileNumber(name);
+        }
+        final String extension = name.substring(name.lastIndexOf(".") + 1);
+        return formatMap.get(extension);
+    }
+
+    /**
+     * Get the metadata reader for a format and data type combination.
+     *
+     * @param fileFormat the file format
+     * @param dataType the data type
+     * @return the reader or <code>null</code> if no reader applies
+     */
+    static FileMetadataReader getFileMetaDataReader(final DatasetFileFormat fileFormat, final DatasetDataType dataType) {
+        FileMetadataReader reader = null;
+        if (fileFormat.equals(DatasetFileFormat.LCIO)) {
+            reader = new LcioMetadataReader();
+        } else if (fileFormat.equals(DatasetFileFormat.EVIO)) {
+            reader = new EvioMetadataReader();
+        } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DST)) {
+            reader = new RootDstMetadataReader();
+        }
+        return reader;
+    }
+
+    /**
+     * Get a file's folder path relative to a root directory.
+     * <p>
+     * NOTE(review): uses {@link String#replace}, which removes every occurrence of
+     * the root dir string, not just a leading one — confirm paths cannot repeat it.
+     *
+     * @param rootDir the root directory path to strip
+     * @param file the file
+     * @return the relative folder path
+     */
+    static String getFolder(final String rootDir, final File file) {
+        String stripDir = rootDir;
+        if (!stripDir.endsWith("/")) {
+            stripDir += "/";
+        }
+        final String folder = file.getParentFile().getPath().replace(stripDir, "");
+        return folder;
+    }
+
+    /**
+     * Strip a trailing EVIO sequence number from a file name (e.g. "run.evio.123" becomes "run.evio").
+     *
+     * @param name the file name
+     * @return the name without the trailing sequence number
+     */
+    static String stripEvioFileNumber(final String name) {
+        String strippedName = name;
+        if (!name.endsWith(DatasetFileFormat.EVIO.extension())) {
+            strippedName = name.substring(0, name.lastIndexOf("."));
+        }
+        return strippedName;
+    }
+
+    /**
+     * Prevent instantiation of this utility class.
+     */
+    private DatacatUtilities() {
+    }
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,148 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.hps.record.evio.EventTagConstant;
+import org.hps.record.evio.EvioEventUtilities;
+import org.hps.record.evio.EvioFileUtilities;
+import org.jlab.coda.jevio.EvioEvent;
+import org.jlab.coda.jevio.EvioException;
+import org.jlab.coda.jevio.EvioReader;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Reads metadata from EVIO files.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class EvioMetadataReader implements FileMetadataReader {
+
+ private static Logger LOGGER = LogUtil.create(EvioMetadataReader.class, new DefaultLogFormatter(), Level.ALL);
+
+ /**
+ * Get the EVIO file metadata.
+ *
+ * @param file the EVIO file
+ * @return the metadata map of key and value pairs
+ */
+ @Override
+ public Map<String, Object> getMetadata(File file) throws IOException {
+
+ Date startDate = null;
+ Date endDate = null;
+ int badEventCount = 0;
+ int eventCount = 0;
+ int byteCount = 0;
+ boolean hasPrestart = false;
+ boolean hasEnd = false;
+ int[] eventIdData = null;
+ Integer run = null;
+ Integer endEvent = null;
+ Integer startEvent = null;
+ Long lastTimestamp = null;
+
+ EvioReader evioReader = null;
+ try {
+ evioReader = EvioFileUtilities.open(file, false);
+ } catch (EvioException e) {
+ throw new IOException(e);
+ }
+
+ int fileNumber = EvioFileUtilities.getSequenceFromName(file);
+
+ EvioEvent evioEvent = null;
+
+ while (true) {
+ try {
+ evioEvent = evioReader.parseNextEvent();
+ } catch (IOException | EvioException e) {
+ ++badEventCount;
+ continue;
+ }
+ if (evioEvent == null) {
+ break;
+ }
+ byteCount += evioEvent.getTotalBytes();
+ if (EventTagConstant.PRESTART.equals(evioEvent)) {
+ LOGGER.info("found PRESTART");
+ hasPrestart = true;
+ final int[] controlEventData = EvioEventUtilities.getControlEventData(evioEvent);
+ final long timestamp = controlEventData[0] * 1000L;
+ startDate = new Date(timestamp);
+ LOGGER.info("set start date to " + startDate + " from PRESTART");
+ if (run == null) {
+ run = controlEventData[1];
+ LOGGER.info("set run to " + run);
+ }
+ } else if (EventTagConstant.END.equals(evioEvent)) {
+ LOGGER.info("found END event");
+ hasEnd = true;
+ final int[] controlEventData = EvioEventUtilities.getControlEventData(evioEvent);
+ final long timestamp = controlEventData[0] * 1000L;
+ endDate = new Date(timestamp);
+ LOGGER.info("set end date to " + endDate);
+ if (run == null) {
+ run = controlEventData[1];
+ LOGGER.info("set run to " + run);
+ }
+ } else if (EvioEventUtilities.isPhysicsEvent(evioEvent)) {
+ final int[] headBankData = EvioEventUtilities.getHeadBankData(evioEvent);
+ if (startDate == null) {
+ if (headBankData[3] != 0) {
+ startDate = new Date(headBankData[3] * 1000L);
+ LOGGER.info("set start date to " + startDate + " from physics event");
+ }
+ }
+ if (run == null) {
+ run = headBankData[1];
+ LOGGER.info("set run to " + run + " from physics event");
+ }
+ eventIdData = EvioEventUtilities.getEventIdData(evioEvent);
+ if (startEvent == null) {
+ startEvent = eventIdData[0];
+ LOGGER.info("set start event " + startEvent);
+ }
+ if (headBankData[3] != 0) {
+ lastTimestamp = headBankData[3] * 1000L;
+ }
+ ++eventCount;
+ }
+ }
+
+ // Set end date from last valid timestamp.
+ if (endDate == null) {
+ endDate = new Date(lastTimestamp);
+ LOGGER.info("set end date to " + endDate + " from last timestamp " + lastTimestamp);
+ }
+
+ // Set end event number.
+ if (eventIdData != null) {
+ endEvent = eventIdData[0];
+ LOGGER.info("set end event " + endEvent);
+ }
+
+ Map<String, Object> metaDataMap = new HashMap<String, Object>();
+
+ metaDataMap.put("runMin", run);
+ metaDataMap.put("runMax", run);
+ metaDataMap.put("eventCount", eventCount);
+ metaDataMap.put("size", byteCount);
+ metaDataMap.put("fileNumber", fileNumber);
+ metaDataMap.put("badEventCount", badEventCount);
+ metaDataMap.put("endTimestamp", endDate.getTime());
+ metaDataMap.put("startTimestamp", startDate.getTime());
+ metaDataMap.put("startEvent", startEvent);
+ metaDataMap.put("endEvent", endEvent);
+ metaDataMap.put("hasEnd", hasEnd ? 1 : 0);
+ metaDataMap.put("hasPrestart", hasPrestart ? 1 : 0);
+
+ return metaDataMap;
+ }
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,64 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.hps.datacat.client.DatasetFileFormat;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Filter files on their format.
+ * <p>
+ * Only files matching one of the filter's formats will be accepted by the file visitor.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class FileFormatFilter implements FileFilter {
+
+    /**
+     * Setup logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(FileFormatFilter.class, new DefaultLogFormatter(), Level.ALL);
+
+    /**
+     * The set of accepted file formats.
+     */
+    private final Set<DatasetFileFormat> formats;
+
+    /**
+     * Create a new filter with the given formats.
+     *
+     * @param formats the set of file formats to accept
+     * @throws IllegalArgumentException if the formats collection is null or empty
+     */
+    FileFormatFilter(final Set<DatasetFileFormat> formats) {
+        if (formats == null) {
+            throw new IllegalArgumentException("The formats collection is null.");
+        }
+        if (formats.isEmpty()) {
+            throw new IllegalArgumentException("The formats collection is empty.");
+        }
+        this.formats = formats;
+    }
+
+    /**
+     * Returns <code>true</code> if the file should be accepted, e.g. it matches one
+     * of the filter's formats.
+     *
+     * @param pathname the file's full path
+     * @return <code>true</code> if the file's format is in the accepted set
+     */
+    @Override
+    public boolean accept(final File pathname) {
+        final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname);
+        if (fileFormat != null) {
+            // Log per-file details at FINE so INFO output is not flooded.
+            LOGGER.fine("file " + pathname.getPath() + " has format " + fileFormat.name());
+            return formats.contains(fileFormat);
+        } else {
+            LOGGER.fine("rejected file " + pathname.getPath() + " with unknown format");
+            return false;
+        }
+    }
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,11 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+
+/**
+ * Interface for reading metadata key-value pairs from a data file, for
+ * registration of the file as a dataset in the data catalog.
+ */
+public interface FileMetadataReader {
+
+    /**
+     * Get the metadata for the given file.
+     *
+     * @param file the input file
+     * @return the metadata as a map of keys to values
+     * @throws IOException if there is a problem reading the file
+     */
+    public Map<String, Object> getMetadata(File file) throws IOException;
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,27 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.hps.datacat.client.DatasetFileFormat;
+
+/**
+ * Map of file format to a list of files.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class FileSet extends HashMap<DatasetFileFormat, List<File>> {
+
+    /**
+     * Serialization UID; HashMap is serializable so the subclass should declare one.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Get the list of files for a format, first creating and registering an empty
+     * list if the format has no entry yet.
+     * <p>
+     * Note that unlike {@link HashMap#get(Object)} this method mutates the map
+     * for a missing key.
+     *
+     * @param format the file format
+     * @return the (possibly empty) list of files for the format
+     */
+    public List<File> get(DatasetFileFormat format) {
+        if (super.get(format) == null) {
+            this.put(format, new ArrayList<File>());
+        }
+        return super.get(format);
+    }
+
+    /**
+     * Add a file under the given format.
+     *
+     * @param format the file format
+     * @param file the file to add
+     */
+    public void addFile(DatasetFileFormat format, File file) {
+        this.get(format).add(file);
+    }
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,70 @@
+package org.hps.crawler;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.lcsim.conditions.ConditionsManager;
+import org.lcsim.conditions.ConditionsManagerImplementation;
+import org.lcsim.conditions.ConditionsReader;
+import org.lcsim.event.EventHeader;
+import org.lcsim.lcio.LCIOReader;
+import org.lcsim.util.loop.DummyConditionsConverter;
+import org.lcsim.util.loop.DummyDetector;
+
+/**
+ * Reads metadata from LCIO files with reconstructed data.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class LcioMetadataReader implements FileMetadataReader {
+
+    /*
+     * Setup the conditions system in dummy mode so events can be read without
+     * a real detector description.
+     */
+    static {
+        ConditionsManager conditionsManager = ConditionsManager.defaultInstance();
+        ConditionsReader dummyReader = ConditionsReader.createDummy();
+        ((ConditionsManagerImplementation) conditionsManager).setConditionsReader(dummyReader, "DUMMY");
+        DummyDetector detector = new DummyDetector("DUMMY");
+        conditionsManager.registerConditionsConverter(new DummyConditionsConverter(detector));
+    }
+
+    /**
+     * Get the metadata for the LCIO file.
+     * <p>
+     * Counts the events and takes the run number from the first event; runMin
+     * and runMax are <code>null</code> if the file contains no events.
+     *
+     * @param file the LCIO file
+     * @return the metadata map with key and value pairs
+     * @throws IOException if there is a problem opening or reading the file
+     */
+    @Override
+    public Map<String, Object> getMetadata(File file) throws IOException {
+        Map<String, Object> metaData = new HashMap<String, Object>();
+        LCIOReader reader = null;
+        try {
+            reader = new LCIOReader(file);
+            EventHeader eventHeader = null;
+            int eventCount = 0;
+            Integer run = null;
+            try {
+                while ((eventHeader = reader.read()) != null) {
+                    if (run == null) {
+                        run = eventHeader.getRunNumber();
+                    }
+                    eventCount++;
+                }
+            } catch (EOFException e) {
+                // Expected: the reader signals end-of-file with EOFException,
+                // which simply terminates the event loop, so the stack trace
+                // previously printed here was just noise.
+            }
+            metaData.put("eventCount", eventCount);
+            metaData.put("runMin", run);
+            metaData.put("runMax", run);
+        } finally {
+            if (reader != null) {
+                reader.close();
+            }
+        }
+        return metaData;
+    }
+}
Added: java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,65 @@
+package org.hps.crawler;
+
+import hep.io.root.RootClassNotFound;
+import hep.io.root.RootFileReader;
+import hep.io.root.interfaces.TBranch;
+import hep.io.root.interfaces.TLeafElement;
+import hep.io.root.interfaces.TObjArray;
+import hep.io.root.interfaces.TTree;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This is a very simple metadata reader for ROOT DST files.
+ * <p>
+ * It currently only sets the standard metadata for event count, run number and size.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class RootDstMetadataReader implements FileMetadataReader {
+
+    /**
+     * Get the metadata for a ROOT DST file.
+     * <p>
+     * The run number is read from the "run_number" leaf of the "HPS_Event" tree;
+     * runMin and runMax stay 0 if that leaf is not found.
+     *
+     * @param file the ROOT DST file
+     * @return the metadata for a ROOT DST file
+     * @throws IOException if the file cannot be opened or read
+     */
+    @Override
+    public Map<String, Object> getMetadata(File file) throws IOException {
+        Map<String, Object> metadata = new HashMap<String, Object>();
+        RootFileReader rootReader = null;
+        long eventCount = 0;
+        int runMin = 0;
+        int runMax = 0;
+        long size = 0;
+        try {
+            rootReader = new RootFileReader(file.getAbsolutePath());
+            TTree tree = (TTree) rootReader.get("HPS_Event");
+            eventCount = tree.getEntries();
+            size = tree.getTotBytes();
+            TObjArray leaves = tree.getLeaves();
+
+            // Scan the leaves for the run number; the first entry's value is
+            // used for both runMin and runMax.
+            for (Object object : leaves) {
+                TLeafElement leaf = (TLeafElement) object;
+                if ("run_number".equals(leaf.getName())) {
+                    runMin = (int) leaf.getWrappedValue(0);
+                    runMax = runMin;
+                    break;
+                }
+            }
+        } catch (IOException | RootClassNotFound e) {
+            throw new IOException(e);
+        } finally {
+            if (rootReader != null) {
+                rootReader.close();
+            }
+        }
+        metadata.put("eventCount", eventCount);
+        metadata.put("runMin", runMin);
+        metadata.put("runMax", runMax);
+        metadata.put("size", size);
+        return metadata;
+    }
+}
Added: java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java (added)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java Wed Sep 23 07:49:16 2015
@@ -0,0 +1,228 @@
+package org.hps.run.database;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.hps.datacat.client.DatasetFileFormat;
+import org.hps.record.epics.EpicsRunProcessor;
+import org.hps.record.evio.EvioFileMetadata;
+import org.hps.record.evio.EvioFileMetadataAdapter;
+import org.hps.record.evio.EvioFileSequenceComparator;
+import org.hps.record.evio.EvioFileSource;
+import org.hps.record.evio.EvioLoop;
+import org.hps.record.scalers.ScalersEvioProcessor;
+import org.hps.record.triggerbank.TiTimeOffsetEvioProcessor;
+import org.hps.record.triggerbank.TriggerConfig;
+import org.hps.record.triggerbank.TriggerConfigVariable;
+import org.lcsim.util.log.DefaultLogFormatter;
+import org.lcsim.util.log.LogUtil;
+
+/**
+ * Processes EVIO files from a run and extracts meta data for updating the run database.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public final class RunProcessor {
+
+    /**
+     * Setup logger.
+     */
+    private static final Logger LOGGER = LogUtil.create(RunProcessor.class, new DefaultLogFormatter(), Level.FINE);
+
+    /**
+     * Processor for extracting EPICS information (optional; see {@link #addEpicsProcessor()}).
+     */
+    private EpicsRunProcessor epicsProcessor;
+
+    /**
+     * The data source with the list of EVIO files to process.
+     */
+    private final EvioFileSource evioFileSource;
+
+    /**
+     * The EVIO event processing loop.
+     */
+    private final EvioLoop evioLoop = new EvioLoop();
+
+    /**
+     * Processor for extracting scaler data (optional; see {@link #addScalerProcessor()}).
+     */
+    private ScalersEvioProcessor scalersProcessor;
+
+    /**
+     * Processor for extracting TI time offset (optional; see {@link #addTriggerTimeProcessor()}).
+     */
+    private TiTimeOffsetEvioProcessor triggerTimeProcessor;
+
+    /**
+     * Record loop adapter for getting file metadata.
+     */
+    private final EvioFileMetadataAdapter metadataAdapter = new EvioFileMetadataAdapter();
+
+    /**
+     * The run summary for the run.
+     */
+    private final RunSummaryImpl runSummary;
+
+    /**
+     * Create a run processor.
+     *
+     * @param runSummary the run summary object for the run
+     * @throws IllegalArgumentException if the run summary has no EVIO files
+     */
+    public RunProcessor(RunSummaryImpl runSummary) {
+
+        this.runSummary = runSummary;
+
+        List<File> evioFiles = runSummary.getFiles(DatasetFileFormat.EVIO);
+        if (evioFiles == null || evioFiles.isEmpty()) {
+            throw new IllegalArgumentException("No EVIO files found in file set.");
+        }
+
+        // Sort the list of EVIO files by sequence; this sorts the run summary's own list in place.
+        Collections.sort(evioFiles, new EvioFileSequenceComparator());
+
+        // Setup record loop.
+        evioFileSource = new EvioFileSource(evioFiles);
+        evioLoop.setEvioFileSource(evioFileSource);
+
+        // Add file metadata processor.
+        evioLoop.addRecordListener(metadataAdapter);
+        evioLoop.addLoopListener(metadataAdapter);
+    }
+
+    /**
+     * Enable extraction of EPICS data while the run is processed.
+     */
+    public void addEpicsProcessor() {
+        this.epicsProcessor = new EpicsRunProcessor();
+        evioLoop.addEvioEventProcessor(epicsProcessor);
+    }
+
+    /**
+     * Enable extraction of scaler data while the run is processed.
+     */
+    public void addScalerProcessor() {
+        scalersProcessor = new ScalersEvioProcessor();
+        // Accumulate scaler data across the whole run rather than resetting on every event.
+        scalersProcessor.setResetEveryEvent(false);
+        evioLoop.addEvioEventProcessor(scalersProcessor);
+    }
+
+    /**
+     * Enable extraction of the TI time offset while the run is processed.
+     */
+    public void addTriggerTimeProcessor() {
+        triggerTimeProcessor = new TiTimeOffsetEvioProcessor();
+        evioLoop.addEvioEventProcessor(triggerTimeProcessor);
+    }
+
+    /**
+     * Extract meta data from first file in run and set the run's start date from it.
+     *
+     * @throws IllegalStateException if no metadata exists or the start date is missing
+     */
+    private void processFirstFile() {
+        final List<EvioFileMetadata> metadataList = metadataAdapter.getEvioFileMetadata();
+        // Check for emptiness first; get(0) on an empty list would throw IndexOutOfBoundsException
+        // before the intended diagnostic could be raised.
+        if (metadataList.isEmpty() || metadataList.get(0) == null) {
+            throw new IllegalStateException("No meta data exists for first file.");
+        }
+        final EvioFileMetadata metadata = metadataList.get(0);
+        LOGGER.info("first file metadata: " + metadata.toString());
+        if (metadata.getStartDate() == null) {
+            throw new IllegalStateException("The start date is not set in the metadata.");
+        }
+        LOGGER.info("setting unix start time to " + metadata.getStartDate().getTime() + " from meta data");
+        runSummary.setStartDate(metadata.getStartDate());
+    }
+
+    /**
+     * Extract meta data from last file in run and set the run's end date and end-okay flag from it.
+     *
+     * @throws IllegalStateException if no metadata exists or the end date is missing
+     */
+    private void processLastFile() {
+        LOGGER.info("looking for " + runSummary.getEvioFiles().get(runSummary.getEvioFiles().size() - 1).getPath() + " metadata");
+        LOGGER.getHandlers()[0].flush();
+        final List<EvioFileMetadata> metadataList = this.metadataAdapter.getEvioFileMetadata();
+        // Check for emptiness first; get(size - 1) on an empty list would throw IndexOutOfBoundsException
+        // before the intended diagnostic could be raised.
+        if (metadataList.isEmpty() || metadataList.get(metadataList.size() - 1) == null) {
+            throw new IllegalStateException("Failed to find metadata for last file.");
+        }
+        final EvioFileMetadata metadata = metadataList.get(metadataList.size() - 1);
+        LOGGER.info("last file metadata: " + metadata.toString());
+        if (metadata.getEndDate() == null) {
+            throw new IllegalStateException("The end date is not set in the metadata.");
+        }
+        LOGGER.info("setting unix end time to " + metadata.getEndDate().getTime() + " from meta data");
+        runSummary.setEndDate(metadata.getEndDate());
+        LOGGER.info("setting has END to " + metadata.hasEnd());
+        runSummary.setEndOkay(metadata.hasEnd());
+    }
+
+    /**
+     * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and extracting the
+     * start and end dates.
+     *
+     * @throws Exception if there is an error processing a file
+     */
+    public void processRun() throws Exception {
+
+        LOGGER.info("processing " + this.runSummary.getEvioFiles().size() + " files from run "
+                + this.runSummary.getRun());
+
+        // Run processors over all files (-1 means no event limit).
+        LOGGER.info("looping over all events");
+        evioLoop.loop(-1);
+
+        LOGGER.info("got " + metadataAdapter.getEvioFileMetadata().size() + " metadata objects from loop");
+        LOGGER.getHandlers()[0].flush();
+
+        // Set start date from first file.
+        LOGGER.info("processing first file");
+        this.processFirstFile();
+
+        // Set end date from last file.
+        LOGGER.info("processing last file");
+        this.processLastFile();
+
+        // Update run summary from processors.
+        LOGGER.info("updating run summary");
+        this.updateRunSummary();
+
+        LOGGER.info("run processor done with run " + this.runSummary.getRun());
+    }
+
+    /**
+     * Update the current run summary by copying data to it from the EVIO processors and the event loop.
+     * <p>
+     * Only data from processors that were actually registered via the <code>add*Processor</code> methods is copied.
+     */
+    private void updateRunSummary() {
+
+        // Set total number of events from the event loop.
+        LOGGER.info("setting total events " + evioLoop.getTotalCountableConsumed());
+        runSummary.setTotalEvents((int) evioLoop.getTotalCountableConsumed());
+
+        if (scalersProcessor != null) {
+            // Add scaler data from the scalers EVIO processor.
+            LOGGER.info("adding " + this.scalersProcessor.getScalerData().size() + " scaler data objects");
+            runSummary.setScalerData(this.scalersProcessor.getScalerData());
+        }
+
+        if (epicsProcessor != null) {
+            // Add EPICS data from the EPICS EVIO processor.
+            LOGGER.info("adding " + this.epicsProcessor.getEpicsData().size() + " EPICS data objects");
+            runSummary.setEpicsData(this.epicsProcessor.getEpicsData());
+        }
+
+        if (triggerTimeProcessor != null) {
+            // Add trigger config from the trigger time processor.
+            LOGGER.info("updating trigger config");
+            final TriggerConfig triggerConfig = new TriggerConfig();
+            this.triggerTimeProcessor.updateTriggerConfig(triggerConfig);
+            LOGGER.info("tiTimeOffset: " + triggerConfig.get(TriggerConfigVariable.TI_TIME_OFFSET));
+            runSummary.setTriggerConfig(triggerConfig);
+        }
+
+        LOGGER.getHandlers()[0].flush();
+    }
+
+    /**
+     * Get list of metadata created by processing the files.
+     *
+     * @return the list of metadata
+     */
+    public List<EvioFileMetadata> getEvioFileMetaData() {
+        return this.metadataAdapter.getEvioFileMetadata();
+    }
+}
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java Wed Sep 23 07:49:16 2015
@@ -4,6 +4,7 @@
import java.util.Date;
import java.util.List;
+import org.hps.datacat.client.DatasetFileFormat;
import org.hps.record.epics.EpicsData;
import org.hps.record.scalers.ScalerData;
import org.hps.record.triggerbank.TriggerConfig;
@@ -121,7 +122,7 @@
int getTotalEvents();
/**
- * Get the total number of files for this run.
+ * Get the total number of EVIO files for this run.
*
* @return the total number of files for this run
*/
@@ -140,4 +141,13 @@
* @return the date when this run record was last updated
*/
Date getUpdated();
+
+ /**
+ * Get a list of files in the run by format (EVIO, LCIO etc.).
+ *
+ * @param format the file format
+ * @return the list of files with the given format
+ */
+ // FIXME: This should be removed from the run summary interface.
+ public List<File> getFiles(DatasetFileFormat format);
}
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java Wed Sep 23 07:49:16 2015
@@ -371,21 +371,36 @@
@Override
public void insertFullRunSummary(final RunSummary runSummary) {
+ if (runSummary == null) {
+ throw new IllegalArgumentException("The run summary is null.");
+ }
+
// Insert basic run log info.
this.insertRunSummary(runSummary);
// Insert EPICS data.
- LOGGER.info("inserting " + runSummary.getEpicsData().size() + " EPICS records");
- epicsDataDao.insertEpicsData(runSummary.getEpicsData());
+ if (runSummary.getEpicsData() != null && !runSummary.getEpicsData().isEmpty()) {
+ LOGGER.info("inserting " + runSummary.getEpicsData().size() + " EPICS records");
+ epicsDataDao.insertEpicsData(runSummary.getEpicsData());
+ } else {
+ LOGGER.warning("no EPICS data to insert");
+ }
// Insert scaler data.
- LOGGER.info("inserting " + runSummary.getScalerData().size() + " scaler data records");
- scalerDataDao.insertScalerData(runSummary.getScalerData(), runSummary.getRun());
+ if (runSummary.getScalerData() != null && !runSummary.getScalerData().isEmpty()) {
+ LOGGER.info("inserting " + runSummary.getScalerData().size() + " scaler data records");
+ scalerDataDao.insertScalerData(runSummary.getScalerData(), runSummary.getRun());
+ } else {
+ LOGGER.warning("no scaler data to insert");
+ }
// Insert trigger config.
- LOGGER.info("inserting " + runSummary.getTriggerConfig().size() + " trigger config variables");
- triggerConfigIntDao.insertTriggerConfig(runSummary.getTriggerConfig(), runSummary.getRun());
-
+ if (runSummary.getTriggerConfig() != null && !runSummary.getTriggerConfig().isEmpty()) {
+ LOGGER.info("inserting " + runSummary.getTriggerConfig().size() + " trigger config variables");
+ triggerConfigIntDao.insertTriggerConfig(runSummary.getTriggerConfig(), runSummary.getRun());
+ } else {
+ LOGGER.warning("no trigger config to insert");
+ }
}
/**
@@ -440,7 +455,7 @@
runSummary.setScalerData(scalerDataDao.getScalerData(run));
// Read trigger config.
- runSummary.setTriggerConfigInt(triggerConfigIntDao.getTriggerConfig(run));
+ runSummary.setTriggerConfig(triggerConfigIntDao.getTriggerConfig(run));
return runSummary;
}
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java Wed Sep 23 07:49:16 2015
@@ -6,9 +6,12 @@
import java.util.ArrayList;
import java.util.Date;
import java.util.GregorianCalendar;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.TimeZone;
+import org.hps.datacat.client.DatasetFileFormat;
import org.hps.record.epics.EpicsData;
import org.hps.record.scalers.ScalerData;
import org.hps.record.triggerbank.TriggerConfig;
@@ -54,11 +57,6 @@
private List<EpicsData> epicsDataList;
/**
- * The list of EVIO files in the run.
- */
- private List<File> evioFileList = new ArrayList<File>();
-
- /**
* The run number.
*/
private final int run;
@@ -76,7 +74,7 @@
/**
* The trigger data for the run.
*/
- private TriggerConfig triggerConfigInt;
+ private TriggerConfig triggerConfig;
/**
* Start date of run.
@@ -97,6 +95,11 @@
* Date when the run record was last updated.
*/
private Date updated;
+
+ /**
+ * Lists of files indexed by their format.
+ */
+ private Map<DatasetFileFormat, List<File>> fileMap = new HashMap<DatasetFileFormat, List<File>>();
/**
* Create a run summary.
@@ -112,8 +115,8 @@
*
* @param file the file to add
*/
- public void addFile(final File file) {
- this.evioFileList.add(file);
+ public void addEvioFile(final File file) {
+ this.getEvioFiles().add(file);
}
/**
@@ -171,7 +174,7 @@
* @return the list of EVIO files in this run
*/
public List<File> getEvioFiles() {
- return this.evioFileList;
+ return this.fileMap.get(DatasetFileFormat.EVIO);
}
/**
@@ -208,7 +211,7 @@
* @return the trigger config of this run
*/
public TriggerConfig getTriggerConfig() {
- return triggerConfigInt;
+ return triggerConfig;
}
/**
@@ -271,7 +274,7 @@
*
* @param startDate the start date
*/
- public void setEndDate(final Date endDate) {
+ void setEndDate(final Date endDate) {
this.endDate = endDate;
}
@@ -280,7 +283,7 @@
*
* @param endOkay <code>true</code> if end is okay
*/
- public void setEndOkay(final boolean endOkay) {
+ void setEndOkay(final boolean endOkay) {
this.endOkay = endOkay;
}
@@ -289,26 +292,17 @@
*
* @param epics the EPICS data for the run
*/
- public void setEpicsData(final List<EpicsData> epicsDataList) {
+ void setEpicsData(final List<EpicsData> epicsDataList) {
this.epicsDataList = epicsDataList;
}
-
- /**
- * Set the list of EVIO files for the run.
- *
- * @param evioFileList the list of EVIO files for the run
- */
- public void setEvioFiles(final List<File> evioFileList) {
- this.evioFileList = evioFileList;
- }
-
+
/**
* Set whether the run was "okay" meaning the data is usable for physics
* analysis.
*
* @param runOkay <code>true</code> if the run is okay
*/
- public void setRunOkay(final boolean runOkay) {
+ void setRunOkay(final boolean runOkay) {
this.runOkay = runOkay;
}
@@ -317,7 +311,7 @@
*
* @param scalerData the scaler data
*/
- public void setScalerData(final List<ScalerData> scalerDataList) {
+ void setScalerData(final List<ScalerData> scalerDataList) {
this.scalerDataList = scalerDataList;
}
@@ -326,8 +320,8 @@
*
* @param triggerConfig the trigger config
*/
- public void setTriggerConfigInt(final TriggerConfig triggerConfigInt) {
- this.triggerConfigInt = triggerConfigInt;
+ void setTriggerConfig(final TriggerConfig triggerConfig) {
+ this.triggerConfig = triggerConfig;
}
/**
@@ -335,7 +329,7 @@
*
* @param startDate the start date
*/
- public void setStartDate(final Date startDate) {
+ void setStartDate(final Date startDate) {
this.startDate = startDate;
}
@@ -344,7 +338,7 @@
*
* @param totalEvents the total number of physics events in the run
*/
- public void setTotalEvents(final int totalEvents) {
+ void setTotalEvents(final int totalEvents) {
this.totalEvents = totalEvents;
}
@@ -353,7 +347,7 @@
*
* @param totalFiles the total number of EVIO files in the run
*/
- public void setTotalFiles(final int totalFiles) {
+ void setTotalFiles(final int totalFiles) {
this.totalFiles = totalFiles;
}
@@ -362,8 +356,37 @@
*
* @param updated the date when the run record was last updated
*/
- public void setUpdated(final Date updated) {
+ void setUpdated(final Date updated) {
this.updated = updated;
+ }
+
+    /**
+     * Add a file associated with this run.
+     * <p>
+     * This is public because it is called by the file crawler.
+     *
+     * @param format the format of the file (EVIO, LCIO etc.)
+     * @param file a file associated with this run
+     */
+    // FIXME: This should be removed from the run summary interface.
+    public void addFile(DatasetFileFormat format, File file) {
+        // Look up by format, not by file: keying the lookup on the file always returned null,
+        // which replaced (and silently lost) any list already registered for the format.
+        List<File> files = this.fileMap.get(format);
+        if (files == null) {
+            files = new ArrayList<File>();
+            this.fileMap.put(format, files);
+        }
+        files.add(file);
+    }
+
+    /**
+     * Get a list of files in the run by format (EVIO, LCIO etc.).
+     * <p>
+     * An empty list is created and registered the first time a format is requested,
+     * so this never returns <code>null</code>.
+     *
+     * @param format the file format
+     * @return the list of files with the given format
+     */
+    public List<File> getFiles(DatasetFileFormat format) {
+        List<File> files = this.fileMap.get(format);
+        if (files == null) {
+            files = new ArrayList<File>();
+            this.fileMap.put(format, files);
+        }
+        return files;
+    }
/**
|