Author: [log in to unmask] Date: Wed Sep 23 07:49:16 2015 New Revision: 3677 Log: Updates to crawler and run database. Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java - copied, changed from r3655, java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java Removed: java/trunk/crawler/src/main/java/org/hps/crawler/EvioDatacatUtilities.java java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java java/trunk/crawler/src/main/java/org/hps/crawler/RunProcessor.java Modified: java/trunk/crawler/pom.xml java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java Modified: java/trunk/crawler/pom.xml ============================================================================= --- java/trunk/crawler/pom.xml (original) +++ java/trunk/crawler/pom.xml Wed Sep 23 07:49:16 2015 @@ -19,5 +19,10 @@ <groupId>org.hps</groupId> 
<artifactId>hps-run-database</artifactId> </dependency> + <dependency> + <groupId>org.freehep</groupId> + <artifactId>freehep-rootio</artifactId> + <version>2.2.1</version> + </dependency> </dependencies> </project> Modified: java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/Crawler.java Wed Sep 23 07:49:16 2015 @@ -10,7 +10,6 @@ import java.util.Date; import java.util.EnumSet; import java.util.HashSet; -import java.util.List; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -21,9 +20,12 @@ import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import org.hps.conditions.database.ConnectionParameters; -import org.hps.record.evio.EvioFileMetadata; +import org.hps.datacat.client.DatacatClient; +import org.hps.datacat.client.DatacatClientFactory; +import org.hps.datacat.client.DatasetFileFormat; import org.hps.run.database.RunDatabaseDaoFactory; import org.hps.run.database.RunManager; +import org.hps.run.database.RunProcessor; import org.hps.run.database.RunSummary; import org.hps.run.database.RunSummaryDao; import org.hps.run.database.RunSummaryImpl; @@ -31,45 +33,75 @@ import org.lcsim.util.log.LogUtil; /** - * Search for EVIO files in a directory tree, group the files that are found by run, extract meta data from these files, - * and optionally update a run database with the information that was found. + * Crawls a directory tree for data files and performs tasks related to this information. + * <p> + * The crawler can find EVIO, LCIO, or ROOT files in a directory tree and then perform various tasks based on + * information extracted from them. * * @author Jeremy McCormick, SLAC */ public final class Crawler { /** + * Make a list of available features for printing help. 
+ */ + private static String AVAILABLE_FEATURES; + + /** + * Make a list of available features for printing help. + */ + private static String AVAILABLE_FORMATS; + + /** * Setup the logger. */ - private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.ALL); + private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.CONFIG); /** * Constant for milliseconds conversion. */ private static final long MILLISECONDS = 1000L; - /** * Command line options for the crawler. */ private static final Options OPTIONS = new Options(); + + static { + final StringBuffer buffer = new StringBuffer(); + for (final CrawlerFeature feature : CrawlerFeature.values()) { + buffer.append(feature.name() + " "); + } + buffer.setLength(buffer.length() - 1); + AVAILABLE_FEATURES = buffer.toString(); + } + static { + final StringBuffer buffer = new StringBuffer(); + for (final DatasetFileFormat format : DatasetFileFormat.values()) { + buffer.append(format.name() + " "); + } + buffer.setLength(buffer.length() - 1); + AVAILABLE_FORMATS = buffer.toString(); + } /** * Statically define the command options. 
*/ static { OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")"); - OPTIONS.addOption("c", "datacat", true, "update the data catalog using the specified folder (off by default)"); + OPTIONS.addOption("c", "datacat", true, "use the specified datacat folder"); OPTIONS.addOption("C", "cache", false, "cache files from MSS (JLAB only and not for batch farm use!)"); + OPTIONS.addOption("e", "enable", true, "enable a feature: " + AVAILABLE_FEATURES); + OPTIONS.addOption("D", "default-features", false, "enable default features"); + OPTIONS.addOption("F", "default-formats", false, "enable default file filters"); + OPTIONS.addOption("f", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS); OPTIONS.addOption("p", "connection-properties", true, "database connection properties file (required)"); OPTIONS.addOption("d", "directory", true, "root directory to start crawling (default is current dir)"); OPTIONS.addOption("E", "evio-processor", true, "class name of an EvioEventProcessor to execute"); OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)"); - OPTIONS.addOption("i", "insert", false, "insert information into the run database (not done by default)"); OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)"); OPTIONS.addOption("r", "run", true, "add a run number to accept (others will be excluded)"); OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name"); OPTIONS.addOption("w", "max-cache-wait", true, "time per run allowed for file caching in seconds"); - OPTIONS.addOption("u", "update", false, "allow replacement of existing data in the run db (off by default)"); OPTIONS.addOption("x", "max-depth", true, "max depth to crawl"); } @@ -87,29 +119,6 @@ } /** - * Process all the runs that were found. 
- * - * @param runs the run log containing the list of run summaries - * @throws Exception if there is an error processing one of the runs - */ - static RunProcessor processRun(final RunSummary runSummary) throws Exception { - - LOGGER.info("processing run" + runSummary.getRun()); - - // Create a processor to process all the EVIO events in the run. - LOGGER.info("creating run processor for " + runSummary.getRun()); - final RunProcessor runProcessor = new RunProcessor((RunSummaryImpl) runSummary); - - // Process all of the files from the run. - LOGGER.info("processing run " + runSummary.getRun()); - runProcessor.processRun(); - - LOGGER.getHandlers()[0].flush(); - - return runProcessor; - } - - /** * The class for managing the file caching using the 'jcache' command. */ private final JCacheManager cacheManager = new JCacheManager(); @@ -154,6 +163,30 @@ } /** + * Create a run processor from the current configuration. + * + * @return the run processor + */ + private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary) { + + final RunProcessor runProcessor = new RunProcessor(runSummary); + + final Set<CrawlerFeature> features = config.getFeatures(); + + if (features.contains(CrawlerFeature.EPICS)) { + runProcessor.addEpicsProcessor(); + } + if (features.contains(CrawlerFeature.SCALERS)) { + runProcessor.addScalerProcessor(); + } + if (features.contains(CrawlerFeature.TRIGGER)) { + runProcessor.addTriggerTimeProcessor(); + } + + return runProcessor; + } + + /** * Parse command line options and create a new {@link Crawler} object from the configuration. * * @param args the command line arguments @@ -161,9 +194,9 @@ */ private Crawler parse(final String args[]) { - LOGGER.info("parsing command line options"); - - config = new CrawlerConfig(); + LOGGER.config("parsing command line options"); + + this.config = new CrawlerConfig(); try { final CommandLine cl = this.parser.parse(OPTIONS, args); @@ -176,11 +209,37 @@ // Log level. 
if (cl.hasOption("L")) { final Level level = Level.parse(cl.getOptionValue("L")); - LOGGER.info("setting log level to " + level); + LOGGER.config("setting log level to " + level); LOGGER.setLevel(level); } + // Enable default features. + if (cl.hasOption("D")) { + LOGGER.config("enabling default features"); + this.config.addDefaultFeatures(); + } + + // Enable default file formats. + if (cl.hasOption("F")) { + LOGGER.config("enabling default file formats"); + this.config.addDefaultFileFormats(); + } + + // Root directory for file crawling. + if (cl.hasOption("d")) { + final File rootDir = new File(cl.getOptionValue("d")); + if (!rootDir.exists()) { + throw new IllegalArgumentException("The directory does not exist."); + } + if (!rootDir.isDirectory()) { + throw new IllegalArgumentException("The specified path is not a directory."); + } + config.setRootDir(rootDir); + LOGGER.config("root dir set to " + config.rootDir()); + } + // Database connection properties file (this is not optional). + // FIXME: This only needs to be set for updating the run database. if (cl.hasOption("p")) { final String dbPropPath = cl.getOptionValue("p"); final File dbPropFile = new File(dbPropPath); @@ -194,19 +253,6 @@ } else { throw new RuntimeException( "The -p switch providing the database connection properties file is a required argument."); - } - - // Root directory for file crawling. - if (cl.hasOption("d")) { - final File rootDir = new File(cl.getOptionValue("d")); - if (!rootDir.exists()) { - throw new IllegalArgumentException("The directory does not exist."); - } - if (!rootDir.isDirectory()) { - throw new IllegalArgumentException("The specified path is not a directory."); - } - config.setRootDir(rootDir); - LOGGER.config("root dir for crawling set to " + config.rootDir()); } // Timestamp file for date filtering. @@ -247,12 +293,6 @@ config.setAcceptRuns(acceptRuns); } - // Enable updating of run database. 
- if (cl.hasOption("i")) { - config.setUpdateRunLog(true); - LOGGER.config("inserting into run database is enabled"); - } - // Enable file cache usage for running at JLAB. if (cl.hasOption("C")) { config.setUseFileCache(true); @@ -264,12 +304,6 @@ final Long waitTime = Long.parseLong(cl.getOptionValue("w")) * MILLISECONDS; config.setWaitTime(waitTime); LOGGER.config("max time for file caching set to " + config.waitTime()); - } - - // Allow deletion and replacement of records in run database. - if (cl.hasOption("u")) { - config.setAllowUpdates(true); - LOGGER.config("deletion and replacement of existing runs in the database is enabled"); } // User supplied timestamp string that is converted to a date for file filtering. @@ -315,13 +349,62 @@ if (datacatFolder == null) { throw new IllegalArgumentException("missing -c argument with data catalog folder"); } + + // Set datacat folder. LOGGER.config("using data catalog folder " + datacatFolder); config.setDatacatFolder(datacatFolder); - config.setUpdateDatacat(true); - } - + + // Assume datacat should be enabled if folder name was given. + config.getFeatures().add(CrawlerFeature.DATACAT); + LOGGER.config(CrawlerFeature.DATACAT + " is enabled"); + } + + // Configure enabled features. + if (cl.hasOption("e")) { + for (final String arg : cl.getOptionValues("e")) { + CrawlerFeature feature = null; + try { + feature = CrawlerFeature.valueOf(arg); + } catch (IllegalArgumentException | NullPointerException e) { + throw new IllegalArgumentException("The feature " + arg + " is not valid.", e); + } + this.config.addFeature(feature); + } + } + + // Configure enabled file formats. 
+ if (cl.hasOption("f")) { + for (final String arg : cl.getOptionValues("f")) { + DatasetFileFormat format = null; + try { + format = DatasetFileFormat.valueOf(arg); + } catch (IllegalArgumentException | NullPointerException e) { + throw new IllegalArgumentException("The feature " + arg + " is not valid.", e); + } + LOGGER.config("adding format " + format.name()); + this.config.addFileFormat(format); + } + } } catch (final ParseException e) { throw new RuntimeException("Error parsing options.", e); + } + + // Check that there is at least one file format enabled for filtering. + if (this.config.getFileFormats().isEmpty()) { + throw new IllegalStateException( + "There are no file formats enabled. Enable defaults using -F or add a format using the -f switch."); + } + + // Print a message if no features are enabled; this is not a fatal error but the job won't update anything. + if (this.config.getFeatures().isEmpty()) { + LOGGER.warning("no features are enabled"); + } + + // Check that EVIO file filter is active if run database is being updated. + // Don't add by default because the user may have made a mistake in the options they provided. + if (this.config.getFeatures().contains(CrawlerFeature.RUNDB_INSERT) + && !this.config.getFileFormats().contains(DatasetFileFormat.EVIO)) { + throw new IllegalStateException("Run database is enabled without EVIO file filter active."); } // Configure the max wait time for file caching operations. @@ -340,11 +423,35 @@ */ private void printUsage() { final HelpFormatter help = new HelpFormatter(); - help.printHelp("EvioFileCrawler", "", OPTIONS, ""); + // FIXME: include more info here and improve the way this looks (line width should be increased) + help.printHelp("Crawler [options]", "", OPTIONS, ""); System.exit(0); } /** + * Process a run using its run summary. 
+ * + * @param runs the run log containing the list of run summaries + * @throws Exception if there is an error processing one of the runs + */ + private RunProcessor processRun(final RunSummary runSummary) throws Exception { + + LOGGER.info("processing run" + runSummary.getRun()); + + // Create a processor to process all the EVIO events in the run. + LOGGER.info("creating run processor for " + runSummary.getRun()); + final RunProcessor runProcessor = this.createEvioRunProcessor((RunSummaryImpl) runSummary); + + // Process all of the files from the run. + LOGGER.info("processing run " + runSummary.getRun()); + runProcessor.processRun(); + + LOGGER.getHandlers()[0].flush(); + + return runProcessor; + } + + /** * Run the full crawler job. * * @throws Exception if there is some error during the job @@ -354,7 +461,18 @@ LOGGER.info("starting Crawler job"); // Create the file visitor for crawling the root directory with the given date filter. - final EvioFileVisitor visitor = new EvioFileVisitor(config.timestamp()); + final CrawlerFileVisitor visitor = new CrawlerFileVisitor(); + + if (config.timestamp() != null) { + // Add date filter if timestamp is supplied. + visitor.addFilter(new DateFileFilter(config.timestamp())); + } + + // Add file format filter. + for (final DatasetFileFormat fileFormat : config.getFileFormats()) { + LOGGER.info("adding file format filter for " + fileFormat.name()); + } + visitor.addFilter(new FileFormatFilter(config.getFileFormats())); // Walk the file tree using the visitor. this.walk(visitor); @@ -362,33 +480,45 @@ // Get the list of run data created by the visitor. final RunSummaryMap runMap = visitor.getRunMap(); - // Process all runs that were found. - for (RunSummary runSummary : runMap.getRunSummaries()) { - + LOGGER.info("found " + runMap.size() + " runs from crawl job"); + + // Process all runs that were found. 
+ for (final RunSummary runSummary : runMap.getRunSummaries()) { + if (runSummary == null) { throw new IllegalArgumentException("The run summary is null for some weird reason."); } - - LOGGER.info("starting full processing of run " + runSummary.getRun()); - + + LOGGER.info("starting processing of run " + runSummary.getRun()); + // Cache files from MSS. this.cacheFiles(runSummary); - // Process the run's files. - RunProcessor runProcessor = processRun(runSummary); - - // Execute the run database update. - this.updateRunDatabase(runSummary); + // Process the run's EVIO files. + if (!runSummary.getFiles(DatasetFileFormat.EVIO).isEmpty()) { + final RunProcessor runProcessor = this.processRun(runSummary); + } + + if (config.getFeatures().contains(CrawlerFeature.RUNDB_INSERT)) { + // Execute the run database update. + this.updateRunDatabase(runSummary); + } else { + LOGGER.info("updating run database is not enabled"); + } // Update the data catalog. - this.updateDatacat(runProcessor.getEvioFileMetaData()); - + if (this.config.getFeatures().contains(CrawlerFeature.DATACAT)) { + this.updateDatacat(runSummary); + } + LOGGER.info("completed full processing of run " + runSummary); - } + } // Update the timestamp output file. 
this.updateTimestamp(); + LOGGER.getHandlers()[0].flush(); + LOGGER.info("Crawler job is done!"); } @@ -397,12 +527,20 @@ * * @param runMap the map of run information including the EVIO file list */ - private void updateDatacat(List<EvioFileMetadata> metadataList) { - if (this.config.updateDatacat()) { - EvioDatacatUtilities.addEvioFiles(metadataList, config.datacatFolder()); - LOGGER.info("done updating data catalog"); - } else { - LOGGER.info("updating data catalog is disabled"); + private void updateDatacat(final RunSummary runSummary) { + final DatacatClient datacatClient = new DatacatClientFactory().createClient(); + for (final DatasetFileFormat fileFormat : config.getFileFormats()) { + LOGGER.info("adding files to datacat with format " + fileFormat.name()); + for (final File file : runSummary.getFiles(fileFormat)) { + + LOGGER.info("adding file " + file.getPath() + " to datacat"); + + // Get folder for file by stripping out root directory. + final String folder = DatacatUtilities.getFolder(config.rootDir().getPath(), file); + + // Register file in the catalog. + // DatacatUtilities.addFile(datacatClient, folder, file); + } } } @@ -413,43 +551,38 @@ * @throws SQLException if there is a database query error */ private void updateRunDatabase(final RunSummary runSummary) throws SQLException { - // Insert the run information into the database. - if (config.updateRunDatabase()) { - - LOGGER.info("updating run database for run " + runSummary.getRun()); - - // Open a DB connection. - final Connection connection = config.connectionParameters().createConnection(); - - // Create factory for interfacing to run database. - RunManager runManager = new RunManager(); - runManager.setConnection(connection); - final RunDatabaseDaoFactory dbFactory = runManager.createDaoFactory(); - - // Create object for updating run info in the database. - final RunSummaryDao runSummaryDao = dbFactory.createRunSummaryDao(); - - // Delete existing run summary if necessary. 
- if (runSummaryDao.runSummaryExists(runSummary.getRun())) { - if (this.config.allowUpdates()) { - LOGGER.info("deleting existing information for run " + runSummary.getRun()); - runSummaryDao.deleteFullRunSummary(runSummary); - } else { - throw new RuntimeException("Run " + runSummary.getRun() + " exists in database and deletion is not enabled."); - } - } - - // Insert run summary into database. - runSummaryDao.insertFullRunSummary(runSummary); - - // Close the DB connection. - connection.close(); - - LOGGER.info("done updating run database"); - - } else { - LOGGER.info("updating run database is disabled"); - } + + LOGGER.info("updating run database for run " + runSummary.getRun()); + + // Open a DB connection. + final Connection connection = config.connectionParameters().createConnection(); + + // Create factory for interfacing to run database. + final RunManager runManager = new RunManager(); + runManager.setConnection(connection); + final RunDatabaseDaoFactory dbFactory = runManager.createDaoFactory(); + + // Create object for updating run info in the database. + final RunSummaryDao runSummaryDao = dbFactory.createRunSummaryDao(); + + // Delete existing run summary if necessary. + if (runSummaryDao.runSummaryExists(runSummary.getRun())) { + if (this.config.features.contains(CrawlerFeature.RUNDB_UPDATE)) { + LOGGER.info("deleting existing information for run " + runSummary.getRun()); + runSummaryDao.deleteFullRunSummary(runSummary); + } else { + throw new RuntimeException("Run " + runSummary.getRun() + + " exists in database and deletion is not enabled."); + } + } + + // Insert run summary into database. + runSummaryDao.insertFullRunSummary(runSummary); + + // Close the DB connection. 
+ connection.close(); + + LOGGER.info("done updating run database"); LOGGER.getHandlers()[0].flush(); } @@ -481,7 +614,7 @@ * * @param visitor the file visitor */ - private void walk(final EvioFileVisitor visitor) { + private void walk(final CrawlerFileVisitor visitor) { if (config.timestamp() != null) { // Date filter from timestamp. visitor.addFilter(new DateFileFilter(config.timestamp())); @@ -494,7 +627,7 @@ visitor.addFilter(new RunFilter(config.acceptRuns())); LOGGER.config("added run number filter"); } else { - LOGGER.config("no run number filter used"); + LOGGER.config("no run number filter will be used"); } try { @@ -505,5 +638,4 @@ throw new RuntimeException("Error while walking the directory tree.", e); } } - } Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java Wed Sep 23 07:49:16 2015 @@ -4,12 +4,16 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import org.hps.conditions.database.ConnectionParameters; +import org.hps.datacat.client.DatasetFileFormat; +import org.hps.datacat.client.DatasetSite; import org.hps.record.evio.EvioEventProcessor; /** @@ -50,6 +54,21 @@ private String datacatFolder = null; /** + * Set whether extraction of metadata from files is enabled. + */ + private boolean enableMetadata; + + /** + * Set of features enabled in this configuration. + */ + Set<CrawlerFeature> features = new HashSet<CrawlerFeature>(); + + /** + * Set of file formats for filtering files. + */ + Set<DatasetFileFormat> formats = new HashSet<DatasetFileFormat>(); + + /** * The maximum depth to crawl. 
*/ private Integer maxDepth = Integer.MAX_VALUE; @@ -68,6 +87,11 @@ * The root directory to search for files, which defaults to the current directory. */ private File rootDir = new File(System.getProperty("user.dir")); + + /** + * The dataset site for the datacat. + */ + private DatasetSite site; /** * A timestamp to use for filtering input files on their creation date. @@ -106,6 +130,44 @@ */ Set<Integer> acceptRuns() { return acceptRuns; + } + + /** + * Add the default set of features. + */ + CrawlerConfig addDefaultFeatures() { + final List<CrawlerFeature> defaultFeatures = Arrays.asList(CrawlerFeature.values()); + this.features.addAll(defaultFeatures); + return this; + } + + /** + * Add the default file formats. + */ + CrawlerConfig addDefaultFileFormats() { + final List<DatasetFileFormat> defaultFormats = Arrays.asList(DatasetFileFormat.values()); + this.formats.addAll(defaultFormats); + return this; + } + + /** + * Add a feature to enable it. + * + * @return this object + */ + CrawlerConfig addFeature(final CrawlerFeature feature) { + this.features.add(feature); + return this; + } + + /** + * Add a file format for filtering. + * + * @param format the file format + */ + CrawlerConfig addFileFormat(final DatasetFileFormat format) { + this.formats.add(format); + return this; } /** @@ -162,6 +224,42 @@ } /** + * Get the dataset site. + * + * @return the dataset site + */ + DatasetSite datasetSite() { + return this.site; + } + + /** + * Return <code>true</code> if metadata extraction from files is enabled. + * + * @return <code>true</code> if metadata extraction is enabled + */ + boolean enableMetaData() { + return this.enableMetadata; + } + + /** + * Get the set of enabled features. + * + * @return the set of enabled features + */ + Set<CrawlerFeature> getFeatures() { + return this.features; + } + + /** + * Get the file formats for filtering. 
+ * + * @return the file formats for filtering + */ + Set<DatasetFileFormat> getFileFormats() { + return this.formats; + } + + /** * Get the max depth in the directory tree to crawl. * * @return the max depth @@ -191,6 +289,16 @@ } /** + * Remove a feature to disable it. + * + * @return this object + */ + CrawlerConfig removeFeature(final CrawlerFeature feature) { + this.features.remove(feature); + return this; + } + + /** * Get the root directory for the file search. * * @return the root directory for the file search @@ -243,6 +351,26 @@ } /** + * Set the dataset site. + * + * @return this object + */ + void setDatasetSite(final DatasetSite site) { + this.site = site; + } + + /** + * Set whether metadata extraction is enabled. + * + * @param enableMetadata <code>true</code> to enable metadata + * @return this object + */ + CrawlerConfig setEnableMetadata(final boolean enableMetadata) { + this.enableMetadata = enableMetadata; + return this; + } + + /** * Set the max depth. * * @param maxDepth the max depth Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFeature.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,32 @@ +package org.hps.crawler; + +/** + * Enum for enabling or disabling features in the file crawler. + */ +enum CrawlerFeature { + /** + * Allow inserts into run database. + */ + RUNDB_INSERT, + /** + * Allow updating the run database if the run exists already. + */ + RUNDB_UPDATE, + /** + * Create list of EPICS data for inserting into run database. + */ + EPICS, + /** + * Create list of scaler data for inserting into run database. + */ + SCALERS, + /** + * Extract trigger config for inserting into run database. + */ + TRIGGER, + /** + * Populate the data catalog with files that are found when crawling. 
+ */ + DATACAT +} + Added: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,15 @@ +package org.hps.crawler; + +import java.io.File; + +public class CrawlerFileUtilities { + + static boolean isHpsFile(File file) { + return file.getName().startsWith("hps"); + } + + static int getRunFromFileName(File file) { + String name = file.getName(); + return Integer.parseInt(name.substring(4, 8)); + } +} Copied: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java (from r3655, java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java) ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/EvioFileVisitor.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java Wed Sep 23 07:49:16 2015 @@ -7,27 +7,28 @@ import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; -import java.util.Date; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import org.hps.record.evio.EvioFileFilter; -import org.hps.record.evio.EvioFileUtilities; +import org.hps.datacat.client.DatasetFileFormat; import org.lcsim.util.log.DefaultLogFormatter; import org.lcsim.util.log.LogUtil; /** * A file visitor that crawls directories for EVIO files and returns the information as a {@link RunSummaryMap}. + * <p> + * The {@link #addFilter(FileFilter)} method can be used to add a file filter. Paths must pass all filters to + * be accepted. 
* * @author Jeremy McCormick, SLAC */ -final class EvioFileVisitor extends SimpleFileVisitor<Path> { +final class CrawlerFileVisitor extends SimpleFileVisitor<Path> { /** * Setup logger. */ - private static final Logger LOGGER = LogUtil.create(EvioFileVisitor.class, new DefaultLogFormatter(), Level.FINE); + private static final Logger LOGGER = LogUtil.create(CrawlerFileVisitor.class, new DefaultLogFormatter(), Level.FINE); /** * A list of file filters to apply. @@ -37,6 +38,7 @@ /** * The run log containing information about files from each run. */ + // FIXME: This should be replaced by a map of run summary to file set. private final RunSummaryMap runs = new RunSummaryMap(); /** @@ -44,12 +46,7 @@ * * @param timestamp the timestamp which is used for date filtering */ - EvioFileVisitor(final Date timestamp) { - this.addFilter(new EvioFileFilter()); - if (timestamp != null) { - // Add date filter if timestamp is supplied. - this.addFilter(new DateFileFilter(timestamp)); - } + CrawlerFileVisitor() { } /** @@ -97,23 +94,25 @@ */ @Override public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) { + final File file = path.toFile(); - if (this.accept(file)) { + + if (this.accept(file)) { // Get the run number from the file name. - final Integer run = EvioFileUtilities.getRunFromName(file); + final Integer run = CrawlerFileUtilities.getRunFromFileName(file); - // Get the sequence number from the file name. - final Integer seq = EvioFileUtilities.getSequenceFromName(file); + // Get the file format. + DatasetFileFormat format = DatacatUtilities.getFileFormat(file); - LOGGER.info("accepted file " + file.getPath() + " with run " + run + " and seq " + seq); - - // Add this file to the file list for the run. - this.runs.getRunSummary(run).addFile(file); + LOGGER.info("accepted file " + file.getPath() + " with run " + run); + + // Add file to run summary. 
+ this.runs.getRunSummary(run).addFile(format, file); } else { // File was rejected by one of the filters. - LOGGER.finer("rejected file " + file.getPath()); + LOGGER.info("file " + file.getPath() + " was rejected"); } // Always continue crawling. return FileVisitResult.CONTINUE; Added: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,414 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.nio.file.FileVisitOption; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.ArrayList; +import java.util.Date; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.hps.datacat.client.DatacatClient; +import org.hps.datacat.client.DatacatClientFactory; +import org.hps.datacat.client.DatasetFileFormat; +import org.lcsim.util.log.DefaultLogFormatter; +import org.lcsim.util.log.LogUtil; + +/** + * Command line file crawler for populating the data catalog. + * + * @author Jeremy McCormick, SLAC + */ +public class DatacatCrawler { + + /** + * Visitor which creates a {@link FileSet} from walking a directory tree. 
+ * <p> + * Any number of {@link java.io.FileFilter} objects can be registered with this visitor to restrict which files are + * accepted. + * + * @author Jeremy McCormick, SLAC + */ + final class DatacatFileVisitor extends SimpleFileVisitor<Path> { + + /** + * The run log containing information about files from each run. + */ + private final FileSet fileSet = new FileSet(); + + /** + * A list of file filters to apply. + */ + private final List<FileFilter> filters = new ArrayList<FileFilter>(); + + /** + * Run the filters on the file to tell whether it should be accepted or not. + * + * @param file the EVIO file + * @return <code>true</code> if file should be accepted + */ + private boolean accept(final File file) { + boolean accept = true; + for (final FileFilter filter : this.filters) { + accept = filter.accept(file); + if (!accept) { + break; + } + } + return accept; + } + + /** + * Add a file filter. + * + * @param filter the file filter + */ + void addFilter(final FileFilter filter) { + this.filters.add(filter); + } + + /** + * Get the file set created by visiting the directory tree. + * + * @return the file set from visiting the directory tree + */ + FileSet getFileSet() { + return this.fileSet; + } + + /** + * Visit a single file. + * + * @param path the file to visit + * @param attrs the file attributes + */ + @Override + public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) { + final File file = path.toFile(); + if (this.accept(file)) { + final DatasetFileFormat format = DatacatUtilities.getFileFormat(file); + fileSet.addFile(format, file); + } + return FileVisitResult.CONTINUE; + } + } + + /** + * Make a list of available file formats for printing help. + */ + private static String AVAILABLE_FORMATS; + + /** + * Setup the logger. + */ + private static final Logger LOGGER = LogUtil.create(Crawler.class, new DefaultLogFormatter(), Level.CONFIG); + + /** + * Command line options for the crawler. 
+ */ + private static final Options OPTIONS = new Options(); + static { + final StringBuffer buffer = new StringBuffer(); + for (final DatasetFileFormat format : DatasetFileFormat.values()) { + buffer.append(format.name() + " "); + } + buffer.setLength(buffer.length() - 1); + AVAILABLE_FORMATS = buffer.toString(); + } + + /** + * Statically define the command options. + */ + static { + OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)"); + OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")"); + OPTIONS.addOption("d", "directory", true, "root directory to crawl"); + OPTIONS.addOption("f", "folder", true, "datacat folder"); + OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)"); + OPTIONS.addOption("o", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS); + OPTIONS.addOption("m", "metadata", false, "create metadata for datasets"); + OPTIONS.addOption("s", "site", true, "datacat site"); + OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name"); + OPTIONS.addOption("x", "max-depth", true, "max depth to crawl"); + } + + /** + * Main method. + * + * @param args the command line arguments + */ + public static void main(final String[] args) { + new DatacatCrawler().parse(args).run(); + } + + /** + * The crawler configuration. + */ + private CrawlerConfig config; + + /** + * The options parser. + */ + private final PosixParser parser = new PosixParser(); + + /** + * Throw an exception if the path doesn't exist in the data catalog or it is not a folder. 
+ * + * @param folder the folder in the datacat + * @throws RuntimeException if the given path does not exist or it is not a folder + */ + void checkFolder(final String folder) { + final DatacatClient datacatClient = new DatacatClientFactory().createClient(); + if (!datacatClient.exists(folder)) { + throw new RuntimeException("The folder " + folder + " does not exist in the data catalog."); + } + if (!datacatClient.isFolder(folder)) { + throw new RuntimeException("The path " + folder + " is not a folder."); + } + } + + /** + * Parse command line options. + * + * @param args the command line arguments + * @return this object (for method chaining) + */ + public DatacatCrawler parse(final String[] args) { + config = new CrawlerConfig(); + + LOGGER.config("parsing command line options"); + + this.config = new CrawlerConfig(); + + try { + final CommandLine cl = this.parser.parse(OPTIONS, args); + + // Print help. + if (cl.hasOption("h") || args.length == 0) { + this.printUsage(); + } + + // Log level. + if (cl.hasOption("L")) { + final Level level = Level.parse(cl.getOptionValue("L")); + LOGGER.config("setting log level to " + level); + LOGGER.setLevel(level); + } + + // Root directory for file crawling. + if (cl.hasOption("d")) { + final File rootDir = new File(cl.getOptionValue("d")); + if (!rootDir.exists()) { + throw new IllegalArgumentException("The directory does not exist."); + } + if (!rootDir.isDirectory()) { + throw new IllegalArgumentException("The specified path is not a directory."); + } + config.setRootDir(rootDir); + LOGGER.config("root dir set to " + config.rootDir()); + } + + // Timestamp file for date filtering. + if (cl.hasOption("t")) { + final File timestampFile = new File(cl.getOptionValue("t")); + config.setTimestampFile(timestampFile); + if (!timestampFile.exists()) { + try { + // Create new time stamp file which will have its date updated at the end of the job. 
+ LOGGER.config("creating new timestamp file " + timestampFile.getPath()); + timestampFile.createNewFile(); + } catch (final IOException e) { + throw new IllegalArgumentException("Error creating timestamp file: " + timestampFile.getPath()); + } + } else { + try { + // Get the date filter for files from an existing time stamp file provided by the user. + final Date timestamp = new Date(Files + .readAttributes(config.timestampFile().toPath(), BasicFileAttributes.class) + .lastModifiedTime().toMillis()); + config.setTimestamp(timestamp); + LOGGER.config("got timestamp " + timestamp + " from existing file " + + config.timestampFile().getPath()); + } catch (final IOException e) { + throw new RuntimeException("Error getting attributes of timestamp file.", e); + } + } + } + + // User supplied timestamp string that is converted to a date for file filtering. + if (cl.hasOption("b")) { + try { + if (config.timestamp() != null) { + LOGGER.warning("existing timestamp from file " + config.timestamp() + + " will be overridden by date from -b argument"); + } + config.setTimestamp(cl.getOptionValue("b")); + LOGGER.config("set timestamp to " + config.timestamp() + " from -b argument"); + } catch (final java.text.ParseException e) { + throw new RuntimeException(e); + } + } + + // Max depth to crawl. + if (cl.hasOption("x")) { + final Integer maxDepth = Integer.parseInt(cl.getOptionValue("x")); + if (maxDepth < 1) { + throw new IllegalArgumentException("invalid -x argument for maxDepth: " + maxDepth); + } + config.setMaxDepth(maxDepth); + LOGGER.config("set max depth to " + maxDepth); + } + + // Configure enabled file formats. 
+ if (cl.hasOption("o")) { + for (final String arg : cl.getOptionValues("o")) { + DatasetFileFormat format = null; + try { + format = DatasetFileFormat.valueOf(arg); + } catch (IllegalArgumentException | NullPointerException e) { + throw new IllegalArgumentException("The format " + arg + " is not valid.", e); + } + LOGGER.config("adding format " + format.name()); + this.config.addFileFormat(format); + } + } else { + throw new RuntimeException("The -o argument with data format must be supplied at least once."); + } + + // Enable metadata extraction from files. + if (cl.hasOption("m")) { + config.setEnableMetadata(true); + LOGGER.config("metadata extraction enabled"); + } + + // Datacat folder. + if (cl.hasOption("f")) { + config.setDatacatFolder(cl.getOptionValue("f")); + LOGGER.config("set datacat folder to " + config.datacatFolder()); + } else { + throw new RuntimeException("The -f argument with the datacat folder is required."); + } + + } catch (final ParseException e) { + throw new RuntimeException("Error parsing options.", e); + } + + // Check the datacat folder which must already exist. + this.checkFolder(config.datacatFolder()); + + // Check that there is at least one file format enabled for filtering. + if (this.config.getFileFormats().isEmpty()) { + throw new IllegalStateException("At least one file format must be provided with the -o switch."); + } + + LOGGER.info("done parsing command line options"); + LOGGER.getHandlers()[0].flush(); + + return this; + } + + /** + * Print the usage statement for this tool to the console and then exit the program. + */ + private void printUsage() { + final HelpFormatter help = new HelpFormatter(); + help.printHelp(70, "DatacatCrawler [options]", "", OPTIONS, ""); + System.exit(0); + } + + /** + * Run the crawler job. + */ + void run() { + + // Create the file visitor for crawling the root directory with the given date filter. 
+ final DatacatFileVisitor visitor = new DatacatFileVisitor(); + + // Add date filter if timestamp is supplied. + if (config.timestamp() != null) { + visitor.addFilter(new DateFileFilter(config.timestamp())); + } + + // Add file format filter. + for (final DatasetFileFormat fileFormat : config.getFileFormats()) { + LOGGER.info("adding file format filter for " + fileFormat.name()); + } + visitor.addFilter(new FileFormatFilter(config.getFileFormats())); + + // Walk the file tree using the visitor. + this.walk(visitor); + + // Update the data catalog. + this.updateDatacat(visitor.getFileSet()); + } + + /** + * Update the data catalog. + * + * @param fileSet the set of files found by the crawler, keyed by file format + */ + private void updateDatacat(final FileSet fileSet) { + final DatacatClient datacatClient = new DatacatClientFactory().createClient(); + for (final DatasetFileFormat fileFormat : config.getFileFormats()) { + LOGGER.info("adding files to datacat with format " + fileFormat.name()); + for (final File file : fileSet.get(fileFormat)) { + + LOGGER.info("adding file " + file.getAbsolutePath() + " to datacat"); + + // Create metadata if this is enabled (takes a while). + Map<String, Object> metadata = new HashMap<String, Object>(); + if (config.enableMetaData()) { + metadata = DatacatUtilities.createMetadata(file); + } + + // Register file in the catalog. + DatacatUtilities.addFile(datacatClient, config.datacatFolder(), file, metadata); + } + } + } + + /** + * Walk the directory tree to find data files for the runs that are being processed in the job. + * + * @param visitor the file visitor + */ + private void walk(final DatacatFileVisitor visitor) { + if (config.timestamp() != null) { + // Date filter from timestamp. + visitor.addFilter(new DateFileFilter(config.timestamp())); + LOGGER.config("added date filter with time stamp " + config.timestamp()); + } + + // Is the accept run list not empty? (Empty means accept all runs.) 
+ if (!config.acceptRuns().isEmpty()) { + // List of run numbers to accept. + visitor.addFilter(new RunFilter(config.acceptRuns())); + LOGGER.config("added run number filter"); + } else { + LOGGER.config("no run number filter will be used"); + } + + try { + // Walk the file tree from the root directory. + final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class); + Files.walkFileTree(config.rootDir().toPath(), options, config.maxDepth(), visitor); + } catch (final IOException e) { + throw new RuntimeException("Error while walking the directory tree.", e); + } + } +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,134 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.hps.datacat.client.DatacatClient; +import org.hps.datacat.client.DatasetDataType; +import org.hps.datacat.client.DatasetFileFormat; +import org.hps.datacat.client.DatasetSite; + +/** + * Datacat utilities for the crawler. 
+ * + * @author Jeremy McCormick, SLAC + */ +class DatacatUtilities { + + static Map<String, DatasetFileFormat> formatMap = new HashMap<String, DatasetFileFormat>(); + static { + for (final DatasetFileFormat format : DatasetFileFormat.values()) { + formatMap.put(format.extension(), format); + } + } + + static void addFile(final DatacatClient datacatClient, final String folder, final File file, + final Map<String, Object> metadata) { + final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file); + final DatasetDataType dataType = DatacatUtilities.getDataType(file); + DatacatUtilities.addFile(datacatClient, folder, file, metadata, fileFormat, dataType, DatasetSite.SLAC); + } + + /** + * Add a file to the data catalog. + * + * @param client the data catalog client + * @param folder the folder name e.g. "data/raw" + * @param fileMetadata the file's meta data including the path + * @param fileFormat the file's format (EVIO, LCIO etc.) + * @param dataType the file's data type (RAW, RECON, etc.) + * @return the HTTP response code + */ + static int addFile(final DatacatClient client, final String folder, final File file, + final Map<String, Object> metadata, final DatasetFileFormat fileFormat, final DatasetDataType dataType, + final DatasetSite site) { + + // Add the dataset to the data catalog using the REST API. 
+ final int response = client.addDataset(folder, dataType, file.getAbsolutePath(), site, fileFormat, + file.getName(), metadata); + + return response; + } + + static Map<String, Object> createMetadata(final File file) { + final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file); + final DatasetDataType dataType = DatacatUtilities.getDataType(file); + final FileMetadataReader reader = DatacatUtilities.getFileMetaDataReader(fileFormat, dataType); + if (reader == null) { + throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + dataType.name() + "."); + } + Map<String, Object> metadata; + try { + metadata = reader.getMetadata(file); + } catch (final IOException e) { + throw new RuntimeException(e); + } + return metadata; + } + + static DatasetDataType getDataType(final File file) { + final DatasetFileFormat fileFormat = getFileFormat(file); + DatasetDataType dataType = null; + if (fileFormat == null) { + throw new IllegalArgumentException("File has unknown format: " + file.getAbsolutePath()); + } + if (fileFormat.equals(DatasetFileFormat.EVIO)) { + dataType = DatasetDataType.RAW; + } else if (fileFormat.equals(DatasetFileFormat.LCIO)) { + dataType = DatasetDataType.RECON; + } else if (fileFormat.equals(DatasetFileFormat.ROOT)) { + // FIXME: This should probably open the file and determine what it contains. 
+ if (file.getName().contains("_dqm")) { + dataType = DatasetDataType.DQM; + } else if (file.getName().contains("_dst")) { + dataType = DatasetDataType.DST; + } + } else if (fileFormat.equals(DatasetFileFormat.AIDA)) { + dataType = DatasetDataType.DQM; + } + if (dataType == null) { + throw new IllegalArgumentException("Could not determine data type for format: " + fileFormat.name()); + } + return dataType; + } + + static DatasetFileFormat getFileFormat(final File pathname) { + String name = pathname.getName(); + if (name.contains(DatasetFileFormat.EVIO.extension()) && !name.endsWith(DatasetFileFormat.EVIO.extension())) { + name = stripEvioFileNumber(name); + } + final String extension = name.substring(name.lastIndexOf(".") + 1); + return formatMap.get(extension); + } + + static FileMetadataReader getFileMetaDataReader(final DatasetFileFormat fileFormat, final DatasetDataType dataType) { + FileMetadataReader reader = null; + if (fileFormat.equals(DatasetFileFormat.LCIO)) { + reader = new LcioMetadataReader(); + } else if (fileFormat.equals(DatasetFileFormat.EVIO)) { + reader = new EvioMetadataReader(); + } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DST)) { + reader = new RootDstMetadataReader(); + } + return reader; + } + + static String getFolder(final String rootDir, final File file) { + String stripDir = rootDir; + if (!stripDir.endsWith("/")) { + stripDir += "/"; + } + final String folder = file.getParentFile().getPath().replace(stripDir, ""); + return folder; + } + + static String stripEvioFileNumber(final String name) { + String strippedName = name; + if (!name.endsWith(DatasetFileFormat.EVIO.extension())) { + strippedName = name.substring(0, name.lastIndexOf(".")); + } + return strippedName; + } +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java ============================================================================= --- 
java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,148 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.IOException; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.hps.record.evio.EventTagConstant; +import org.hps.record.evio.EvioEventUtilities; +import org.hps.record.evio.EvioFileUtilities; +import org.jlab.coda.jevio.EvioEvent; +import org.jlab.coda.jevio.EvioException; +import org.jlab.coda.jevio.EvioReader; +import org.lcsim.util.log.DefaultLogFormatter; +import org.lcsim.util.log.LogUtil; + +/** + * Reads metadata from EVIO files. + * + * @author Jeremy McCormick, SLAC + */ +public class EvioMetadataReader implements FileMetadataReader { + + private static Logger LOGGER = LogUtil.create(EvioMetadataReader.class, new DefaultLogFormatter(), Level.ALL); + + /** + * Get the EVIO file metadata. 
+ * + * @param file the EVIO file + * @return the metadata map of key and value pairs + */ + @Override + public Map<String, Object> getMetadata(File file) throws IOException { + + Date startDate = null; + Date endDate = null; + int badEventCount = 0; + int eventCount = 0; + int byteCount = 0; + boolean hasPrestart = false; + boolean hasEnd = false; + int[] eventIdData = null; + Integer run = null; + Integer endEvent = null; + Integer startEvent = null; + Long lastTimestamp = null; + + EvioReader evioReader = null; + try { + evioReader = EvioFileUtilities.open(file, false); + } catch (EvioException e) { + throw new IOException(e); + } + + int fileNumber = EvioFileUtilities.getSequenceFromName(file); + + EvioEvent evioEvent = null; + + while (true) { + try { + evioEvent = evioReader.parseNextEvent(); + } catch (IOException | EvioException e) { + ++badEventCount; + continue; + } + if (evioEvent == null) { + break; + } + byteCount += evioEvent.getTotalBytes(); + if (EventTagConstant.PRESTART.equals(evioEvent)) { + LOGGER.info("found PRESTART"); + hasPrestart = true; + final int[] controlEventData = EvioEventUtilities.getControlEventData(evioEvent); + final long timestamp = controlEventData[0] * 1000L; + startDate = new Date(timestamp); + LOGGER.info("set start date to " + startDate + " from PRESTART"); + if (run == null) { + run = controlEventData[1]; + LOGGER.info("set run to " + run); + } + } else if (EventTagConstant.END.equals(evioEvent)) { + LOGGER.info("found END event"); + hasEnd = true; + final int[] controlEventData = EvioEventUtilities.getControlEventData(evioEvent); + final long timestamp = controlEventData[0] * 1000L; + endDate = new Date(timestamp); + LOGGER.info("set end date to " + endDate); + if (run == null) { + run = controlEventData[1]; + LOGGER.info("set run to " + run); + } + } else if (EvioEventUtilities.isPhysicsEvent(evioEvent)) { + final int[] headBankData = EvioEventUtilities.getHeadBankData(evioEvent); + if (startDate == null) { + if 
(headBankData[3] != 0) { + startDate = new Date(headBankData[3] * 1000L); + LOGGER.info("set start date to " + startDate + " from physics event"); + } + } + if (run == null) { + run = headBankData[1]; + LOGGER.info("set run to " + run + " from physics event"); + } + eventIdData = EvioEventUtilities.getEventIdData(evioEvent); + if (startEvent == null) { + startEvent = eventIdData[0]; + LOGGER.info("set start event " + startEvent); + } + if (headBankData[3] != 0) { + lastTimestamp = headBankData[3] * 1000L; + } + ++eventCount; + } + } + + // Set end date from last valid timestamp. + if (endDate == null) { + endDate = new Date(lastTimestamp); + LOGGER.info("set end date to " + endDate + " from last timestamp " + lastTimestamp); + } + + // Set end event number. + if (eventIdData != null) { + endEvent = eventIdData[0]; + LOGGER.info("set end event " + endEvent); + } + + Map<String, Object> metaDataMap = new HashMap<String, Object>(); + + metaDataMap.put("runMin", run); + metaDataMap.put("runMax", run); + metaDataMap.put("eventCount", eventCount); + metaDataMap.put("size", byteCount); + metaDataMap.put("fileNumber", fileNumber); + metaDataMap.put("badEventCount", badEventCount); + metaDataMap.put("endTimestamp", endDate.getTime()); + metaDataMap.put("startTimestamp", startDate.getTime()); + metaDataMap.put("startEvent", startEvent); + metaDataMap.put("endEvent", endEvent); + metaDataMap.put("hasEnd", hasEnd ? 1 : 0); + metaDataMap.put("hasPrestart", hasPrestart ? 
1 : 0); + + return metaDataMap; + } +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,64 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.FileFilter; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.hps.datacat.client.DatasetFileFormat; +import org.lcsim.util.log.DefaultLogFormatter; +import org.lcsim.util.log.LogUtil; + +/** + * Filter files on their format. + * <p> + * Only files matching the format will be accepted by the file visitor. + * + * @author Jeremy McCormick, SLAC + */ +public class FileFormatFilter implements FileFilter { + + /** + * Setup logger. + */ + private static final Logger LOGGER = LogUtil.create(FileFormatFilter.class, new DefaultLogFormatter(), Level.ALL); + + /** + * The set of file formats to accept. + */ + private Set<DatasetFileFormat> formats; + + /** + * Create a new filter with the given formats. + * + * @param formats the set of file formats to accept + */ + FileFormatFilter(Set<DatasetFileFormat> formats) { + if (formats == null) { + throw new IllegalArgumentException("The formats collection is null."); + } + if (formats.isEmpty()) { + throw new IllegalArgumentException("The formats collection is empty."); + } + this.formats = formats; + } + + /** + * Returns <code>true</code> if the file should be accepted, e.g. it matches the filter's format. 
+ * + * @param pathname the file's full path + */ + @Override + public boolean accept(File pathname) { + LOGGER.info(pathname.getPath()); + DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname); + if (fileFormat != null) { + LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name()); + return formats.contains(fileFormat); + } else { + LOGGER.info("rejected file " + pathname.getPath() + " with unknown format"); + return false; + } + } +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/FileMetadataReader.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,11 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.IOException; +import java.util.Map; + + +public interface FileMetadataReader { + + public Map<String, Object> getMetadata(File file) throws IOException; +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/FileSet.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,27 @@ +package org.hps.crawler; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.hps.datacat.client.DatasetFileFormat; + +/** + * Map of file format to a list of files. 
+ * + * @author Jeremy McCormick, SLAC + */ +public class FileSet extends HashMap<DatasetFileFormat, List<File>> { + + public List<File> get(DatasetFileFormat format) { + if (super.get(format) == null) { + this.put(format, new ArrayList<File>()); + } + return super.get(format); + } + + public void addFile(DatasetFileFormat format, File file) { + this.get(format).add(file); + } +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/LcioMetadataReader.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,70 @@ +package org.hps.crawler; + +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.lcsim.conditions.ConditionsManager; +import org.lcsim.conditions.ConditionsManagerImplementation; +import org.lcsim.conditions.ConditionsReader; +import org.lcsim.event.EventHeader; +import org.lcsim.lcio.LCIOReader; +import org.lcsim.util.loop.DummyConditionsConverter; +import org.lcsim.util.loop.DummyDetector; + +/** + * Reads metadata from LCIO files with reconstructed data. + * + * @author Jeremy McCormick, SLAC + */ +public class LcioMetadataReader implements FileMetadataReader { + + /* + * Setup the conditions system in dummy mode. + */ + static { + ConditionsManager conditionsManager = ConditionsManager.defaultInstance(); + ConditionsReader dummyReader = ConditionsReader.createDummy(); + ((ConditionsManagerImplementation) conditionsManager).setConditionsReader(dummyReader, "DUMMY"); + DummyDetector detector = new DummyDetector("DUMMY"); + conditionsManager.registerConditionsConverter(new DummyConditionsConverter(detector)); + } + + /** + * Get the metadata for the LCIO file. 
+ * + * @param file the LCIO file + * @return the metadata map with key and value pairs + */ + @Override + public Map<String, Object> getMetadata(File file) throws IOException { + Map<String, Object> metaData = new HashMap<String, Object>(); + LCIOReader reader = null; + try { + reader = new LCIOReader(file); + EventHeader eventHeader = null; + int eventCount = 0; + Integer run = null; + try { + while((eventHeader = reader.read()) != null) { + if (run == null) { + run = eventHeader.getRunNumber(); + } + eventCount++; + } + } catch (EOFException e) { + e.printStackTrace(); + } + metaData.put("eventCount", eventCount); + metaData.put("runMin", run); + metaData.put("runMax", run); + } finally { + if (reader != null) { + reader.close(); + } + } + return metaData; + } +} Added: java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,65 @@ +package org.hps.crawler; + +import hep.io.root.RootClassNotFound; +import hep.io.root.RootFileReader; +import hep.io.root.interfaces.TBranch; +import hep.io.root.interfaces.TLeafElement; +import hep.io.root.interfaces.TObjArray; +import hep.io.root.interfaces.TTree; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * This is a very simple metadata reader for ROOT DST files. + * <p> + * It currently only sets the standard metadata for event count and run number. + * + * @author Jeremy McCormick, SLAC + */ +public class RootDstMetadataReader implements FileMetadataReader { + + /** + * Get the metadata for a ROOT DST file. 
+ * + * @return the metadata for a ROOT DST file + */ + public Map<String, Object> getMetadata(File file) throws IOException { + Map<String, Object> metadata = new HashMap<String, Object>(); + RootFileReader rootReader = null; + long eventCount = 0; + int runMin = 0; + int runMax = 0; + long size = 0; + try { + rootReader = new RootFileReader(file.getAbsolutePath()); + TTree tree = (TTree) rootReader.get("HPS_Event"); + //TBranch branch = tree.getBranch("Event"); + eventCount = tree.getEntries(); + size = tree.getTotBytes(); + TObjArray leaves = tree.getLeaves(); + + for (Object object : leaves) { + TLeafElement leaf = (TLeafElement) object; + if ("run_number".equals(leaf.getName())) { + runMin = (int) leaf.getWrappedValue(0); + runMax = (int) leaf.getWrappedValue(0); + break; + } + } + } catch (IOException | RootClassNotFound e) { + throw new IOException(e); + } finally { + if (rootReader != null) { + rootReader.close(); + } + } + metadata.put("eventCount", eventCount); + metadata.put("runMin", runMin); + metadata.put("runMax", runMax); + metadata.put("size", size); + return metadata; + } +} Added: java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java (added) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java Wed Sep 23 07:49:16 2015 @@ -0,0 +1,228 @@ +package org.hps.run.database; + +import java.io.File; +import java.util.Collections; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.hps.datacat.client.DatasetFileFormat; +import org.hps.record.epics.EpicsRunProcessor; +import org.hps.record.evio.EvioFileMetadata; +import org.hps.record.evio.EvioFileMetadataAdapter; +import org.hps.record.evio.EvioFileSequenceComparator; +import org.hps.record.evio.EvioFileSource; +import org.hps.record.evio.EvioLoop; 
+import org.hps.record.scalers.ScalersEvioProcessor; +import org.hps.record.triggerbank.TiTimeOffsetEvioProcessor; +import org.hps.record.triggerbank.TriggerConfig; +import org.hps.record.triggerbank.TriggerConfigVariable; +import org.lcsim.util.log.DefaultLogFormatter; +import org.lcsim.util.log.LogUtil; + +/** + * Processes EVIO files from a run and extracts meta data for updating the run database. + * + * @author Jeremy McCormick, SLAC + */ +public final class RunProcessor { + + /** + * Setup logger. + */ + private static final Logger LOGGER = LogUtil.create(RunProcessor.class, new DefaultLogFormatter(), Level.FINE); + + /** + * Processor for extracting EPICS information. + */ + private EpicsRunProcessor epicsProcessor; + + /** + * The data source with the list of EVIO files to process. + */ + private final EvioFileSource evioFileSource; + + /** + * The EVIO event processing loop. + */ + private final EvioLoop evioLoop = new EvioLoop(); + + /** + * Processor for extracting scaler data. + */ + private ScalersEvioProcessor scalersProcessor; + + /** + * Processor for extracting TI time offset. + */ + private TiTimeOffsetEvioProcessor triggerTimeProcessor; + + /** + * Record loop adapter for getting file metadata. + */ + private final EvioFileMetadataAdapter metadataAdapter = new EvioFileMetadataAdapter(); + + /** + * The run summary for the run. + */ + private RunSummaryImpl runSummary; + + /** + * Create a run processor. + * + * @param runSummary the run summary object for the run + * @return the run processor + */ + public RunProcessor(RunSummaryImpl runSummary) { + + this.runSummary = runSummary; + + List<File> evioFiles = runSummary.getFiles(DatasetFileFormat.EVIO); + if (evioFiles == null || evioFiles.isEmpty()) { + throw new IllegalArgumentException("No EVIO files found in file set."); + } + + // Sort the list of EVIO files. + Collections.sort(runSummary.getFiles(DatasetFileFormat.EVIO), new EvioFileSequenceComparator()); + + // Setup record loop. 
+ evioFileSource = new EvioFileSource(evioFiles); + evioLoop.setEvioFileSource(evioFileSource); + + // Add file metadata processor. + evioLoop.addRecordListener(metadataAdapter); + evioLoop.addLoopListener(metadataAdapter); + } + + public void addEpicsProcessor() { + // Add EPICS processor. + this.epicsProcessor = new EpicsRunProcessor(); + evioLoop.addEvioEventProcessor(epicsProcessor); + } + + public void addScalerProcessor() { + // Add scaler data processor. + scalersProcessor = new ScalersEvioProcessor(); + scalersProcessor.setResetEveryEvent(false); + evioLoop.addEvioEventProcessor(scalersProcessor); + } + + public void addTriggerTimeProcessor() { + // Add processor for extracting TI time offset. + triggerTimeProcessor = new TiTimeOffsetEvioProcessor(); + evioLoop.addEvioEventProcessor(triggerTimeProcessor); + } + + /** + * Extract meta data from first file in run. + */ + private void processFirstFile() { + final EvioFileMetadata metadata = metadataAdapter.getEvioFileMetadata().get(0); + if (metadata == null) { + throw new IllegalStateException("No meta data exists for first file."); + } + LOGGER.info("first file metadata: " + metadata.toString()); + if (metadata.getStartDate() == null) { + throw new IllegalStateException("The start date is not set in the metadata."); + } + LOGGER.info("setting unix start time to " + metadata.getStartDate().getTime() + " from meta data"); + runSummary.setStartDate(metadata.getStartDate()); + } + + /** + * Extract meta data from last file in run. 
+ */ + private void processLastFile() { + LOGGER.info("looking for " + runSummary.getEvioFiles().get(runSummary.getEvioFiles().size() - 1).getPath() + " metadata"); + LOGGER.getHandlers()[0].flush(); + final EvioFileMetadata metadata = this.metadataAdapter.getEvioFileMetadata().get(this.metadataAdapter.getEvioFileMetadata().size() - 1); + if (metadata == null) { + throw new IllegalStateException("Failed to find metadata for last file."); + } + LOGGER.info("last file metadata: " + metadata.toString()); + if (metadata.getEndDate() == null) { + throw new IllegalStateException("The end date is not set in the metadata."); + } + LOGGER.info("setting unix end time to " + metadata.getEndDate().getTime() + " from meta data"); + runSummary.setEndDate(metadata.getEndDate()); + LOGGER.info("setting has END to " + metadata.hasEnd()); + runSummary.setEndOkay(metadata.hasEnd()); + } + + /** + * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and extracting the + * start and end dates. + * <p> + * This method will also execute file caching from MSS, if enabled by the {@link #useFileCache} option. + * + * @throws Exception if there is an error processing a file + */ + public void processRun() throws Exception { + + LOGGER.info("processing " + this.runSummary.getEvioFiles().size() + " files from run " + + this.runSummary.getRun()); + + // Run processors over all files. + LOGGER.info("looping over all events"); + evioLoop.loop(-1); + + LOGGER.info("got " + metadataAdapter.getEvioFileMetadata().size() + " metadata objects from loop"); + LOGGER.getHandlers()[0].flush(); + + // Set start date from first file. + LOGGER.info("processing first file"); + this.processFirstFile(); + + // Set end date from last file. + LOGGER.info("processing last file"); + this.processLastFile(); + + // Update run summary from processors. 
+ LOGGER.info("updating run summary"); + this.updateRunSummary(); + + LOGGER.info("run processor done with run " + this.runSummary.getRun()); + } + + /** + * Update the current run summary by copying data to it from the EVIO processors and the event loop. + */ + private void updateRunSummary() { + + // Set total number of events from the event loop. + LOGGER.info("setting total events " + evioLoop.getTotalCountableConsumed()); + runSummary.setTotalEvents((int) evioLoop.getTotalCountableConsumed()); + + if (scalersProcessor != null) { + // Add scaler data from the scalers EVIO processor. + LOGGER.info("adding " + this.scalersProcessor.getScalerData().size() + " scaler data objects"); + runSummary.setScalerData(this.scalersProcessor.getScalerData()); + } + + if (epicsProcessor != null) { + // Add EPICS data from the EPICS EVIO processor. + LOGGER.info("adding " + this.epicsProcessor.getEpicsData().size() + " EPICS data objects"); + runSummary.setEpicsData(this.epicsProcessor.getEpicsData()); + } + + if (triggerTimeProcessor != null) { + // Add trigger config from the trigger time processor. + LOGGER.info("updating trigger config"); + final TriggerConfig triggerConfig = new TriggerConfig(); + this.triggerTimeProcessor.updateTriggerConfig(triggerConfig); + LOGGER.info("tiTimeOffset: " + triggerConfig.get(TriggerConfigVariable.TI_TIME_OFFSET)); + runSummary.setTriggerConfig(triggerConfig); + } + + LOGGER.getHandlers()[0].flush(); + } + + /** + * Get list of metadata created by processing the files. 
+ * + * @return the list of metadata + */ + public List<EvioFileMetadata> getEvioFileMetaData() { + return this.metadataAdapter.getEvioFileMetadata(); + } +} Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java Wed Sep 23 07:49:16 2015 @@ -4,6 +4,7 @@ import java.util.Date; import java.util.List; +import org.hps.datacat.client.DatasetFileFormat; import org.hps.record.epics.EpicsData; import org.hps.record.scalers.ScalerData; import org.hps.record.triggerbank.TriggerConfig; @@ -121,7 +122,7 @@ int getTotalEvents(); /** - * Get the total number of files for this run. + * Get the total number of EVIO files for this run. * * @return the total number of files for this run */ @@ -140,4 +141,13 @@ * @return the date when this run record was last updated */ Date getUpdated(); + + /** + * Get a list of files in the run by format (EVIO, LCIO etc.). + * + * @param format the file format + * @return the list of files with the given format + */ + // FIXME: This should be removed from the run summary interface. + public List<File> getFiles(DatasetFileFormat format); } Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java Wed Sep 23 07:49:16 2015 @@ -371,21 +371,36 @@ @Override public void insertFullRunSummary(final RunSummary runSummary) { + if (runSummary == null) { + throw new IllegalArgumentException("The run summary is null."); + } + // Insert basic run log info. 
this.insertRunSummary(runSummary); // Insert EPICS data. - LOGGER.info("inserting " + runSummary.getEpicsData().size() + " EPICS records"); - epicsDataDao.insertEpicsData(runSummary.getEpicsData()); + if (runSummary.getEpicsData() != null && !runSummary.getEpicsData().isEmpty()) { + LOGGER.info("inserting " + runSummary.getEpicsData().size() + " EPICS records"); + epicsDataDao.insertEpicsData(runSummary.getEpicsData()); + } else { + LOGGER.warning("no EPICS data to insert"); + } // Insert scaler data. - LOGGER.info("inserting " + runSummary.getScalerData().size() + " scaler data records"); - scalerDataDao.insertScalerData(runSummary.getScalerData(), runSummary.getRun()); + if (runSummary.getScalerData() != null && !runSummary.getScalerData().isEmpty()) { + LOGGER.info("inserting " + runSummary.getScalerData().size() + " scaler data records"); + scalerDataDao.insertScalerData(runSummary.getScalerData(), runSummary.getRun()); + } else { + LOGGER.warning("no scaler data to insert"); + } // Insert trigger config. - LOGGER.info("inserting " + runSummary.getTriggerConfig().size() + " trigger config variables"); - triggerConfigIntDao.insertTriggerConfig(runSummary.getTriggerConfig(), runSummary.getRun()); - + if (runSummary.getTriggerConfig() != null && !runSummary.getTriggerConfig().isEmpty()) { + LOGGER.info("inserting " + runSummary.getTriggerConfig().size() + " trigger config variables"); + triggerConfigIntDao.insertTriggerConfig(runSummary.getTriggerConfig(), runSummary.getRun()); + } else { + LOGGER.warning("no trigger config to insert"); + } } /** @@ -440,7 +455,7 @@ runSummary.setScalerData(scalerDataDao.getScalerData(run)); // Read trigger config. 
- runSummary.setTriggerConfigInt(triggerConfigIntDao.getTriggerConfig(run)); + runSummary.setTriggerConfig(triggerConfigIntDao.getTriggerConfig(run)); return runSummary; } Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java Wed Sep 23 07:49:16 2015 @@ -6,9 +6,12 @@ import java.util.ArrayList; import java.util.Date; import java.util.GregorianCalendar; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.TimeZone; +import org.hps.datacat.client.DatasetFileFormat; import org.hps.record.epics.EpicsData; import org.hps.record.scalers.ScalerData; import org.hps.record.triggerbank.TriggerConfig; @@ -54,11 +57,6 @@ private List<EpicsData> epicsDataList; /** - * The list of EVIO files in the run. - */ - private List<File> evioFileList = new ArrayList<File>(); - - /** * The run number. */ private final int run; @@ -76,7 +74,7 @@ /** * The trigger data for the run. */ - private TriggerConfig triggerConfigInt; + private TriggerConfig triggerConfig; /** * Start date of run. @@ -97,6 +95,11 @@ * Date when the run record was last updated. */ private Date updated; + + /** + * Lists of files indexed by their format. + */ + private Map<DatasetFileFormat, List<File>> fileMap = new HashMap<DatasetFileFormat, List<File>>(); /** * Create a run summary. 
@@ -112,8 +115,8 @@
      *
      * @param file the file to add
      */
-    public void addFile(final File file) {
-        this.evioFileList.add(file);
+    public void addEvioFile(final File file) {
+        this.getEvioFiles().add(file);
     }
 
     /**
@@ -171,7 +174,7 @@
      * @return the list of EVIO files in this run
      */
     public List<File> getEvioFiles() {
-        return this.evioFileList;
+        return this.getFiles(DatasetFileFormat.EVIO); // lazily creates the list so callers (e.g. addEvioFile) never see null
     }
 
     /**
@@ -208,7 +211,7 @@
      * @return the trigger config of this run
      */
     public TriggerConfig getTriggerConfig() {
-        return triggerConfigInt;
+        return triggerConfig;
     }
 
     /**
@@ -271,7 +274,7 @@
      *
      * @param startDate the start date
      */
-    public void setEndDate(final Date endDate) {
+    void setEndDate(final Date endDate) {
         this.endDate = endDate;
     }
 
     /**
@@ -280,7 +283,7 @@
      *
      * @param endOkay <code>true</code> if end is okay
      */
-    public void setEndOkay(final boolean endOkay) {
+    void setEndOkay(final boolean endOkay) {
         this.endOkay = endOkay;
     }
 
     /**
@@ -289,26 +292,17 @@
      *
      * @param epics the EPICS data for the run
      */
-    public void setEpicsData(final List<EpicsData> epicsDataList) {
+    void setEpicsData(final List<EpicsData> epicsDataList) {
         this.epicsDataList = epicsDataList;
     }
-
-    /**
-     * Set the list of EVIO files for the run.
-     *
-     * @param evioFileList the list of EVIO files for the run
-     */
-    public void setEvioFiles(final List<File> evioFileList) {
-        this.evioFileList = evioFileList;
-    }
-
+
     /**
      * Set whether the run was "okay" meaning the data is usable for physics
     * analysis.
* * @param runOkay <code>true</code> if the run is okay */ - public void setRunOkay(final boolean runOkay) { + void setRunOkay(final boolean runOkay) { this.runOkay = runOkay; } @@ -317,7 +311,7 @@ * * @param scalerData the scaler data */ - public void setScalerData(final List<ScalerData> scalerDataList) { + void setScalerData(final List<ScalerData> scalerDataList) { this.scalerDataList = scalerDataList; } @@ -326,8 +320,8 @@ * * @param triggerConfig the trigger config */ - public void setTriggerConfigInt(final TriggerConfig triggerConfigInt) { - this.triggerConfigInt = triggerConfigInt; + void setTriggerConfig(final TriggerConfig triggerConfig) { + this.triggerConfig = triggerConfig; } /** @@ -335,7 +329,7 @@ * * @param startDate the start date */ - public void setStartDate(final Date startDate) { + void setStartDate(final Date startDate) { this.startDate = startDate; } @@ -344,7 +338,7 @@ * * @param totalEvents the total number of physics events in the run */ - public void setTotalEvents(final int totalEvents) { + void setTotalEvents(final int totalEvents) { this.totalEvents = totalEvents; } @@ -353,7 +347,7 @@ * * @param totalFiles the total number of EVIO files in the run */ - public void setTotalFiles(final int totalFiles) { + void setTotalFiles(final int totalFiles) { this.totalFiles = totalFiles; } @@ -362,8 +356,37 @@ * * @param updated the date when the run record was last updated */ - public void setUpdated(final Date updated) { + void setUpdated(final Date updated) { this.updated = updated; + } + + /** + * Add a file associated with this run. + * <p> + * This is public because it is called by the file crawler. + * + * @param file a file associated with this run + */ + // FIXME: This should be removed from the run summary interface. 
+    public void addFile(DatasetFileFormat format, File file) {
+        List<File> files = this.fileMap.get(format); // look up by format key, NOT by file: get(file) was always null and clobbered the existing list
+        if (files == null) {
+            this.fileMap.put(format, new ArrayList<File>());
+        }
+        this.fileMap.get(format).add(file);
+    }
+
+    /**
+     * Get a list of files in the run by format (EVIO, LCIO etc.).
+     *
+     * @param format the file format
+     * @return the list of files with the given format
+     */
+    public List<File> getFiles(DatasetFileFormat format) {
+        if (!this.fileMap.containsKey(format)) {
+            this.fileMap.put(format, new ArrayList<File>());
+        }
+        return this.fileMap.get(format);
     }
 
     /**