Author: [log in to unmask]
Date: Mon Sep 28 11:40:33 2015
New Revision: 3729
Log:
Clean up crawler and run-database classes.
Removed:
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java
Modified:
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Mon Sep 28 11:40:33 2015
@@ -13,8 +13,10 @@
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -142,6 +144,7 @@
OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
OPTIONS.addOption("o", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS);
OPTIONS.addOption("m", "metadata", false, "create metadata for datasets");
+ OPTIONS.addOption("r", "run", true, "add a run number to accept");
OPTIONS.addOption("s", "site", true, "datacat site");
OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
OPTIONS.addOption("x", "max-depth", true, "max depth to crawl");
@@ -304,6 +307,15 @@
throw new RuntimeException("The -f argument with the datacat folder is required.");
}
+ // List of run numbers.
+ if (cl.hasOption("r")) {
+ Set<Integer> acceptRuns = new HashSet<Integer>();
+ for (String arg : cl.getOptionValues("r")) {
+ acceptRuns.add(Integer.parseInt(arg));
+ }
+ config.setAcceptRuns(acceptRuns);
+ }
+
} catch (final ParseException e) {
throw new RuntimeException("Error parsing options.", e);
}
@@ -349,6 +361,11 @@
LOGGER.info("adding file format filter for " + fileFormat.name());
}
visitor.addFilter(new FileFormatFilter(config.getFileFormats()));
+
+ // Run number filter.
+ if (!config.acceptRuns().isEmpty()) {
+ visitor.addFilter(new RunFilter(config.acceptRuns()));
+ }
// Walk the file tree using the visitor.
this.walk(visitor);
@@ -388,21 +405,6 @@
* @param visitor the file visitor
*/
private void walk(final DatacatFileVisitor visitor) {
- if (config.timestamp() != null) {
- // Date filter from timestamp.
- visitor.addFilter(new DateFileFilter(config.timestamp()));
- LOGGER.config("added date filter with time stamp " + config.timestamp());
- }
-
- // Is the accept run list not empty? (Empty means accept all runs.)
- if (!config.acceptRuns().isEmpty()) {
- // List of run numbers to accept.
- visitor.addFilter(new RunFilter(config.acceptRuns()));
- LOGGER.config("added run number filter");
- } else {
- LOGGER.config("no run number filter will be used");
- }
-
try {
// Walk the file tree from the root directory.
final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java Mon Sep 28 11:40:33 2015
@@ -3,8 +3,11 @@
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -18,6 +21,7 @@
import org.hps.datacat.client.DatacatClient;
import org.hps.datacat.client.DatacatClientFactory;
import org.hps.datacat.client.Dataset;
+import org.hps.datacat.client.DatasetMetadata;
import org.hps.record.evio.EvioFileUtilities;
import org.lcsim.util.log.DefaultLogFormatter;
import org.lcsim.util.log.LogUtil;
@@ -169,18 +173,32 @@
int run = runManager.getRun();
// Get the list of EVIO files for the run using a data catalog query.
- List<File> files = getEvioFiles(run);
+ Map<File, Dataset> fileDatasets = getEvioFiles(run);
+ List<File> files = new ArrayList<File>(fileDatasets.keySet());
EvioFileUtilities.sortBySequence(files);
// Process the run's files to get information.
RunSummaryImpl runSummary = new RunSummaryImpl(run);
- RunProcessor runProcessor = this.createEvioRunProcessor(runSummary);
+ RunProcessor runProcessor = this.createEvioRunProcessor(runSummary, files);
try {
runProcessor.processRun();
} catch (Exception e) {
throw new RuntimeException(e);
}
+ // Set start date.
+ Dataset firstDataset = fileDatasets.get(files.get(0));
+ DatasetMetadata metadata = firstDataset.getMetadata();
+ Date startDate = new Date(metadata.getInteger("startTimestamp"));
+ runSummary.setStartDate(startDate);
+
+ // Set end date.
+ Dataset lastDataset = fileDatasets.get(files.get(files.size() - 1));
+ metadata = lastDataset.getMetadata();
+ Date endDate = new Date(metadata.getInteger("endTimestamp"));
+ runSummary.setEndDate(endDate);
+ runSummary.setEndOkay(metadata.getInteger("hasEnd") == 0 ? false : true);
+
// Delete existing run.
if (runExists) {
runManager.deleteRun();
@@ -233,9 +251,9 @@
*
* @return the run processor
*/
- private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary) {
-
- final RunProcessor runProcessor = new RunProcessor(runSummary);
+ private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary, List<File> files) {
+
+ final RunProcessor runProcessor = new RunProcessor(runSummary, files);
if (features.contains(Feature.EPICS)) {
runProcessor.addEpicsProcessor();
@@ -256,16 +274,23 @@
* @param run the run number
* @return the list of EVIO files from the run
*/
- private List<File> getEvioFiles(int run) {
+ private Map<File, Dataset> getEvioFiles(int run) {
DatacatClient datacatClient = new DatacatClientFactory().createClient();
Set<String> metadata = new HashSet<String>();
+ Map<File, Dataset> files = new HashMap<File, Dataset>();
+ metadata.add("runMin");
+ metadata.add("eventCount");
+ metadata.add("fileNumber");
+ metadata.add("endTimestamp");
+ metadata.add("startTimestamp");
+ metadata.add("hasEnd");
+ metadata.add("hasPrestart");
List<Dataset> datasets = datacatClient.findDatasets("data/raw", "fileFormat eq 'EVIO' AND dataType eq 'RAW' AND runMin eq " + run, metadata);
if (datasets.isEmpty()) {
throw new IllegalStateException("No EVIO datasets for run " + run + " were found in the data catalog.");
}
- List<File> files = new ArrayList<File>();
for (Dataset dataset : datasets) {
- files.add(new File(dataset.getLocations().get(0).getResource()));
+ files.put(new File(dataset.getLocations().get(0).getResource()), dataset);
}
return files;
}
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java Mon Sep 28 11:40:33 2015
@@ -58,40 +58,38 @@
private TiTimeOffsetEvioProcessor triggerTimeProcessor;
/**
- * Record loop adapter for getting file metadata.
- */
- private final EvioFileMetadataAdapter metadataAdapter = new EvioFileMetadataAdapter();
-
- /**
* The run summary for the run.
*/
private RunSummaryImpl runSummary;
-
+
+ /**
+ * List of EVIO files in the run.
+ */
+ private List<File> evioFiles;
+
/**
* Create a run processor.
*
* @param runSummary the run summary object for the run
* @return the run processor
*/
- public RunProcessor(RunSummaryImpl runSummary) {
-
- this.runSummary = runSummary;
-
- List<File> evioFiles = runSummary.getFiles(DatasetFileFormat.EVIO);
- if (evioFiles == null || evioFiles.isEmpty()) {
+ public RunProcessor(RunSummaryImpl runSummary, List<File> evioFiles) {
+ if (runSummary == null) {
+ throw new IllegalArgumentException("The run summary is null.");
+ }
+ if (evioFiles == null) {
+ throw new IllegalArgumentException("The evio file list is null.");
+ }
+ if (evioFiles.isEmpty()) {
throw new IllegalArgumentException("No EVIO files found in file set.");
}
- // Sort the list of EVIO files.
- Collections.sort(runSummary.getFiles(DatasetFileFormat.EVIO), new EvioFileSequenceComparator());
+ this.runSummary = runSummary;
+ this.evioFiles = evioFiles;
// Setup record loop.
evioFileSource = new EvioFileSource(evioFiles);
evioLoop.setEvioFileSource(evioFileSource);
-
- // Add file metadata processor.
- evioLoop.addRecordListener(metadataAdapter);
- evioLoop.addLoopListener(metadataAdapter);
}
public void addEpicsProcessor() {
@@ -114,68 +112,20 @@
}
/**
- * Extract meta data from first file in run.
- */
- private void processFirstFile() {
- final EvioFileMetadata metadata = metadataAdapter.getEvioFileMetadata().get(0);
- if (metadata == null) {
- throw new IllegalStateException("No meta data exists for first file.");
- }
- LOGGER.info("first file metadata: " + metadata.toString());
- if (metadata.getStartDate() == null) {
- throw new IllegalStateException("The start date is not set in the metadata.");
- }
- LOGGER.info("setting unix start time to " + metadata.getStartDate().getTime() + " from meta data");
- runSummary.setStartDate(metadata.getStartDate());
- }
-
- /**
- * Extract meta data from last file in run.
- */
- private void processLastFile() {
- LOGGER.info("looking for " + runSummary.getEvioFiles().get(runSummary.getEvioFiles().size() - 1).getPath() + " metadata");
- LOGGER.getHandlers()[0].flush();
- final EvioFileMetadata metadata = this.metadataAdapter.getEvioFileMetadata().get(this.metadataAdapter.getEvioFileMetadata().size() - 1);
- if (metadata == null) {
- throw new IllegalStateException("Failed to find metadata for last file.");
- }
- LOGGER.info("last file metadata: " + metadata.toString());
- if (metadata.getEndDate() == null) {
- throw new IllegalStateException("The end date is not set in the metadata.");
- }
- LOGGER.info("setting unix end time to " + metadata.getEndDate().getTime() + " from meta data");
- runSummary.setEndDate(metadata.getEndDate());
- LOGGER.info("setting has END to " + metadata.hasEnd());
- runSummary.setEndOkay(metadata.hasEnd());
- }
-
- /**
- * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and extracting the
- * start and end dates.
- * <p>
- * This method will also execute file caching from MSS, if enabled by the {@link #useFileCache} option.
+ * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s
*
* @throws Exception if there is an error processing a file
*/
public void processRun() throws Exception {
- LOGGER.info("processing " + this.runSummary.getEvioFiles().size() + " files from run "
+ LOGGER.info("processing " + this.evioFiles.size() + " files from run "
+ this.runSummary.getRun());
// Run processors over all files.
LOGGER.info("looping over all events");
evioLoop.loop(-1);
- LOGGER.info("got " + metadataAdapter.getEvioFileMetadata().size() + " metadata objects from loop");
LOGGER.getHandlers()[0].flush();
-
- // Set start date from first file.
- LOGGER.info("processing first file");
- this.processFirstFile();
-
- // Set end date from last file.
- LOGGER.info("processing last file");
- this.processLastFile();
// Update run summary from processors.
LOGGER.info("updating run summary");
@@ -216,13 +166,4 @@
LOGGER.getHandlers()[0].flush();
}
-
- /**
- * Get list of metadata created by processing the files.
- *
- * @return the list of metadata
- */
- public List<EvioFileMetadata> getEvioFileMetaData() {
- return this.metadataAdapter.getEvioFileMetadata();
- }
}
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java Mon Sep 28 11:40:33 2015
@@ -73,13 +73,6 @@
double getEventRate();
/**
- * Get the list of EVIO files in this run.
- *
- * @return the list of EVIO files in this run
- */
- List<File> getEvioFiles();
-
- /**
* Get the run number.
*
* @return the run number
@@ -141,13 +134,4 @@
* @return the date when this run record was last updated
*/
Date getUpdated();
-
- /**
- * Get a list of files in the run by format (EVIO, LCIO etc.).
- *
- * @param format the file format
- * @return the list of files with the given format
- */
- // FIXME: This should be removed from the run summary interface.
- public List<File> getFiles(DatasetFileFormat format);
}
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java Mon Sep 28 11:40:33 2015
@@ -415,7 +415,7 @@
preparedStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime()), CALENDAR);
preparedStatement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime()), CALENDAR);
preparedStatement.setInt(4, runSummary.getTotalEvents());
- preparedStatement.setInt(5, runSummary.getEvioFiles().size());
+ preparedStatement.setInt(5, runSummary.getTotalFiles());
preparedStatement.setBoolean(6, runSummary.getEndOkay());
preparedStatement.executeUpdate();
} catch (final SQLException e) {
@@ -498,7 +498,7 @@
preparedStatement.setTimestamp(1, new java.sql.Timestamp(runSummary.getStartDate().getTime()), CALENDAR);
preparedStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getEndDate().getTime()), CALENDAR);
preparedStatement.setInt(3, runSummary.getTotalEvents());
- preparedStatement.setInt(4, runSummary.getEvioFiles().size());
+ preparedStatement.setInt(4, runSummary.getTotalFiles());
preparedStatement.setBoolean(5, runSummary.getEndOkay());
preparedStatement.setBoolean(6, runSummary.getRunOkay());
preparedStatement.setInt(7, runSummary.getRun());
Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java Mon Sep 28 11:40:33 2015
@@ -97,26 +97,12 @@
private Date updated;
/**
- * Lists of files indexed by their format.
- */
- private Map<DatasetFileFormat, List<File>> fileMap = new HashMap<DatasetFileFormat, List<File>>();
-
- /**
* Create a run summary.
*
* @param run the run number
*/
public RunSummaryImpl(final int run) {
this.run = run;
- }
-
- /**
- * Add an EVIO file from this run to the list.
- *
- * @param file the file to add
- */
- public void addEvioFile(final File file) {
- this.getEvioFiles().add(file);
}
/**
@@ -169,15 +155,6 @@
}
/**
- * Get the list of EVIO files in this run.
- *
- * @return the list of EVIO files in this run
- */
- public List<File> getEvioFiles() {
- return this.fileMap.get(DatasetFileFormat.EVIO);
- }
-
- /**
* Get the run number.
*
* @return the run number
@@ -360,35 +337,6 @@
this.updated = updated;
}
- /**
- * Add a file associated with this run.
- * <p>
- * This is public because it is called by the file crawler.
- *
- * @param file a file associated with this run
- */
- // FIXME: This should be removed from the run summary interface.
- public void addFile(DatasetFileFormat format, File file) {
- List<File> files = this.fileMap.get(file);
- if (files == null) {
- this.fileMap.put(format, new ArrayList<File>());
- }
- this.fileMap.get(format).add(file);
- }
-
- /**
- * Get a list of files in the run by format (EVIO, LCIO etc.).
- *
- * @param format the file format
- * @return the list of files with the given format
- */
- public List<File> getFiles(DatasetFileFormat format) {
- if (!this.fileMap.containsKey(format)) {
- this.fileMap.put(format, new ArrayList<File>());
- }
- return this.fileMap.get(format);
- }
-
/**
* Convert this object to a string.
*
|