Author: [log in to unmask] Date: Mon Sep 28 11:40:33 2015 New Revision: 3729 Log: Cleanup crawler and run-database classes. Removed: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Mon Sep 28 11:40:33 2015 @@ -13,8 +13,10 @@ import java.util.Date; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -142,6 +144,7 @@ OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)"); OPTIONS.addOption("o", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS); OPTIONS.addOption("m", "metadata", false, "create metadata for datasets"); + OPTIONS.addOption("r", "run", true, "add a run number to accept"); OPTIONS.addOption("s", "site", true, "datacat site"); OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name"); OPTIONS.addOption("x", "max-depth", true, "max depth to crawl"); @@ -304,6 +307,15 @@ throw new RuntimeException("The -f argument with the datacat folder is required."); } + // List of run numbers. + if (cl.hasOption("r")) { + Set<Integer> acceptRuns = new HashSet<Integer>(); + for (String arg : cl.getOptionValues("r")) { + acceptRuns.add(Integer.parseInt(arg)); + } + config.setAcceptRuns(acceptRuns); + } + } catch (final ParseException e) { throw new RuntimeException("Error parsing options.", e); } @@ -349,6 +361,11 @@ LOGGER.info("adding file format filter for " + fileFormat.name()); } visitor.addFilter(new FileFormatFilter(config.getFileFormats())); + + // Run number filter. + if (!config.acceptRuns().isEmpty()) { + visitor.addFilter(new RunFilter(config.acceptRuns())); + } // Walk the file tree using the visitor. this.walk(visitor); @@ -388,21 +405,6 @@ * @param visitor the file visitor */ private void walk(final DatacatFileVisitor visitor) { - if (config.timestamp() != null) { - // Date filter from timestamp. - visitor.addFilter(new DateFileFilter(config.timestamp())); - LOGGER.config("added date filter with time stamp " + config.timestamp()); - } - - // Is the accept run list not empty? (Empty means accept all runs.) - if (!config.acceptRuns().isEmpty()) { - // List of run numbers to accept. - visitor.addFilter(new RunFilter(config.acceptRuns())); - LOGGER.config("added run number filter"); - } else { - LOGGER.config("no run number filter will be used"); - } - try { // Walk the file tree from the root directory. final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class); Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java Mon Sep 28 11:40:33 2015 @@ -3,8 +3,11 @@ import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -18,6 +21,7 @@ import org.hps.datacat.client.DatacatClient; import org.hps.datacat.client.DatacatClientFactory; import org.hps.datacat.client.Dataset; +import org.hps.datacat.client.DatasetMetadata; import org.hps.record.evio.EvioFileUtilities; import org.lcsim.util.log.DefaultLogFormatter; import org.lcsim.util.log.LogUtil; @@ -169,18 +173,32 @@ int run = runManager.getRun(); // Get the list of EVIO files for the run using a data catalog query. - List<File> files = getEvioFiles(run); + Map<File, Dataset> fileDatasets = getEvioFiles(run); + List<File> files = new ArrayList<File>(fileDatasets.keySet()); EvioFileUtilities.sortBySequence(files); // Process the run's files to get information. RunSummaryImpl runSummary = new RunSummaryImpl(run); - RunProcessor runProcessor = this.createEvioRunProcessor(runSummary); + RunProcessor runProcessor = this.createEvioRunProcessor(runSummary, files); try { runProcessor.processRun(); } catch (Exception e) { throw new RuntimeException(e); } + // Set start date. + Dataset firstDataset = fileDatasets.get(files.get(0)); + DatasetMetadata metadata = firstDataset.getMetadata(); + Date startDate = new Date(metadata.getInteger("startTimestamp")); + runSummary.setStartDate(startDate); + + // Set end date. + Dataset lastDataset = fileDatasets.get(files.get(files.size() - 1)); + metadata = lastDataset.getMetadata(); + Date endDate = new Date(metadata.getInteger("endTimestamp")); + runSummary.setEndDate(endDate); + runSummary.setEndOkay(metadata.getInteger("hasEnd") == 0 ? false : true); + // Delete existing run. if (runExists) { runManager.deleteRun(); @@ -233,9 +251,9 @@ * * @return the run processor */ - private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary) { - - final RunProcessor runProcessor = new RunProcessor(runSummary); + private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary, List<File> files) { + + final RunProcessor runProcessor = new RunProcessor(runSummary, files); if (features.contains(Feature.EPICS)) { runProcessor.addEpicsProcessor(); @@ -256,16 +274,23 @@ * @param run the run number * @return the list of EVIO files from the run */ - private List<File> getEvioFiles(int run) { + private Map<File, Dataset> getEvioFiles(int run) { DatacatClient datacatClient = new DatacatClientFactory().createClient(); Set<String> metadata = new HashSet<String>(); + Map<File, Dataset> files = new HashMap<File, Dataset>(); + metadata.add("runMin"); + metadata.add("eventCount"); + metadata.add("fileNumber"); + metadata.add("endTimestamp"); + metadata.add("startTimestamp"); + metadata.add("hasEnd"); + metadata.add("hasPrestart"); List<Dataset> datasets = datacatClient.findDatasets("data/raw", "fileFormat eq 'EVIO' AND dataType eq 'RAW' AND runMin eq " + run, metadata); if (datasets.isEmpty()) { throw new IllegalStateException("No EVIO datasets for run " + run + " were found in the data catalog."); } - List<File> files = new ArrayList<File>(); for (Dataset dataset : datasets) { - files.add(new File(dataset.getLocations().get(0).getResource())); + files.put(new File(dataset.getLocations().get(0).getResource()), dataset); } return files; } Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java Mon Sep 28 11:40:33 2015 @@ -58,40 +58,38 @@ private TiTimeOffsetEvioProcessor triggerTimeProcessor; /** - * Record loop adapter for getting file metadata. - */ - private final EvioFileMetadataAdapter metadataAdapter = new EvioFileMetadataAdapter(); - - /** * The run summary for the run. */ private RunSummaryImpl runSummary; - + + /** + * List of EVIO files in the run. + */ + private List<File> evioFiles; + /** * Create a run processor. * * @param runSummary the run summary object for the run * @return the run processor */ - public RunProcessor(RunSummaryImpl runSummary) { - - this.runSummary = runSummary; - - List<File> evioFiles = runSummary.getFiles(DatasetFileFormat.EVIO); - if (evioFiles == null || evioFiles.isEmpty()) { + public RunProcessor(RunSummaryImpl runSummary, List<File> evioFiles) { + if (runSummary == null) { + throw new IllegalArgumentException("The run summary is null."); + } + if (evioFiles == null) { + throw new IllegalArgumentException("The evio file list is null."); + } + if (evioFiles.isEmpty()) { throw new IllegalArgumentException("No EVIO files found in file set."); } - // Sort the list of EVIO files. - Collections.sort(runSummary.getFiles(DatasetFileFormat.EVIO), new EvioFileSequenceComparator()); + this.runSummary = runSummary; + this.evioFiles = evioFiles; // Setup record loop. evioFileSource = new EvioFileSource(evioFiles); evioLoop.setEvioFileSource(evioFileSource); - - // Add file metadata processor. - evioLoop.addRecordListener(metadataAdapter); - evioLoop.addLoopListener(metadataAdapter); } public void addEpicsProcessor() { @@ -114,68 +112,20 @@ } /** - * Extract meta data from first file in run. - */ - private void processFirstFile() { - final EvioFileMetadata metadata = metadataAdapter.getEvioFileMetadata().get(0); - if (metadata == null) { - throw new IllegalStateException("No meta data exists for first file."); - } - LOGGER.info("first file metadata: " + metadata.toString()); - if (metadata.getStartDate() == null) { - throw new IllegalStateException("The start date is not set in the metadata."); - } - LOGGER.info("setting unix start time to " + metadata.getStartDate().getTime() + " from meta data"); - runSummary.setStartDate(metadata.getStartDate()); - } - - /** - * Extract meta data from last file in run. - */ - private void processLastFile() { - LOGGER.info("looking for " + runSummary.getEvioFiles().get(runSummary.getEvioFiles().size() - 1).getPath() + " metadata"); - LOGGER.getHandlers()[0].flush(); - final EvioFileMetadata metadata = this.metadataAdapter.getEvioFileMetadata().get(this.metadataAdapter.getEvioFileMetadata().size() - 1); - if (metadata == null) { - throw new IllegalStateException("Failed to find metadata for last file."); - } - LOGGER.info("last file metadata: " + metadata.toString()); - if (metadata.getEndDate() == null) { - throw new IllegalStateException("The end date is not set in the metadata."); - } - LOGGER.info("setting unix end time to " + metadata.getEndDate().getTime() + " from meta data"); - runSummary.setEndDate(metadata.getEndDate()); - LOGGER.info("setting has END to " + metadata.hasEnd()); - runSummary.setEndOkay(metadata.hasEnd()); - } - - /** - * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and extracting the - * start and end dates. - * <p> - * This method will also execute file caching from MSS, if enabled by the {@link #useFileCache} option. + * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s * * @throws Exception if there is an error processing a file */ public void processRun() throws Exception { - LOGGER.info("processing " + this.runSummary.getEvioFiles().size() + " files from run " + LOGGER.info("processing " + this.evioFiles.size() + " files from run " + this.runSummary.getRun()); // Run processors over all files. LOGGER.info("looping over all events"); evioLoop.loop(-1); - LOGGER.info("got " + metadataAdapter.getEvioFileMetadata().size() + " metadata objects from loop"); LOGGER.getHandlers()[0].flush(); - - // Set start date from first file. - LOGGER.info("processing first file"); - this.processFirstFile(); - - // Set end date from last file. - LOGGER.info("processing last file"); - this.processLastFile(); // Update run summary from processors. LOGGER.info("updating run summary"); @@ -216,13 +166,4 @@ LOGGER.getHandlers()[0].flush(); } - - /** - * Get list of metadata created by processing the files. - * - * @return the list of metadata - */ - public List<EvioFileMetadata> getEvioFileMetaData() { - return this.metadataAdapter.getEvioFileMetadata(); - } } Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java Mon Sep 28 11:40:33 2015 @@ -73,13 +73,6 @@ double getEventRate(); /** - * Get the list of EVIO files in this run. - * - * @return the list of EVIO files in this run - */ - List<File> getEvioFiles(); - - /** * Get the run number. * * @return the run number @@ -141,13 +134,4 @@ * @return the date when this run record was last updated */ Date getUpdated(); - - /** - * Get a list of files in the run by format (EVIO, LCIO etc.). - * - * @param format the file format - * @return the list of files with the given format - */ - // FIXME: This should be removed from the run summary interface. - public List<File> getFiles(DatasetFileFormat format); } Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java Mon Sep 28 11:40:33 2015 @@ -415,7 +415,7 @@ preparedStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime()), CALENDAR); preparedStatement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime()), CALENDAR); preparedStatement.setInt(4, runSummary.getTotalEvents()); - preparedStatement.setInt(5, runSummary.getEvioFiles().size()); + preparedStatement.setInt(5, runSummary.getTotalFiles()); preparedStatement.setBoolean(6, runSummary.getEndOkay()); preparedStatement.executeUpdate(); } catch (final SQLException e) { @@ -498,7 +498,7 @@ preparedStatement.setTimestamp(1, new java.sql.Timestamp(runSummary.getStartDate().getTime()), CALENDAR); preparedStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getEndDate().getTime()), CALENDAR); preparedStatement.setInt(3, runSummary.getTotalEvents()); - preparedStatement.setInt(4, runSummary.getEvioFiles().size()); + preparedStatement.setInt(4, runSummary.getTotalFiles()); preparedStatement.setBoolean(5, runSummary.getEndOkay()); preparedStatement.setBoolean(6, runSummary.getRunOkay()); preparedStatement.setInt(7, runSummary.getRun()); Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java (original) +++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java Mon Sep 28 11:40:33 2015 @@ -97,26 +97,12 @@ private Date updated; /** - * Lists of files indexed by their format. - */ - private Map<DatasetFileFormat, List<File>> fileMap = new HashMap<DatasetFileFormat, List<File>>(); - - /** * Create a run summary. * * @param run the run number */ public RunSummaryImpl(final int run) { this.run = run; - } - - /** - * Add an EVIO file from this run to the list. - * - * @param file the file to add - */ - public void addEvioFile(final File file) { - this.getEvioFiles().add(file); } /** @@ -169,15 +155,6 @@ } /** - * Get the list of EVIO files in this run. - * - * @return the list of EVIO files in this run - */ - public List<File> getEvioFiles() { - return this.fileMap.get(DatasetFileFormat.EVIO); - } - - /** * Get the run number. * * @return the run number @@ -360,35 +337,6 @@ this.updated = updated; } - /** - * Add a file associated with this run. - * <p> - * This is public because it is called by the file crawler. - * - * @param file a file associated with this run - */ - // FIXME: This should be removed from the run summary interface. - public void addFile(DatasetFileFormat format, File file) { - List<File> files = this.fileMap.get(file); - if (files == null) { - this.fileMap.put(format, new ArrayList<File>()); - } - this.fileMap.get(format).add(file); - } - - /** - * Get a list of files in the run by format (EVIO, LCIO etc.). - * - * @param format the file format - * @return the list of files with the given format - */ - public List<File> getFiles(DatasetFileFormat format) { - if (!this.fileMap.containsKey(format)) { - this.fileMap.put(format, new ArrayList<File>()); - } - return this.fileMap.get(format); - } - /** * Convert this object to a string. *