Print

Print


Author: [log in to unmask]
Date: Mon Sep 28 11:40:33 2015
New Revision: 3729

Log:
Clean up crawler and run-database classes.

Removed:
    java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java
Modified:
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
    java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	Mon Sep 28 11:40:33 2015
@@ -13,8 +13,10 @@
 import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -142,6 +144,7 @@
         OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
         OPTIONS.addOption("o", "format", true, "add a file format for filtering: " + AVAILABLE_FORMATS);
         OPTIONS.addOption("m", "metadata", false, "create metadata for datasets");
+        OPTIONS.addOption("r", "run", true, "add a run number to accept");
         OPTIONS.addOption("s", "site", true, "datacat site");
         OPTIONS.addOption("t", "timestamp-file", true, "existing or new timestamp file name");
         OPTIONS.addOption("x", "max-depth", true, "max depth to crawl");
@@ -304,6 +307,15 @@
                 throw new RuntimeException("The -f argument with the datacat folder is required.");
             }
 
+            // List of run numbers.
+            if (cl.hasOption("r")) {
+                Set<Integer> acceptRuns = new HashSet<Integer>();
+                for (String arg : cl.getOptionValues("r")) {
+                    acceptRuns.add(Integer.parseInt(arg));                
+                }
+                config.setAcceptRuns(acceptRuns);
+            }
+
         } catch (final ParseException e) {
             throw new RuntimeException("Error parsing options.", e);
         }
@@ -349,6 +361,11 @@
             LOGGER.info("adding file format filter for " + fileFormat.name());
         }
         visitor.addFilter(new FileFormatFilter(config.getFileFormats()));
+
+        // Run number filter.
+        if (!config.acceptRuns().isEmpty()) {
+            visitor.addFilter(new RunFilter(config.acceptRuns()));
+        }
 
         // Walk the file tree using the visitor.
         this.walk(visitor);
@@ -388,21 +405,6 @@
      * @param visitor the file visitor
      */
     private void walk(final DatacatFileVisitor visitor) {
-        if (config.timestamp() != null) {
-            // Date filter from timestamp.
-            visitor.addFilter(new DateFileFilter(config.timestamp()));
-            LOGGER.config("added date filter with time stamp " + config.timestamp());
-        }
-
-        // Is the accept run list not empty? (Empty means accept all runs.)
-        if (!config.acceptRuns().isEmpty()) {
-            // List of run numbers to accept.
-            visitor.addFilter(new RunFilter(config.acceptRuns()));
-            LOGGER.config("added run number filter");
-        } else {
-            LOGGER.config("no run number filter will be used");
-        }
-
         try {
             // Walk the file tree from the root directory.
             final EnumSet<FileVisitOption> options = EnumSet.noneOf(FileVisitOption.class);

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunDatabaseCommandLine.java	Mon Sep 28 11:40:33 2015
@@ -3,8 +3,11 @@
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -18,6 +21,7 @@
 import org.hps.datacat.client.DatacatClient;
 import org.hps.datacat.client.DatacatClientFactory;
 import org.hps.datacat.client.Dataset;
+import org.hps.datacat.client.DatasetMetadata;
 import org.hps.record.evio.EvioFileUtilities;
 import org.lcsim.util.log.DefaultLogFormatter;
 import org.lcsim.util.log.LogUtil;
@@ -169,18 +173,32 @@
         int run = runManager.getRun();
         
         // Get the list of EVIO files for the run using a data catalog query.
-        List<File> files = getEvioFiles(run);        
+        Map<File, Dataset> fileDatasets = getEvioFiles(run);   
+        List<File> files = new ArrayList<File>(fileDatasets.keySet());     
         EvioFileUtilities.sortBySequence(files);
         
         // Process the run's files to get information.
         RunSummaryImpl runSummary = new RunSummaryImpl(run);        
-        RunProcessor runProcessor = this.createEvioRunProcessor(runSummary);
+        RunProcessor runProcessor = this.createEvioRunProcessor(runSummary, files);
         try {
             runProcessor.processRun();
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
 
+        // Set start date.
+        Dataset firstDataset = fileDatasets.get(files.get(0));
+        DatasetMetadata metadata = firstDataset.getMetadata();
+        Date startDate = new Date(metadata.getInteger("startTimestamp"));
+        runSummary.setStartDate(startDate);
+
+        // Set end date.
+        Dataset lastDataset = fileDatasets.get(files.get(files.size() - 1));
+        metadata = lastDataset.getMetadata();
+        Date endDate = new Date(metadata.getInteger("endTimestamp"));
+        runSummary.setEndDate(endDate);
+        runSummary.setEndOkay(metadata.getInteger("hasEnd") == 0 ? false : true);
+
         // Delete existing run.
         if (runExists) {
             runManager.deleteRun();
@@ -233,9 +251,9 @@
      *
      * @return the run processor
      */
-    private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary) {
-
-        final RunProcessor runProcessor = new RunProcessor(runSummary);
+    private RunProcessor createEvioRunProcessor(final RunSummaryImpl runSummary, List<File> files) {
+
+        final RunProcessor runProcessor = new RunProcessor(runSummary, files);
 
         if (features.contains(Feature.EPICS)) {
             runProcessor.addEpicsProcessor();
@@ -256,16 +274,23 @@
      * @param run the run number
      * @return the list of EVIO files from the run
      */
-    private List<File> getEvioFiles(int run) {
+    private Map<File, Dataset> getEvioFiles(int run) {
         DatacatClient datacatClient = new DatacatClientFactory().createClient();
         Set<String> metadata = new HashSet<String>();
+        Map<File, Dataset> files = new HashMap<File, Dataset>();
+        metadata.add("runMin");
+        metadata.add("eventCount");
+        metadata.add("fileNumber");
+        metadata.add("endTimestamp");
+        metadata.add("startTimestamp");
+        metadata.add("hasEnd");
+        metadata.add("hasPrestart");
         List<Dataset> datasets = datacatClient.findDatasets("data/raw", "fileFormat eq 'EVIO' AND dataType eq 'RAW' AND runMin eq " + run, metadata);
         if (datasets.isEmpty()) {
             throw new IllegalStateException("No EVIO datasets for run " + run + " were found in the data catalog.");
         }
-        List<File> files = new ArrayList<File>();
         for (Dataset dataset : datasets) {
-            files.add(new File(dataset.getLocations().get(0).getResource()));
+            files.put(new File(dataset.getLocations().get(0).getResource()), dataset);
         }
         return files;
     }

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunProcessor.java	Mon Sep 28 11:40:33 2015
@@ -58,40 +58,38 @@
     private TiTimeOffsetEvioProcessor triggerTimeProcessor;
     
     /**
-     * Record loop adapter for getting file metadata.
-     */
-    private final EvioFileMetadataAdapter metadataAdapter = new EvioFileMetadataAdapter();
-    
-    /**
      * The run summary for the run.
      */
     private RunSummaryImpl runSummary;
-    
+
+    /**
+     * List of EVIO files in the run. 
+     */
+    private List<File> evioFiles;
+
     /**
      * Create a run processor.
      *
      * @param runSummary the run summary object for the run
      * @return the run processor
      */
-    public RunProcessor(RunSummaryImpl runSummary) {
-        
-        this.runSummary = runSummary;
-        
-        List<File> evioFiles = runSummary.getFiles(DatasetFileFormat.EVIO);
-        if (evioFiles == null || evioFiles.isEmpty()) {
+    public RunProcessor(RunSummaryImpl runSummary, List<File> evioFiles) {
+        if (runSummary == null) {
+            throw new IllegalArgumentException("The run summary is null.");
+        }
+        if (evioFiles == null) {
+            throw new IllegalArgumentException("The evio file list is null.");
+        }
+        if (evioFiles.isEmpty()) {
             throw new IllegalArgumentException("No EVIO files found in file set.");
         }
 
-        // Sort the list of EVIO files.
-        Collections.sort(runSummary.getFiles(DatasetFileFormat.EVIO), new EvioFileSequenceComparator());
+        this.runSummary = runSummary;
+        this.evioFiles = evioFiles;
 
         // Setup record loop.
         evioFileSource = new EvioFileSource(evioFiles);
         evioLoop.setEvioFileSource(evioFileSource);       
-        
-        // Add file metadata processor.
-        evioLoop.addRecordListener(metadataAdapter);
-        evioLoop.addLoopListener(metadataAdapter);
     }
     
     public void addEpicsProcessor() {
@@ -114,68 +112,20 @@
     }
 
     /**
-     * Extract meta data from first file in run.
-     */
-    private void processFirstFile() {
-        final EvioFileMetadata metadata = metadataAdapter.getEvioFileMetadata().get(0);
-        if (metadata == null) {
-            throw new IllegalStateException("No meta data exists for first file.");
-        }
-        LOGGER.info("first file metadata: " + metadata.toString());
-        if (metadata.getStartDate() == null) {
-            throw new IllegalStateException("The start date is not set in the metadata.");
-        }
-        LOGGER.info("setting unix start time to " + metadata.getStartDate().getTime() + " from meta data");
-        runSummary.setStartDate(metadata.getStartDate());
-    }
-
-    /**
-     * Extract meta data from last file in run.
-     */
-    private void processLastFile() {
-        LOGGER.info("looking for " + runSummary.getEvioFiles().get(runSummary.getEvioFiles().size() - 1).getPath() + " metadata");
-        LOGGER.getHandlers()[0].flush();
-        final EvioFileMetadata metadata = this.metadataAdapter.getEvioFileMetadata().get(this.metadataAdapter.getEvioFileMetadata().size() - 1);
-        if (metadata == null) {
-            throw new IllegalStateException("Failed to find metadata for last file.");
-        }
-        LOGGER.info("last file metadata: " + metadata.toString());
-        if (metadata.getEndDate() == null) {
-            throw new IllegalStateException("The end date is not set in the metadata.");
-        }
-        LOGGER.info("setting unix end time to " + metadata.getEndDate().getTime() + " from meta data");
-        runSummary.setEndDate(metadata.getEndDate());
-        LOGGER.info("setting has END to " + metadata.hasEnd());
-        runSummary.setEndOkay(metadata.hasEnd());
-    }
-
-    /**
-     * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s and extracting the
-     * start and end dates.
-     * <p>
-     * This method will also execute file caching from MSS, if enabled by the {@link #useFileCache} option.
+     * Process the run by executing the registered {@link org.hps.record.evio.EvioEventProcessor}s
      *
      * @throws Exception if there is an error processing a file
      */
     public void processRun() throws Exception {
 
-        LOGGER.info("processing " + this.runSummary.getEvioFiles().size() + " files from run "
+        LOGGER.info("processing " + this.evioFiles.size() + " files from run "
                 + this.runSummary.getRun());
 
         // Run processors over all files.
         LOGGER.info("looping over all events");
         evioLoop.loop(-1);
                 
-        LOGGER.info("got " + metadataAdapter.getEvioFileMetadata().size() + " metadata objects from loop");
         LOGGER.getHandlers()[0].flush();
-
-        // Set start date from first file.
-        LOGGER.info("processing first file");
-        this.processFirstFile();
-
-        // Set end date from last file.
-        LOGGER.info("processing last file");
-        this.processLastFile();
 
         // Update run summary from processors.
         LOGGER.info("updating run summary");
@@ -216,13 +166,4 @@
 
         LOGGER.getHandlers()[0].flush();
     }        
-    
-    /**
-     * Get list of metadata created by processing the files.
-     * 
-     * @return the list of metadata
-     */
-    public List<EvioFileMetadata> getEvioFileMetaData() {
-        return this.metadataAdapter.getEvioFileMetadata();
-    }
 }

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummary.java	Mon Sep 28 11:40:33 2015
@@ -73,13 +73,6 @@
     double getEventRate();
 
     /**
-     * Get the list of EVIO files in this run.
-     *
-     * @return the list of EVIO files in this run
-     */
-    List<File> getEvioFiles();
-
-    /**
      * Get the run number.
      *
      * @return the run number
@@ -141,13 +134,4 @@
      * @return the date when this run record was last updated
      */
     Date getUpdated();
-    
-    /**
-     * Get a list of files in the run by format (EVIO, LCIO etc.).
-     * 
-     * @param format the file format
-     * @return the list of files with the given format
-     */
-    // FIXME: This should be removed from the run summary interface.
-    public List<File> getFiles(DatasetFileFormat format);
 }

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryDaoImpl.java	Mon Sep 28 11:40:33 2015
@@ -415,7 +415,7 @@
             preparedStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getStartDate().getTime()), CALENDAR);
             preparedStatement.setTimestamp(3, new java.sql.Timestamp(runSummary.getEndDate().getTime()), CALENDAR);
             preparedStatement.setInt(4, runSummary.getTotalEvents());
-            preparedStatement.setInt(5, runSummary.getEvioFiles().size());
+            preparedStatement.setInt(5, runSummary.getTotalFiles());
             preparedStatement.setBoolean(6, runSummary.getEndOkay());
             preparedStatement.executeUpdate();
         } catch (final SQLException e) {
@@ -498,7 +498,7 @@
             preparedStatement.setTimestamp(1, new java.sql.Timestamp(runSummary.getStartDate().getTime()), CALENDAR);
             preparedStatement.setTimestamp(2, new java.sql.Timestamp(runSummary.getEndDate().getTime()), CALENDAR);
             preparedStatement.setInt(3, runSummary.getTotalEvents());
-            preparedStatement.setInt(4, runSummary.getEvioFiles().size());
+            preparedStatement.setInt(4, runSummary.getTotalFiles());
             preparedStatement.setBoolean(5, runSummary.getEndOkay());
             preparedStatement.setBoolean(6, runSummary.getRunOkay());
             preparedStatement.setInt(7, runSummary.getRun());

Modified: java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/run/database/RunSummaryImpl.java	Mon Sep 28 11:40:33 2015
@@ -97,26 +97,12 @@
     private Date updated;
     
     /**
-     * Lists of files indexed by their format.
-     */
-    private Map<DatasetFileFormat, List<File>> fileMap = new HashMap<DatasetFileFormat, List<File>>();
-
-    /**
      * Create a run summary.
      *
      * @param run the run number
      */
     public RunSummaryImpl(final int run) {
         this.run = run;
-    }
-
-    /**
-     * Add an EVIO file from this run to the list.
-     *
-     * @param file the file to add
-     */
-    public void addEvioFile(final File file) {
-        this.getEvioFiles().add(file);
     }
 
     /**
@@ -169,15 +155,6 @@
     }
 
     /**
-     * Get the list of EVIO files in this run.
-     *
-     * @return the list of EVIO files in this run
-     */
-    public List<File> getEvioFiles() {
-        return this.fileMap.get(DatasetFileFormat.EVIO);
-    }
-
-    /**
      * Get the run number.
      *
      * @return the run number
@@ -360,35 +337,6 @@
         this.updated = updated;
     }
     
-    /**
-     * Add a file associated with this run.
-     * <p>
-     * This is public because it is called by the file crawler.
-     * 
-     * @param file a file associated with this run
-     */
-    // FIXME: This should be removed from the run summary interface.
-    public void addFile(DatasetFileFormat format, File file) {
-        List<File> files = this.fileMap.get(file);
-        if (files == null) {
-            this.fileMap.put(format, new ArrayList<File>());
-        }
-        this.fileMap.get(format).add(file);
-    }
-    
-    /**
-     * Get a list of files in the run by format (EVIO, LCIO etc.).
-     * 
-     * @param format the file format
-     * @return the list of files with the given format
-     */
-    public List<File> getFiles(DatasetFileFormat format) {
-        if (!this.fileMap.containsKey(format)) {
-            this.fileMap.put(format, new ArrayList<File>());
-        }
-        return this.fileMap.get(format);
-    }
-
     /**
      * Convert this object to a string.
      *