Print

Print


Author: [log in to unmask]
Date: Wed Jul 27 17:06:36 2016
New Revision: 4435

Log:
Updates to datacat and crawler modules.

Added:
    java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java
Modified:
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java
    java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java
    java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java
    java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java
    java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java	Wed Jul 27 17:06:36 2016
@@ -145,8 +145,10 @@
      */
     private void run() {        
         List<DatasetModel> datasets = DatacatHelper.createDatasets(paths, folder, site.toString());
+        // Build the utilities from the configured URL and site so that a non-default datacat is still honored.
+        DatacatUtilities util = new DatacatUtilities(datacatUrl, site);
         if (!dryRun) {
-            DatacatUtilities.updateDatasets(datasets, folder, datacatUrl, patch);
+            util.updateDatasets(datasets, folder, patch);
             //LOGGER.info("Added " + datasets.size() + " datasets to datacat.");
         } else {
             LOGGER.info("Dry run is enabled; skipped adding dataset.");

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	Wed Jul 27 17:06:36 2016
@@ -303,7 +303,8 @@
         if (!visitor.getFiles().isEmpty()) {
             List<DatasetModel> datasets = DatacatHelper.createDatasets(visitor.getFiles(), config.folder(), config.site().toString());
             LOGGER.info("built " + datasets.size() + " datasets");
-            DatacatUtilities.updateDatasets(datasets, config.folder(), config.datacatUrl(), false);
+            DatacatUtilities util = new DatacatUtilities(config.datacatUrl(), config.site());
+            util.updateDatasets(datasets, config.folder(), false);
             LOGGER.info("added datasets to datacat");
         } else {
             LOGGER.warning("No files were found by the crawler.");

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java	Wed Jul 27 17:06:36 2016
@@ -141,15 +141,15 @@
      */
     static List<DatasetModel> createDatasets(List<File> files, String folder, String site) {
         List<DatasetModel> datasets = new ArrayList<DatasetModel>();
+        DatacatUtilities util = new DatacatUtilities();
         for (File file : files) {
             Map<String, Object> metadata = createMetadata(file);
             DataType dataType = DatacatHelper.getDataType(file);
             FileFormat fileFormat = DatacatHelper.getFileFormat(file);
-            DatasetModel dataset = DatacatUtilities.createDataset(
+            DatasetModel dataset = util.createDataset(
                     file,
                     metadata,
                     folder,
-                    site,
                     dataType.toString(),
                     fileFormat.toString());
             datasets.add(dataset);

Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java	(original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java	Wed Jul 27 17:06:36 2016
@@ -26,7 +26,7 @@
     
     private static void printRun(int run) throws Exception {
                 
-        DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(run);
+        DatasetResultSetModel results = new DatacatUtilities().findEvioDatasets(run);
                 
         /* print results including metadata */
         for (DatasetModel dataset : results) {            

Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java	(original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java	Wed Jul 27 17:06:36 2016
@@ -23,9 +23,26 @@
  * 
  * @author jeremym
  */
-public class DatacatUtilities {
+public final class DatacatUtilities {
     
     private static final Logger LOGGER = Logger.getLogger(DatacatUtilities.class.getPackage().getName());
+    
+    private Client client;    
+    private Site site = DatacatConstants.DEFAULT_SITE;
+    
+    public DatacatUtilities(Client client, Site site) {
+        this.client = client;
+        this.site = site;
+    }
+    
+    public DatacatUtilities(String url, Site site) {
+        createClient(url);
+        this.site = site;
+    }
+    
+    public DatacatUtilities() {
+        createDefaultClient();
+    }
                   
     /**
      * Add datasets to the data catalog or patch existing ones.
@@ -35,14 +52,8 @@
      * @param url the datacat URL
      * @param patch <code>true</code> to allow patching existing datasets
      */
-    public static final void updateDatasets(List<DatasetModel> datasets, String folder, String url, boolean patch) {
+    public void updateDatasets(List<DatasetModel> datasets, String folder, boolean patch) {
         int nUpdated = 0;
-        Client client = null;
-        try {
-            client = new ClientBuilder().setUrl(url).build();
-        } catch (URISyntaxException e) {
-            throw new RuntimeException("Invalid datacat URL.", e);
-        }
         for (DatasetModel dataset : datasets) {
             try {
                 if (client.exists(folder + "/" + dataset.getName())) {
@@ -76,16 +87,14 @@
      * @param file the file on disk
      * @param metadata the metadata map 
      * @param folder the datacat folder
-     * @param site the datacat site
      * @param dataType the data type 
      * @param fileFormat the file format
      * @return the created dataset
      */
-    public static final DatasetModel createDataset(
+    public final DatasetModel createDataset(
             File file,
             Map<String, Object> metadata,
             String folder,
-            String site,
             String dataType,
             String fileFormat) {
         
@@ -101,7 +110,7 @@
             .resource(file.getPath())
             .dataType(dataType)
             .fileFormat(fileFormat)
-            .site(site)
+            .site(site.toString())
             .scanStatus("OK");
         
         // Set system metadata from the provided metadata map.
@@ -132,18 +141,23 @@
         return datasetBuilder.build();
     }
     
-    public static Client createDefaultClient() {
+    private Client createDefaultClient() {        
+        this.client = createClient(DatacatConstants.DATACAT_URL);
+        return this.client;
+    }
+    
+    private Client createClient(String url) {
+        Client client;
         try {
-            return new ClientBuilder().setUrl(DatacatConstants.DATACAT_URL).build();
+            client = new ClientBuilder().setUrl(url).build();
         } catch (URISyntaxException e) {
             throw new RuntimeException("Error initializing datacat client.", e);
         }
+        this.client = client;
+        return this.client;
     }
     
-    public static DatasetResultSetModel findEvioDatasets(Client client, String folder, Site site, String[] metadata, String[] sort, int run) {
-        if (client == null) {
-            client = createDefaultClient();
-        }
+    public DatasetResultSetModel findEvioDatasets(String folder, String[] metadata, String[] sort, int run) {        
         return client.searchForDatasets(
                 folder,
                 "current", /* dataset version */
@@ -154,11 +168,9 @@
                 );
     }
     
-    public static DatasetResultSetModel findEvioDatasets(int run) {        
+    public DatasetResultSetModel findEvioDatasets(int run) {        
         return findEvioDatasets(
-                null,
                 DatacatConstants.RAW_DATA_FOLDER,
-                DatacatConstants.DEFAULT_SITE,
                 DatacatConstants.EVIO_METADATA,
                 new String[] {"FILE"},
                 run

Added: java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java	(added)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java	Wed Jul 27 17:06:36 2016
@@ -0,0 +1,201 @@
+package org.hps.datacat;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.srs.datacat.model.DatasetModel;
+import org.srs.datacat.model.DatasetResultSetModel;
+import org.srs.datacat.model.dataset.DatasetWithViewModel;
+
+/**
+ * Creates an index between an EVIO dataset and various metadata such as head timestamp range, file number, and event ID
+ * range.
+ * <p>
+ * The index is filled once, in the constructor, from the EVIO datasets found for a single run.
+ *
+ * @author jeremym
+ */
+public final class EvioDatasetIndex {
+
+    /** Datasets returned by the datacat search for the indexed run. */
+    private DatasetResultSetModel datasets;
+
+    /** Helper used to query the data catalog. */
+    private final DatacatUtilities util;
+
+    /** Datasets keyed by head timestamp range; keys compare by identity, so equal ranges never collide. */
+    private final Map<TimestampRange, DatasetModel> datasetTimestamps = new HashMap<TimestampRange, DatasetModel>();
+
+    /** Datasets keyed by the value of their FILE metadata field. */
+    private final Map<Long, DatasetModel> datasetFileNumbers = new HashMap<Long, DatasetModel>();
+
+    /** Event ID range of each dataset. */
+    private List<FileEventRange> eventRanges = new ArrayList<FileEventRange>();
+
+    /**
+     * Inclusive range of head timestamps covered by one dataset.
+     */
+    static class TimestampRange {
+
+        private final long startTimestamp;
+        private final long endTimestamp;
+
+        TimestampRange(long startTimestamp, long endTimestamp) {
+            this.startTimestamp = startTimestamp;
+            this.endTimestamp = endTimestamp;
+        }
+
+        /** Check whether the timestamp lies within this range, both ends included. */
+        boolean contains(long timestamp) {
+            return timestamp >= startTimestamp && timestamp <= endTimestamp;
+        }
+    }
+
+    /**
+     * Build the index for a run.
+     *
+     * @param util the datacat utilities used to look up the run's EVIO datasets
+     * @param run the run number
+     */
+    public EvioDatasetIndex(DatacatUtilities util, int run) {
+        this.util = util;
+        load(run);
+    }
+
+    /**
+     * Find all datasets whose head timestamp range contains the timestamp.
+     *
+     * @param timestamp the timestamp to look up
+     * @return the matching datasets in no particular order; empty if none match
+     */
+    public List<DatasetModel> findByTimestamp(long timestamp) {
+        List<DatasetModel> matches = new ArrayList<DatasetModel>();
+        for (Entry<TimestampRange, DatasetModel> entry : datasetTimestamps.entrySet()) {
+            if (entry.getKey().contains(timestamp)) {
+                matches.add(entry.getValue());
+            }
+        }
+        return matches;
+    }
+
+    /**
+     * Find the dataset whose event ID range contains the event ID.
+     *
+     * @param eventId the event ID to look up
+     * @return the matching dataset or <code>null</code> if no event range was found
+     */
+    public DatasetModel findByEventRange(long eventId) {
+        FileEventRange range = FileEventRange.findEventRange(eventRanges, eventId);
+        // NOTE(review): findEventRange starts from a null match, so it presumably returns null on a miss.
+        return range != null ? range.getDataset() : null;
+    }
+
+    /**
+     * Find the dataset with the given file number.
+     *
+     * @param fileNumber the value of the dataset's FILE metadata field
+     * @return the matching dataset or <code>null</code> if the file number is not indexed
+     */
+    public DatasetModel findByFileNumber(long fileNumber) {
+        return datasetFileNumbers.get(fileNumber);
+    }
+
+    /**
+     * Get the datasets that were indexed.
+     *
+     * @return the dataset search results for the run
+     */
+    public DatasetResultSetModel getDatasets() {
+        return datasets;
+    }
+
+    /**
+     * Look up the run's EVIO datasets and fill the timestamp, file number and event range indices.
+     *
+     * @param run the run number
+     */
+    private void load(int run) {
+
+        datasets = util.findEvioDatasets(run);
+
+        // setup index of first and last timestamp and index by file number in a single pass
+        for (DatasetModel dataset : datasets) {
+            DatasetWithViewModel datasetView = (DatasetWithViewModel) dataset;
+            Map<String, Object> metadata = datasetView.getMetadataMap();
+            long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP");
+            long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP");
+            long fileNumber = (Long) metadata.get("FILE");
+            datasetTimestamps.put(new TimestampRange(firstTimestamp, lastTimestamp), dataset);
+            datasetFileNumbers.put(fileNumber, dataset);
+        }
+
+        // setup index by event ID range
+        eventRanges = FileEventRange.createEventRanges(datasets);
+    }
+
+    /**
+     * Ad hoc check that looks up every EVIO dataset of run 5772 through each index and prints what was found.
+     * This is a test and not a command line interface!
+     *
+     * @param args the command line arguments (ignored)
+     */
+    public static void main(String[] args) {
+
+        DatacatUtilities util = new DatacatUtilities();
+        EvioDatasetIndex datasetIndex = new EvioDatasetIndex(util, 5772);
+        DatasetResultSetModel datasets = datasetIndex.getDatasets();
+
+        for (DatasetModel dataset : datasets) {
+
+            System.out.println("checking dataset " + dataset.getName() + " ...");
+
+            DatasetWithViewModel datasetView = (DatasetWithViewModel) dataset;
+            Map<String, Object> metadata = datasetView.getMetadataMap();
+
+            long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP");
+            long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP");
+            long fileNumber = (Long) metadata.get("FILE");
+            long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT");
+            long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT");
+
+            System.out.println("FIRST_HEAD_TIMESTAMP = " + firstTimestamp);
+            System.out.println("LAST_HEAD_TIMESTAMP = " + lastTimestamp);
+            System.out.println("FILE = " + fileNumber);
+            System.out.println("FIRST_PHYSICS_EVENT = " + firstPhysicsEvent);
+            System.out.println("LAST_PHYSICS_EVENT = " + lastPhysicsEvent);
+
+            DatasetModel result = datasetIndex.findByEventRange(firstPhysicsEvent);
+            System.out.println("found " + result.getName() + " for event ID " + firstPhysicsEvent);
+
+            result = datasetIndex.findByEventRange(lastPhysicsEvent);
+            System.out.println("found " + result.getName() + " for event ID " + lastPhysicsEvent);
+
+            result = datasetIndex.findByFileNumber(fileNumber);
+            System.out.println("found " + result.getName() + " for file " + fileNumber);
+
+            List<DatasetModel> firstTimestampDatasets = datasetIndex.findByTimestamp(firstTimestamp);
+            for (DatasetModel firstTimestampDataset : firstTimestampDatasets) {
+                System.out.println("found " + firstTimestampDataset.getName() + " for timestamp = " 
+                        + firstTimestamp);
+            }
+
+            List<DatasetModel> lastTimestampDatasets = datasetIndex.findByTimestamp(lastTimestamp);
+            for (DatasetModel lastTimestampDataset : lastTimestampDatasets) {
+                System.out.println("found " + lastTimestampDataset.getName() + " for timestamp = "
+                        + lastTimestamp);
+            }
+
+            // midpoint of the dataset's timestamp range, computed without overflowing the sum
+            long midTimestamp = firstTimestamp + (lastTimestamp - firstTimestamp) / 2;
+            List<DatasetModel> midTimestampDatasets = datasetIndex.findByTimestamp(midTimestamp);
+            for (DatasetModel midTimestampDataset : midTimestampDatasets) {
+                System.out.println("found " + midTimestampDataset.getName() + " for timestamp = " + midTimestamp);
+            }
+
+            System.out.println();
+        }
+    }
+}

Modified: java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java	(original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java	Wed Jul 27 17:06:36 2016
@@ -7,7 +7,6 @@
 import org.srs.datacat.model.DatasetModel;
 import org.srs.datacat.model.DatasetResultSetModel;
 import org.srs.datacat.model.dataset.DatasetWithViewModel;
-import org.srs.datacat.shared.DatasetLocation;
 
 /**
  * Utility class for assocating a file in the datacat to its event ID range.
@@ -18,16 +17,16 @@
     
     private long startEvent;
     private long endEvent;
-    private String path;
+    private DatasetModel dataset;
     
-    FileEventRange(long startEvent, long endEvent, String path) {
+    FileEventRange(DatasetModel dataset, long startEvent, long endEvent) {
         this.startEvent = startEvent;
         this.endEvent = endEvent;
-        this.path = path;
+        this.dataset = dataset;
     }
     
-    public String getPath() {
-        return path;
+    public DatasetModel getDataset() {
+        return dataset;
     }
     
     public long getStartEvent() {
@@ -49,13 +48,12 @@
             Map<String, Object> metadata = view.getMetadataMap();
             long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT");
             long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT");
-            DatasetLocation loc = (DatasetLocation) view.getViewInfo().getLocations().iterator().next();
-            ranges.add(new FileEventRange(firstPhysicsEvent, lastPhysicsEvent, loc.getPath()));
+            ranges.add(new FileEventRange(ds, firstPhysicsEvent, lastPhysicsEvent));
         }
         return ranges;
     }
     
-    public static FileEventRange findEventRage(List<FileEventRange> ranges, long eventId) {
+    public static FileEventRange findEventRange(List<FileEventRange> ranges, long eventId) {
         FileEventRange match = null;
         for (FileEventRange range : ranges) {
             if (range.matches(eventId)) {

Modified: java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java	Wed Jul 27 17:06:36 2016
@@ -199,9 +199,8 @@
         
         LOGGER.info("finding EVIO datasets for run " + getRun() + " in " + this.folder + " at " + this.site + " ...");
         
-        DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(datacatClient, this.folder, this.site, 
-                METADATA_FIELDS, new String[] {"FILE"}, getRun());
-                
+        DatacatUtilities util = new DatacatUtilities(datacatClient, this.site);        
+        DatasetResultSetModel results = util.findEvioDatasets(this.folder, METADATA_FIELDS, new String[] {"FILE"}, getRun());
         LOGGER.info("found " + results.getResults().size() + " EVIO datasets for run " + getRun());
                                
         return results;