LISTSERV mailing list manager LISTSERV 16.5

Help for HPS-SVN Archives


HPS-SVN Archives

HPS-SVN Archives


HPS-SVN@LISTSERV.SLAC.STANFORD.EDU


View:

Message:

[

First

|

Previous

|

Next

|

Last

]

By Topic:

[

First

|

Previous

|

Next

|

Last

]

By Author:

[

First

|

Previous

|

Next

|

Last

]

Font:

Proportional Font

LISTSERV Archives

LISTSERV Archives

HPS-SVN Home

HPS-SVN Home

HPS-SVN  July 2016

HPS-SVN July 2016

Subject:

r4435 - in /java/trunk: crawler/src/main/java/org/hps/crawler/ datacat/src/main/java/org/hps/datacat/ run-database/src/main/java/org/hps/rundb/builder/

From:

[log in to unmask]

Reply-To:

Notification of commits to the hps svn repository <[log in to unmask]>

Date:

Thu, 28 Jul 2016 00:06:41 -0000

Content-Type:

text/plain

Parts/Attachments:

Parts/Attachments

text/plain (425 lines)

Author: [log in to unmask]
Date: Wed Jul 27 17:06:36 2016
New Revision: 4435

Log:
Updates to datacat and crawler modules.

Added:
    java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java
Modified:
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
    java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java
    java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java
    java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java
    java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java
    java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java	Wed Jul 27 17:06:36 2016
@@ -145,8 +145,9 @@
      */
     private void run() {        
         List<DatasetModel> datasets = DatacatHelper.createDatasets(paths, folder, site.toString());
+        DatacatUtilities util = new DatacatUtilities();
         if (!dryRun) {
-            DatacatUtilities.updateDatasets(datasets, folder, datacatUrl, patch);
+            util.updateDatasets(datasets, folder, patch);
             //LOGGER.info("Added " + datasets.size() + " datasets to datacat.");
         } else {
             LOGGER.info("Dry run is enabled; skipped adding dataset.");

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java	Wed Jul 27 17:06:36 2016
@@ -303,7 +303,8 @@
         if (!visitor.getFiles().isEmpty()) {
             List<DatasetModel> datasets = DatacatHelper.createDatasets(visitor.getFiles(), config.folder(), config.site().toString());
             LOGGER.info("built " + datasets.size() + " datasets");
-            DatacatUtilities.updateDatasets(datasets, config.folder(), config.datacatUrl(), false);
+            DatacatUtilities util = new DatacatUtilities(config.datacatUrl(), config.site());
+            util.updateDatasets(datasets, config.folder(), false);
             LOGGER.info("added datasets to datacat");
         } else {
             LOGGER.warning("No files were found by the crawler.");

Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java
 =============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java	(original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java	Wed Jul 27 17:06:36 2016
@@ -141,15 +141,15 @@
      */
     static List<DatasetModel> createDatasets(List<File> files, String folder, String site) {
         List<DatasetModel> datasets = new ArrayList<DatasetModel>();
+        DatacatUtilities util = new DatacatUtilities();
         for (File file : files) {
             Map<String, Object> metadata = createMetadata(file);
             DataType dataType = DatacatHelper.getDataType(file);
             FileFormat fileFormat = DatacatHelper.getFileFormat(file);
-            DatasetModel dataset = DatacatUtilities.createDataset(
+            DatasetModel dataset = util.createDataset(
                     file,
                     metadata,
                     folder,
-                    site,
                     dataType.toString(),
                     fileFormat.toString());
             datasets.add(dataset);

Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java	(original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java	Wed Jul 27 17:06:36 2016
@@ -26,7 +26,7 @@
     
     private static void printRun(int run) throws Exception {
                 
-        DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(run);
+        DatasetResultSetModel results = new DatacatUtilities().findEvioDatasets(run);
                 
         /* print results including metadata */
         for (DatasetModel dataset : results) {            

Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java	(original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java	Wed Jul 27 17:06:36 2016
@@ -23,9 +23,26 @@
  * 
  * @author jeremym
  */
-public class DatacatUtilities {
+public final class DatacatUtilities {
     
     private static final Logger LOGGER = Logger.getLogger(DatacatUtilities.class.getPackage().getName());
+    
+    private Client client;    
+    private Site site = DatacatConstants.DEFAULT_SITE;
+    
+    public DatacatUtilities(Client client, Site site) {
+        this.client = client;
+        this.site = site;
+    }
+    
+    public DatacatUtilities(String url, Site site) {
+        createClient(url);
+        this.site = site;
+    }
+    
+    public DatacatUtilities() {
+        createDefaultClient();
+    }
                   
     /**
      * Add datasets to the data catalog or patch existing ones.
@@ -35,14 +52,8 @@
      * @param url the datacat URL
      * @param patch <code>true</code> to allow patching existing datasets
      */
-    public static final void updateDatasets(List<DatasetModel> datasets, String folder, String url, boolean patch) {
+    public void updateDatasets(List<DatasetModel> datasets, String folder, boolean patch) {
         int nUpdated = 0;
-        Client client = null;
-        try {
-            client = new ClientBuilder().setUrl(url).build();
-        } catch (URISyntaxException e) {
-            throw new RuntimeException("Invalid datacat URL.", e);
-        }
         for (DatasetModel dataset : datasets) {
             try {
                 if (client.exists(folder + "/" + dataset.getName())) {
@@ -76,16 +87,14 @@
      * @param file the file on disk
      * @param metadata the metadata map 
      * @param folder the datacat folder
-     * @param site the datacat site
      * @param dataType the data type 
      * @param fileFormat the file format
      * @return the created dataset
      */
-    public static final DatasetModel createDataset(
+    public final DatasetModel createDataset(
             File file,
             Map<String, Object> metadata,
             String folder,
-            String site,
             String dataType,
             String fileFormat) {
         
@@ -101,7 +110,7 @@
             .resource(file.getPath())
             .dataType(dataType)
             .fileFormat(fileFormat)
-            .site(site)
+            .site(site.toString())
             .scanStatus("OK");
         
         // Set system metadata from the provided metadata map.
@@ -132,18 +141,23 @@
         return datasetBuilder.build();
     }
     
-    public static Client createDefaultClient() {
+    private Client createDefaultClient() {        
+        this.client = createClient(DatacatConstants.DATACAT_URL);
+        return this.client;
+    }
+    
+    private Client createClient(String url) {
+        Client client;
         try {
-            return new ClientBuilder().setUrl(DatacatConstants.DATACAT_URL).build();
+            client = new ClientBuilder().setUrl(url).build();
         } catch (URISyntaxException e) {
             throw new RuntimeException("Error initializing datacat client.", e);
         }
+        this.client = client;
+        return this.client;
     }
     
-    public static DatasetResultSetModel findEvioDatasets(Client client, String folder, Site site, String[] metadata, String[] sort, int run) {
-        if (client == null) {
-            client = createDefaultClient();
-        }
+    public DatasetResultSetModel findEvioDatasets(String folder, String[] metadata, String[] sort, int run) {        
         return client.searchForDatasets(
                 folder,
                 "current", /* dataset version */
@@ -154,11 +168,9 @@
                 );
     }
     
-    public static DatasetResultSetModel findEvioDatasets(int run) {        
+    public DatasetResultSetModel findEvioDatasets(int run) {        
         return findEvioDatasets(
-                null,
                 DatacatConstants.RAW_DATA_FOLDER,
-                DatacatConstants.DEFAULT_SITE,
                 DatacatConstants.EVIO_METADATA,
                 new String[] {"FILE"},
                 run

Added: java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java	(added)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java	Wed Jul 27 17:06:36 2016
@@ -0,0 +1,182 @@
+package org.hps.datacat;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.srs.datacat.model.DatasetModel;
+import org.srs.datacat.model.DatasetResultSetModel;
+import org.srs.datacat.model.dataset.DatasetWithViewModel;
+
+/**
+ * Indexes the EVIO datasets of one run by head timestamp range, file number, and physics event ID range so that
+ * the dataset matching a given timestamp, file number, or event ID can be looked up.
+ *
+ * @author jeremym
+ */
+public final class EvioDatasetIndex {
+
+    private DatasetResultSetModel datasets;
+    private final DatacatUtilities util;
+    // TimestampRange does not override equals/hashCode, so keys are compared by identity and two datasets with
+    // the same timestamp range are kept as separate entries instead of overwriting each other.
+    private final Map<TimestampRange, DatasetModel> datasetTimestamps = new HashMap<TimestampRange, DatasetModel>();
+    private final Map<Long, DatasetModel> datasetFileNumbers = new HashMap<Long, DatasetModel>();
+    private List<FileEventRange> eventRanges = new ArrayList<FileEventRange>();
+
+    /** Range of head timestamps covered by one dataset; both ends are treated as inclusive by the lookup. */
+    static class TimestampRange {
+
+        private final long startTimestamp;
+        private final long endTimestamp;
+
+        TimestampRange(long startTimestamp, long endTimestamp) {
+            this.startTimestamp = startTimestamp;
+            this.endTimestamp = endTimestamp;
+        }
+    }
+
+    /**
+     * Creates the index for a run; the run's EVIO datasets are fetched immediately through <code>util</code>.
+     *
+     * @param util the datacat utilities used to find the datasets
+     * @param run the run number
+     */
+    public EvioDatasetIndex(DatacatUtilities util, int run) {
+        this.util = util;
+        load(run);
+    }
+
+    /**
+     * Finds all datasets whose head timestamp range contains the given timestamp (both ends inclusive).
+     *
+     * @param timestamp the timestamp to look up
+     * @return the matching datasets, or an empty list if there are none
+     */
+    public List<DatasetModel> findByTimestamp(long timestamp) {
+        List<DatasetModel> matches = new ArrayList<DatasetModel>();
+        for (Entry<TimestampRange, DatasetModel> entry : datasetTimestamps.entrySet()) {
+            TimestampRange range = entry.getKey();
+            if (timestamp >= range.startTimestamp && timestamp <= range.endTimestamp) {
+                matches.add(entry.getValue());
+            }
+        }
+        return matches;
+    }
+
+    /**
+     * Finds the dataset whose physics event ID range contains the given event ID.
+     *
+     * @param eventId the event ID to look up
+     * @return the matching dataset, or <code>null</code> if no event range matched
+     */
+    public DatasetModel findByEventRange(long eventId) {
+        // NOTE(review): findEventRange starts from a null match, so it presumably returns null when no range
+        // contains the ID; report that as null here rather than failing with a NullPointerException.
+        FileEventRange range = FileEventRange.findEventRange(eventRanges, eventId);
+        return range == null ? null : range.getDataset();
+    }
+
+    /**
+     * Finds the dataset indexed under the given file number.
+     *
+     * @param fileNumber the file number (the <code>FILE</code> metadata value)
+     * @return the dataset, or <code>null</code> if the file number is not in the index
+     */
+    public DatasetModel findByFileNumber(long fileNumber) {
+        return datasetFileNumbers.get(fileNumber);
+    }
+
+    /**
+     * Returns the result set that was fetched for the run when this index was built.
+     *
+     * @return the run's EVIO datasets
+     */
+    public DatasetResultSetModel getDatasets() {
+        return datasets;
+    }
+
+    /**
+     * Fetches the run's EVIO datasets and fills the timestamp, file number, and event range indexes.
+     *
+     * @param run the run number
+     */
+    private void load(int run) {
+
+        datasets = util.findEvioDatasets(run);
+
+        // setup index of first and last timestamp and index by file number in a single pass
+        for (DatasetModel dataset : datasets) {
+            Map<String, Object> metadata = ((DatasetWithViewModel) dataset).getMetadataMap();
+            long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP");
+            long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP");
+            datasetTimestamps.put(new TimestampRange(firstTimestamp, lastTimestamp), dataset);
+            long fileNumber = (Long) metadata.get("FILE");
+            datasetFileNumbers.put(fileNumber, dataset);
+        }
+
+        // setup index by physics event ID range
+        this.eventRanges = FileEventRange.createEventRanges(datasets);
+    }
+
+    // This is a test and not a command line interface!
+    public static void main(String[] args) {
+
+        DatacatUtilities util = new DatacatUtilities();
+        EvioDatasetIndex datasetIndex = new EvioDatasetIndex(util, 5772);
+        DatasetResultSetModel datasets = datasetIndex.getDatasets();
+
+        for (DatasetModel dataset : datasets) {
+
+            System.out.println("checking dataset " + dataset.getName() + " ...");
+
+            DatasetWithViewModel datasetView = (DatasetWithViewModel) dataset;
+            Map<String, Object> metadata = datasetView.getMetadataMap();
+
+            long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP");
+            long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP");
+            long fileNumber = (Long) metadata.get("FILE");
+            long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT");
+            long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT");
+
+            System.out.println("FIRST_HEAD_TIMESTAMP = " + firstTimestamp);
+            System.out.println("LAST_HEAD_TIMESTAMP = " + lastTimestamp);
+            System.out.println("FILE = " + fileNumber);
+            System.out.println("FIRST_PHYSICS_EVENT = " + firstPhysicsEvent);
+            System.out.println("LAST_PHYSICS_EVENT = " + lastPhysicsEvent);
+
+            DatasetModel result = datasetIndex.findByEventRange(firstPhysicsEvent);
+            System.out.println("found " + result.getName() + " for event ID " + firstPhysicsEvent);
+
+            result = datasetIndex.findByEventRange(lastPhysicsEvent);
+            System.out.println("found " + result.getName() + " for event ID " + lastPhysicsEvent);
+
+            result = datasetIndex.findByFileNumber(fileNumber);
+            System.out.println("found " + result.getName() + " for file " + fileNumber);
+
+            List<DatasetModel> firstTimestampDatasets = datasetIndex.findByTimestamp(firstTimestamp);
+            for (DatasetModel firstTimestampDataset : firstTimestampDatasets) {
+                System.out.println("found " + firstTimestampDataset.getName() + " for timestamp = "
+                        + firstTimestamp);
+            }
+
+            List<DatasetModel> lastTimestampDatasets = datasetIndex.findByTimestamp(lastTimestamp);
+            for (DatasetModel lastTimestampDataset : lastTimestampDatasets) {
+                System.out.println("found " + lastTimestampDataset.getName() + " for timestamp = "
+                        + lastTimestamp);
+            }
+
+            // Halve the span so a point inside the range is checked, not lastTimestamp a second time.
+            long midTimestamp = firstTimestamp + (lastTimestamp - firstTimestamp) / 2;
+            List<DatasetModel> midTimestampDatasets = datasetIndex.findByTimestamp(midTimestamp);
+            for (DatasetModel midTimestampDataset : midTimestampDatasets) {
+                System.out.println("found " + midTimestampDataset.getName() + " for timestamp = "
+                        + midTimestamp);
+            }
+
+            System.out.println();
+        }
+    }
+}

Modified: java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java
 =============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java	(original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java	Wed Jul 27 17:06:36 2016
@@ -7,7 +7,6 @@
 import org.srs.datacat.model.DatasetModel;
 import org.srs.datacat.model.DatasetResultSetModel;
 import org.srs.datacat.model.dataset.DatasetWithViewModel;
-import org.srs.datacat.shared.DatasetLocation;
 
 /**
  * Utility class for associating a file in the datacat to its event ID range.
@@ -18,16 +17,16 @@
     
     private long startEvent;
     private long endEvent;
-    private String path;
+    private DatasetModel dataset;
     
-    FileEventRange(long startEvent, long endEvent, String path) {
+    FileEventRange(DatasetModel dataset, long startEvent, long endEvent) {
         this.startEvent = startEvent;
         this.endEvent = endEvent;
-        this.path = path;
+        this.dataset = dataset;
     }
     
-    public String getPath() {
-        return path;
+    public DatasetModel getDataset() {
+        return dataset;
     }
     
     public long getStartEvent() {
@@ -49,13 +48,12 @@
             Map<String, Object> metadata = view.getMetadataMap();
             long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT");
             long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT");
-            DatasetLocation loc = (DatasetLocation) view.getViewInfo().getLocations().iterator().next();
-            ranges.add(new FileEventRange(firstPhysicsEvent, lastPhysicsEvent, loc.getPath()));
+            ranges.add(new FileEventRange(ds, firstPhysicsEvent, lastPhysicsEvent));
         }
         return ranges;
     }
     
-    public static FileEventRange findEventRage(List<FileEventRange> ranges, long eventId) {
+    public static FileEventRange findEventRange(List<FileEventRange> ranges, long eventId) {
         FileEventRange match = null;
         for (FileEventRange range : ranges) {
             if (range.matches(eventId)) {

Modified: java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java
 =============================================================================
--- java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java	(original)
+++ java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java	Wed Jul 27 17:06:36 2016
@@ -199,9 +199,8 @@
         
         LOGGER.info("finding EVIO datasets for run " + getRun() + " in " + this.folder + " at " + this.site + " ...");
         
-        DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(datacatClient, this.folder, this.site, 
-                METADATA_FIELDS, new String[] {"FILE"}, getRun());
-                
+        DatacatUtilities util = new DatacatUtilities(datacatClient, this.site);        
+        DatasetResultSetModel results = util.findEvioDatasets(this.folder, METADATA_FIELDS, new String[] {"FILE"}, getRun());
         LOGGER.info("found " + results.getResults().size() + " EVIO datasets for run " + getRun());
                                
         return results;

Top of Message | Previous Page | Permalink

Advanced Options


Options

Log In

Log In

Get Password

Get Password


Search Archives

Search Archives


Subscribe or Unsubscribe

Subscribe or Unsubscribe


Archives

November 2017
August 2017
July 2017
January 2017
December 2016
November 2016
October 2016
September 2016
August 2016
July 2016
June 2016
May 2016
April 2016
March 2016
February 2016
January 2016
December 2015
November 2015
October 2015
September 2015
August 2015
July 2015
June 2015
May 2015
April 2015
March 2015
February 2015
January 2015
December 2014
November 2014
October 2014
September 2014
August 2014
July 2014
June 2014
May 2014
April 2014
March 2014
February 2014
January 2014
December 2013
November 2013

ATOM RSS1 RSS2



LISTSERV.SLAC.STANFORD.EDU

Secured by F-Secure Anti-Virus CataList Email List Search Powered by the LISTSERV Email List Manager

Privacy Notice, Security Notice and Terms of Use