Author: [log in to unmask]
Date: Wed Jul 27 17:06:36 2016
New Revision: 4435
Log:
Updates to datacat and crawler modules.
Added:
java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java
Modified:
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java
java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java
java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java
java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java
java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java Wed Jul 27 17:06:36 2016
@@ -145,8 +145,9 @@
*/
private void run() {
List<DatasetModel> datasets = DatacatHelper.createDatasets(paths, folder, site.toString());
+ DatacatUtilities util = new DatacatUtilities();
if (!dryRun) {
- DatacatUtilities.updateDatasets(datasets, folder, datacatUrl, patch);
+ util.updateDatasets(datasets, folder, patch);
//LOGGER.info("Added " + datasets.size() + " datasets to datacat.");
} else {
LOGGER.info("Dry run is enabled; skipped adding dataset.");
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Wed Jul 27 17:06:36 2016
@@ -303,7 +303,8 @@
if (!visitor.getFiles().isEmpty()) {
List<DatasetModel> datasets = DatacatHelper.createDatasets(visitor.getFiles(), config.folder(), config.site().toString());
LOGGER.info("built " + datasets.size() + " datasets");
- DatacatUtilities.updateDatasets(datasets, config.folder(), config.datacatUrl(), false);
+ DatacatUtilities util = new DatacatUtilities(config.datacatUrl(), config.site());
+ util.updateDatasets(datasets, config.folder(), false);
LOGGER.info("added datasets to datacat");
} else {
LOGGER.warning("No files were found by the crawler.");
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java Wed Jul 27 17:06:36 2016
@@ -141,15 +141,15 @@
*/
static List<DatasetModel> createDatasets(List<File> files, String folder, String site) {
List<DatasetModel> datasets = new ArrayList<DatasetModel>();
+ DatacatUtilities util = new DatacatUtilities();
for (File file : files) {
Map<String, Object> metadata = createMetadata(file);
DataType dataType = DatacatHelper.getDataType(file);
FileFormat fileFormat = DatacatHelper.getFileFormat(file);
- DatasetModel dataset = DatacatUtilities.createDataset(
+ DatasetModel dataset = util.createDataset(
file,
metadata,
folder,
- site,
dataType.toString(),
fileFormat.toString());
datasets.add(dataset);
Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java
=============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java (original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java Wed Jul 27 17:06:36 2016
@@ -26,7 +26,7 @@
private static void printRun(int run) throws Exception {
- DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(run);
+ DatasetResultSetModel results = new DatacatUtilities().findEvioDatasets(run);
/* print results including metadata */
for (DatasetModel dataset : results) {
Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java
=============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java (original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java Wed Jul 27 17:06:36 2016
@@ -23,9 +23,26 @@
*
* @author jeremym
*/
-public class DatacatUtilities {
+public final class DatacatUtilities {
private static final Logger LOGGER = Logger.getLogger(DatacatUtilities.class.getPackage().getName());
+
+ private Client client;
+ private Site site = DatacatConstants.DEFAULT_SITE;
+
+ public DatacatUtilities(Client client, Site site) {
+ this.client = client;
+ this.site = site;
+ }
+
+ public DatacatUtilities(String url, Site site) {
+ createClient(url);
+ this.site = site;
+ }
+
+ public DatacatUtilities() {
+ createDefaultClient();
+ }
/**
* Add datasets to the data catalog or patch existing ones.
@@ -35,14 +52,8 @@
* @param url the datacat URL
* @param patch <code>true</code> to allow patching existing datasets
*/
- public static final void updateDatasets(List<DatasetModel> datasets, String folder, String url, boolean patch) {
+ public void updateDatasets(List<DatasetModel> datasets, String folder, boolean patch) {
int nUpdated = 0;
- Client client = null;
- try {
- client = new ClientBuilder().setUrl(url).build();
- } catch (URISyntaxException e) {
- throw new RuntimeException("Invalid datacat URL.", e);
- }
for (DatasetModel dataset : datasets) {
try {
if (client.exists(folder + "/" + dataset.getName())) {
@@ -76,16 +87,14 @@
* @param file the file on disk
* @param metadata the metadata map
* @param folder the datacat folder
- * @param site the datacat site
* @param dataType the data type
* @param fileFormat the file format
* @return the created dataset
*/
- public static final DatasetModel createDataset(
+ public final DatasetModel createDataset(
File file,
Map<String, Object> metadata,
String folder,
- String site,
String dataType,
String fileFormat) {
@@ -101,7 +110,7 @@
.resource(file.getPath())
.dataType(dataType)
.fileFormat(fileFormat)
- .site(site)
+ .site(site.toString())
.scanStatus("OK");
// Set system metadata from the provided metadata map.
@@ -132,18 +141,23 @@
return datasetBuilder.build();
}
- public static Client createDefaultClient() {
+ private Client createDefaultClient() {
+ this.client = createClient(DatacatConstants.DATACAT_URL);
+ return this.client;
+ }
+
+ private Client createClient(String url) {
+ Client client;
try {
- return new ClientBuilder().setUrl(DatacatConstants.DATACAT_URL).build();
+ client = new ClientBuilder().setUrl(url).build();
} catch (URISyntaxException e) {
throw new RuntimeException("Error initializing datacat client.", e);
}
+ this.client = client;
+ return this.client;
}
- public static DatasetResultSetModel findEvioDatasets(Client client, String folder, Site site, String[] metadata, String[] sort, int run) {
- if (client == null) {
- client = createDefaultClient();
- }
+ public DatasetResultSetModel findEvioDatasets(String folder, String[] metadata, String[] sort, int run) {
return client.searchForDatasets(
folder,
"current", /* dataset version */
@@ -154,11 +168,9 @@
);
}
- public static DatasetResultSetModel findEvioDatasets(int run) {
+ public DatasetResultSetModel findEvioDatasets(int run) {
return findEvioDatasets(
- null,
DatacatConstants.RAW_DATA_FOLDER,
- DatacatConstants.DEFAULT_SITE,
DatacatConstants.EVIO_METADATA,
new String[] {"FILE"},
run
Added: java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java
=============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java (added)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java Wed Jul 27 17:06:36 2016
@@ -0,0 +1,146 @@
+package org.hps.datacat;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.srs.datacat.model.DatasetModel;
+import org.srs.datacat.model.DatasetResultSetModel;
+import org.srs.datacat.model.dataset.DatasetWithViewModel;
+
+/**
+ * Creates an index between an EVIO dataset and various metadata such as head timestamp range, file number, and event ID
+ * range.
+ *
+ * @author jeremym
+ */
+public final class EvioDatasetIndex {
+
+    private DatasetResultSetModel datasets;
+    private final DatacatUtilities util;
+    private final Map<TimestampRange, DatasetModel> datasetTimestamps = new HashMap<TimestampRange, DatasetModel>();
+    private final Map<Long, DatasetModel> datasetFileNumbers = new HashMap<Long, DatasetModel>();
+    private List<FileEventRange> eventRanges = new ArrayList<FileEventRange>();
+
+    /** Inclusive head timestamp range covered by one dataset. */
+    static class TimestampRange {
+
+        private final long startTimestamp;
+        private final long endTimestamp;
+
+        TimestampRange(long startTimestamp, long endTimestamp) {
+            this.startTimestamp = startTimestamp;
+            this.endTimestamp = endTimestamp;
+        }
+    }
+
+    /**
+     * Build the index for a run by querying the data catalog.
+     *
+     * @param util the datacat utility used to find the run's EVIO datasets
+     * @param run the run number
+     */
+    public EvioDatasetIndex(DatacatUtilities util, int run) {
+        this.util = util;
+        load(run);
+    }
+
+    /**
+     * Find all datasets whose head timestamp range contains the timestamp (bounds inclusive).
+     *
+     * @param timestamp the head timestamp
+     * @return the matching datasets; empty if there are none
+     */
+    public List<DatasetModel> findByTimestamp(long timestamp) {
+        List<DatasetModel> matches = new ArrayList<DatasetModel>();
+        for (Entry<TimestampRange, DatasetModel> entry : datasetTimestamps.entrySet()) {
+            TimestampRange range = entry.getKey();
+            if (timestamp >= range.startTimestamp && timestamp <= range.endTimestamp) {
+                matches.add(entry.getValue());
+            }
+        }
+        return matches;
+    }
+
+    /**
+     * Find the dataset whose physics event range matches the event ID.
+     *
+     * @param eventId the event ID
+     * @return the dataset, or <code>null</code> if no event range matches
+     */
+    public DatasetModel findByEventRange(long eventId) {
+        // The range lookup starts from a null match, so guard it instead of dereferencing blindly.
+        FileEventRange range = FileEventRange.findEventRange(eventRanges, eventId);
+        return range == null ? null : range.getDataset();
+    }
+
+    /**
+     * Find the dataset with the given file number.
+     *
+     * @param fileNumber the value of the dataset's FILE metadata
+     * @return the dataset, or <code>null</code> if the file number is not indexed
+     */
+    public DatasetModel findByFileNumber(long fileNumber) {
+        return datasetFileNumbers.get(fileNumber);
+    }
+
+    /** @return the datasets found for the run, as returned by the datacat search */
+    public DatasetResultSetModel getDatasets() {
+        return datasets;
+    }
+
+    /** Query the EVIO datasets for the run and populate the three indices. */
+    private void load(int run) {
+        datasets = util.findEvioDatasets(run);
+        // setup indices by head timestamp range and by file number in a single pass
+        for (DatasetModel dataset : datasets) {
+            Map<String, Object> metadata = ((DatasetWithViewModel) dataset).getMetadataMap();
+            long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP");
+            long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP");
+            datasetTimestamps.put(new TimestampRange(firstTimestamp, lastTimestamp), dataset);
+            long fileNumber = (Long) metadata.get("FILE");
+            datasetFileNumbers.put(fileNumber, dataset);
+        }
+        // setup index by physics event ID range
+        eventRanges = FileEventRange.createEventRanges(datasets);
+    }
+
+    /** Name of a lookup result for printing; tolerates a missed lookup. */
+    private static String nameOf(DatasetModel dataset) {
+        return dataset == null ? "nothing" : dataset.getName();
+    }
+
+    // This is a test and not a command line interface!
+    public static void main(String[] args) {
+        EvioDatasetIndex index = new EvioDatasetIndex(new DatacatUtilities(), 5772);
+        for (DatasetModel dataset : index.getDatasets()) {
+            System.out.println("checking dataset " + dataset.getName() + " ...");
+            Map<String, Object> metadata = ((DatasetWithViewModel) dataset).getMetadataMap();
+            long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP");
+            long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP");
+            long fileNumber = (Long) metadata.get("FILE");
+            long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT");
+            long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT");
+            System.out.println("FIRST_HEAD_TIMESTAMP = " + firstTimestamp);
+            System.out.println("LAST_HEAD_TIMESTAMP = " + lastTimestamp);
+            System.out.println("FILE = " + fileNumber);
+            System.out.println("FIRST_PHYSICS_EVENT = " + firstPhysicsEvent);
+            System.out.println("LAST_PHYSICS_EVENT = " + lastPhysicsEvent);
+
+            for (long eventId : new long[] {firstPhysicsEvent, lastPhysicsEvent}) {
+                System.out.println("found " + nameOf(index.findByEventRange(eventId)) + " for event ID " + eventId);
+            }
+            System.out.println("found " + nameOf(index.findByFileNumber(fileNumber)) + " for file " + fileNumber);
+            // Probe both inclusive ends and the true midpoint of the dataset's timestamp range.
+            long midTimestamp = firstTimestamp + (lastTimestamp - firstTimestamp) / 2;
+            for (long timestamp : new long[] {firstTimestamp, lastTimestamp, midTimestamp}) {
+                for (DatasetModel match : index.findByTimestamp(timestamp)) {
+                    System.out.println("found " + match.getName() + " for timestamp = " + timestamp);
+                }
+            }
+            System.out.println();
+        }
+    }
+}
Modified: java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java
=============================================================================
--- java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java (original)
+++ java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java Wed Jul 27 17:06:36 2016
@@ -7,7 +7,6 @@
import org.srs.datacat.model.DatasetModel;
import org.srs.datacat.model.DatasetResultSetModel;
import org.srs.datacat.model.dataset.DatasetWithViewModel;
-import org.srs.datacat.shared.DatasetLocation;
/**
* Utility class for associating a file in the datacat to its event ID range.
@@ -18,16 +17,16 @@
private long startEvent;
private long endEvent;
- private String path;
+ private DatasetModel dataset;
- FileEventRange(long startEvent, long endEvent, String path) {
+ FileEventRange(DatasetModel dataset, long startEvent, long endEvent) {
this.startEvent = startEvent;
this.endEvent = endEvent;
- this.path = path;
+ this.dataset = dataset;
}
- public String getPath() {
- return path;
+ public DatasetModel getDataset() {
+ return dataset;
}
public long getStartEvent() {
@@ -49,13 +48,12 @@
Map<String, Object> metadata = view.getMetadataMap();
long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT");
long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT");
- DatasetLocation loc = (DatasetLocation) view.getViewInfo().getLocations().iterator().next();
- ranges.add(new FileEventRange(firstPhysicsEvent, lastPhysicsEvent, loc.getPath()));
+ ranges.add(new FileEventRange(ds, firstPhysicsEvent, lastPhysicsEvent));
}
return ranges;
}
- public static FileEventRange findEventRage(List<FileEventRange> ranges, long eventId) {
+ public static FileEventRange findEventRange(List<FileEventRange> ranges, long eventId) {
FileEventRange match = null;
for (FileEventRange range : ranges) {
if (range.matches(eventId)) {
Modified: java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java
=============================================================================
--- java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java (original)
+++ java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java Wed Jul 27 17:06:36 2016
@@ -199,9 +199,8 @@
LOGGER.info("finding EVIO datasets for run " + getRun() + " in " + this.folder + " at " + this.site + " ...");
- DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(datacatClient, this.folder, this.site,
- METADATA_FIELDS, new String[] {"FILE"}, getRun());
-
+ DatacatUtilities util = new DatacatUtilities(datacatClient, this.site);
+ DatasetResultSetModel results = util.findEvioDatasets(this.folder, METADATA_FIELDS, new String[] {"FILE"}, getRun());
LOGGER.info("found " + results.getResults().size() + " EVIO datasets for run " + getRun());
return results;
|