Author: [log in to unmask] Date: Wed Jul 27 17:06:36 2016 New Revision: 4435 Log: Updates to datacat and crawler modules. Added: java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java Wed Jul 27 17:06:36 2016 @@ -145,8 +145,9 @@ */ private void run() { List<DatasetModel> datasets = DatacatHelper.createDatasets(paths, folder, site.toString()); + DatacatUtilities util = new DatacatUtilities(); if (!dryRun) { - DatacatUtilities.updateDatasets(datasets, folder, datacatUrl, patch); + util.updateDatasets(datasets, folder, patch); //LOGGER.info("Added " + datasets.size() + " datasets to datacat."); } else { LOGGER.info("Dry run is enabled; skipped adding dataset."); Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Wed Jul 27 17:06:36 2016 @@ -303,7 +303,8 @@ if (!visitor.getFiles().isEmpty()) { List<DatasetModel> datasets = DatacatHelper.createDatasets(visitor.getFiles(), config.folder(), 
config.site().toString()); LOGGER.info("built " + datasets.size() + " datasets"); - DatacatUtilities.updateDatasets(datasets, config.folder(), config.datacatUrl(), false); + DatacatUtilities util = new DatacatUtilities(config.datacatUrl(), config.site()); + util.updateDatasets(datasets, config.folder(), false); LOGGER.info("added datasets to datacat"); } else { LOGGER.warning("No files were found by the crawler."); Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatHelper.java Wed Jul 27 17:06:36 2016 @@ -141,15 +141,15 @@ */ static List<DatasetModel> createDatasets(List<File> files, String folder, String site) { List<DatasetModel> datasets = new ArrayList<DatasetModel>(); + DatacatUtilities util = new DatacatUtilities(); for (File file : files) { Map<String, Object> metadata = createMetadata(file); DataType dataType = DatacatHelper.getDataType(file); FileFormat fileFormat = DatacatHelper.getFileFormat(file); - DatasetModel dataset = DatacatUtilities.createDataset( + DatasetModel dataset = util.createDataset( file, metadata, folder, - site, dataType.toString(), fileFormat.toString()); datasets.add(dataset); Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java ============================================================================= --- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java (original) +++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatPrintRun.java Wed Jul 27 17:06:36 2016 @@ -26,7 +26,7 @@ private static void printRun(int run) throws Exception { - DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(run); + DatasetResultSetModel results = new DatacatUtilities().findEvioDatasets(run); /* print results including metadata */ for 
(DatasetModel dataset : results) { Modified: java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java ============================================================================= --- java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java (original) +++ java/trunk/datacat/src/main/java/org/hps/datacat/DatacatUtilities.java Wed Jul 27 17:06:36 2016 @@ -23,9 +23,26 @@ * * @author jeremym */ -public class DatacatUtilities { +public final class DatacatUtilities { private static final Logger LOGGER = Logger.getLogger(DatacatUtilities.class.getPackage().getName()); + + private Client client; + private Site site = DatacatConstants.DEFAULT_SITE; + + public DatacatUtilities(Client client, Site site) { + this.client = client; + this.site = site; + } + + public DatacatUtilities(String url, Site site) { + createClient(url); + this.site = site; + } + + public DatacatUtilities() { + createDefaultClient(); + } /** * Add datasets to the data catalog or patch existing ones. 
@@ -35,14 +52,8 @@ * @param url the datacat URL * @param patch <code>true</code> to allow patching existing datasets */ - public static final void updateDatasets(List<DatasetModel> datasets, String folder, String url, boolean patch) { + public void updateDatasets(List<DatasetModel> datasets, String folder, boolean patch) { int nUpdated = 0; - Client client = null; - try { - client = new ClientBuilder().setUrl(url).build(); - } catch (URISyntaxException e) { - throw new RuntimeException("Invalid datacat URL.", e); - } for (DatasetModel dataset : datasets) { try { if (client.exists(folder + "/" + dataset.getName())) { @@ -76,16 +87,14 @@ * @param file the file on disk * @param metadata the metadata map * @param folder the datacat folder - * @param site the datacat site * @param dataType the data type * @param fileFormat the file format * @return the created dataset */ - public static final DatasetModel createDataset( + public final DatasetModel createDataset( File file, Map<String, Object> metadata, String folder, - String site, String dataType, String fileFormat) { @@ -101,7 +110,7 @@ .resource(file.getPath()) .dataType(dataType) .fileFormat(fileFormat) - .site(site) + .site(site.toString()) .scanStatus("OK"); // Set system metadata from the provided metadata map. 
@@ -132,18 +141,23 @@ return datasetBuilder.build(); } - public static Client createDefaultClient() { + private Client createDefaultClient() { + this.client = createClient(DatacatConstants.DATACAT_URL); + return this.client; + } + + private Client createClient(String url) { + Client client; try { - return new ClientBuilder().setUrl(DatacatConstants.DATACAT_URL).build(); + client = new ClientBuilder().setUrl(url).build(); } catch (URISyntaxException e) { throw new RuntimeException("Error initializing datacat client.", e); } + this.client = client; + return this.client; } - public static DatasetResultSetModel findEvioDatasets(Client client, String folder, Site site, String[] metadata, String[] sort, int run) { - if (client == null) { - client = createDefaultClient(); - } + public DatasetResultSetModel findEvioDatasets(String folder, String[] metadata, String[] sort, int run) { return client.searchForDatasets( folder, "current", /* dataset version */ @@ -154,11 +168,9 @@ ); } - public static DatasetResultSetModel findEvioDatasets(int run) { + public DatasetResultSetModel findEvioDatasets(int run) { return findEvioDatasets( - null, DatacatConstants.RAW_DATA_FOLDER, - DatacatConstants.DEFAULT_SITE, DatacatConstants.EVIO_METADATA, new String[] {"FILE"}, run Added: java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java ============================================================================= --- java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java (added) +++ java/trunk/datacat/src/main/java/org/hps/datacat/EvioDatasetIndex.java Wed Jul 27 17:06:36 2016 @@ -0,0 +1,146 @@ +package org.hps.datacat; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.srs.datacat.model.DatasetModel; +import org.srs.datacat.model.DatasetResultSetModel; +import org.srs.datacat.model.dataset.DatasetWithViewModel; + +/** + * Creates an index between an EVIO dataset 
and various metadata such as head timestamp range, file number, and event ID + * range. + * + * @author jeremym + */ +public final class EvioDatasetIndex { + + private DatasetResultSetModel datasets; + private DatacatUtilities util; + private Map<TimestampRange, DatasetModel> datasetTimestamps = new HashMap<TimestampRange, DatasetModel>(); + private Map<Long, DatasetModel> datasetFileNumbers = new HashMap<Long, DatasetModel>(); + private List<FileEventRange> eventRanges = new ArrayList<FileEventRange>(); + + static class TimestampRange { + + private long startTimestamp; + private long endTimestamp; + + TimestampRange(long startTimestamp, long endTimestamp) { + this.startTimestamp = startTimestamp; + this.endTimestamp = endTimestamp; + } + } + + public EvioDatasetIndex(DatacatUtilities util, int run) { + this.util = util; + load(run); + } + + public List<DatasetModel> findByTimestamp(long timestamp) { + List<DatasetModel> datasets = new ArrayList<DatasetModel>(); + for (Entry<TimestampRange, DatasetModel> entry : datasetTimestamps.entrySet()) { + if (timestamp >= entry.getKey().startTimestamp && timestamp <= entry.getKey().endTimestamp) { + datasets.add(entry.getValue()); + } + } + return datasets; + } + + public DatasetModel findByEventRange(long eventId) { + return FileEventRange.findEventRange(eventRanges, eventId).getDataset(); + } + + public DatasetModel findByFileNumber(long fileNumber) { + return datasetFileNumbers.get(fileNumber); + } + + public DatasetResultSetModel getDatasets() { + return datasets; + } + + private void load(int run) { + + datasets = util.findEvioDatasets(run); + + // setup index of first and last timestamp + for (DatasetModel dataset : datasets) { + DatasetWithViewModel datasetView = (DatasetWithViewModel) dataset; + Map<String, Object> metadata = datasetView.getMetadataMap(); + long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP"); + long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP"); + datasetTimestamps.put(new 
TimestampRange(firstTimestamp, lastTimestamp), dataset); + } + + // setup index by file number + for (DatasetModel dataset : datasets) { + DatasetWithViewModel datasetView = (DatasetWithViewModel) dataset; + Map<String, Object> metadata = datasetView.getMetadataMap(); + long fileNumber = (Long) metadata.get("FILE"); + this.datasetFileNumbers.put(fileNumber, dataset); + } + + // setup index by physics event ID range + this.eventRanges = FileEventRange.createEventRanges(datasets); + } + + // This is a test and not a command line interface! + public static void main(String[] args) { + + DatacatUtilities util = new DatacatUtilities(); + EvioDatasetIndex datasetIndex = new EvioDatasetIndex(util, 5772); + DatasetResultSetModel datasets = datasetIndex.getDatasets(); + + for (DatasetModel dataset : datasets) { + + System.out.println("checking dataset " + dataset.getName() + " ..."); + + DatasetWithViewModel datasetView = (DatasetWithViewModel) dataset; + Map<String, Object> metadata = datasetView.getMetadataMap(); + + long firstTimestamp = (Long) metadata.get("FIRST_HEAD_TIMESTAMP"); + long lastTimestamp = (Long) metadata.get("LAST_HEAD_TIMESTAMP"); + long fileNumber = (Long) metadata.get("FILE"); + long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT"); + long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT"); + + System.out.println("FIRST_HEAD_TIMESTAMP = " + firstTimestamp); + System.out.println("LAST_HEAD_TIMESTAMP = " + lastTimestamp); + System.out.println("FILE = " + fileNumber); + System.out.println("FIRST_PHYSICS_EVENT = " + firstPhysicsEvent); + System.out.println("LAST_PHYSICS_EVENT = " + lastPhysicsEvent); + + DatasetModel result = datasetIndex.findByEventRange(firstPhysicsEvent); + System.out.println("found " + result.getName() + " for event ID " + firstPhysicsEvent); + + result = datasetIndex.findByEventRange(lastPhysicsEvent); + System.out.println("found " + result.getName() + " for event ID " + lastPhysicsEvent); + + result = 
datasetIndex.findByFileNumber(fileNumber); + System.out.println("found " + result.getName() + " for file " + fileNumber); + + List<DatasetModel> firstTimestampDatasets = datasetIndex.findByTimestamp(firstTimestamp); + for (DatasetModel firstTimestampDataset : firstTimestampDatasets) { + System.out.println("found " + firstTimestampDataset.getName() + " for timestamp = " + + firstTimestamp); + } + + List<DatasetModel> lastTimestampDatasets = datasetIndex.findByTimestamp(lastTimestamp); + for (DatasetModel lastTimestampDataset : lastTimestampDatasets) { + System.out.println("found " + lastTimestampDataset.getName() + " for timestamp = " + + lastTimestamp); + } + + long midTimestamp = firstTimestamp + (lastTimestamp - firstTimestamp); + List<DatasetModel> midTimestampDatasets = datasetIndex.findByTimestamp(midTimestamp); + for (DatasetModel midTimestampDataset : midTimestampDatasets) { + System.out.println("found " + midTimestampDataset.getName() + " for timestamp = " + firstTimestamp); + } + + System.out.println(); + } + } +} Modified: java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java ============================================================================= --- java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java (original) +++ java/trunk/datacat/src/main/java/org/hps/datacat/FileEventRange.java Wed Jul 27 17:06:36 2016 @@ -7,7 +7,6 @@ import org.srs.datacat.model.DatasetModel; import org.srs.datacat.model.DatasetResultSetModel; import org.srs.datacat.model.dataset.DatasetWithViewModel; -import org.srs.datacat.shared.DatasetLocation; /** * Utility class for assocating a file in the datacat to its event ID range. 
@@ -18,16 +17,16 @@ private long startEvent; private long endEvent; - private String path; + private DatasetModel dataset; - FileEventRange(long startEvent, long endEvent, String path) { + FileEventRange(DatasetModel dataset, long startEvent, long endEvent) { this.startEvent = startEvent; this.endEvent = endEvent; - this.path = path; + this.dataset = dataset; } - public String getPath() { - return path; + public DatasetModel getDataset() { + return dataset; } public long getStartEvent() { @@ -49,13 +48,12 @@ Map<String, Object> metadata = view.getMetadataMap(); long firstPhysicsEvent = (Long) metadata.get("FIRST_PHYSICS_EVENT"); long lastPhysicsEvent = (Long) metadata.get("LAST_PHYSICS_EVENT"); - DatasetLocation loc = (DatasetLocation) view.getViewInfo().getLocations().iterator().next(); - ranges.add(new FileEventRange(firstPhysicsEvent, lastPhysicsEvent, loc.getPath())); + ranges.add(new FileEventRange(ds, firstPhysicsEvent, lastPhysicsEvent)); } return ranges; } - public static FileEventRange findEventRage(List<FileEventRange> ranges, long eventId) { + public static FileEventRange findEventRange(List<FileEventRange> ranges, long eventId) { FileEventRange match = null; for (FileEventRange range : ranges) { if (range.matches(eventId)) { Modified: java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java ============================================================================= --- java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java (original) +++ java/trunk/run-database/src/main/java/org/hps/rundb/builder/DatacatBuilder.java Wed Jul 27 17:06:36 2016 @@ -199,9 +199,8 @@ LOGGER.info("finding EVIO datasets for run " + getRun() + " in " + this.folder + " at " + this.site + " ..."); - DatasetResultSetModel results = DatacatUtilities.findEvioDatasets(datacatClient, this.folder, this.site, - METADATA_FIELDS, new String[] {"FILE"}, getRun()); - + DatacatUtilities util = new DatacatUtilities(datacatClient, this.site); + 
DatasetResultSetModel results = util.findEvioDatasets(this.folder, METADATA_FIELDS, new String[] {"FILE"}, getRun()); LOGGER.info("found " + results.getResults().size() + " EVIO datasets for run " + getRun()); return results;