Author: [log in to unmask] Date: Mon Jan 4 10:56:39 2016 New Revision: 4078 Log: Cleanup building of new dataset; other minor changes. Modified: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DataType.java java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatHelper.java Modified: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java ============================================================================= --- java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java (original) +++ java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/CrawlerFileVisitor.java Mon Jan 4 10:56:39 2016 @@ -10,7 +10,7 @@ import java.util.List; /** - * Visitor which creates a {@link FileSet} from walking a directory tree. + * Visitor which creates a list of files from walking a directory tree. * <p> * Any number of {@link java.io.FileFilter} objects can be registered with this visitor to restrict which files are * accepted. @@ -20,12 +20,12 @@ final class CrawlerFileVisitor extends SimpleFileVisitor<Path> { /** - * The run log containing information about files from each run. + * The list of files found from crawling. */ private final List<File> files = new ArrayList<File>(); /** - * A list of file filters to apply. + * A list of file filters applied to each path. */ private final List<FileFilter> filters = new ArrayList<FileFilter>(); Modified: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DataType.java ============================================================================= --- java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DataType.java (original) +++ java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DataType.java Mon Jan 4 10:56:39 2016 @@ -7,7 +7,7 @@ */ public enum DataType { /** - * Data quality management plots. + * Data quality management plots (AIDA or ROOT). */ DQM, /** @@ -15,7 +15,7 @@ */ RAW, /** - * Reconstructed data (usually LCIO). + * Reconstructed data (LCIO). */ RECON, /** Modified: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java ============================================================================= --- java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original) +++ java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Mon Jan 4 10:56:39 2016 @@ -2,7 +2,6 @@ import java.io.File; import java.io.IOException; -import java.net.URISyntaxException; import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.nio.file.attribute.BasicFileAttributes; @@ -19,8 +18,6 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.srs.datacat.client.Client; -import org.srs.datacat.client.ClientBuilder; import org.srs.datacat.model.DatasetModel; /** @@ -57,7 +54,6 @@ * Statically define the command options. */ static { - OPTIONS.addOption("L", "log-level", true, "set the log level (INFO, FINE, etc.)"); OPTIONS.addOption("b", "min-date", true, "min date for a file (example \"2015-03-26 11:28:59\")"); OPTIONS.addOption("d", "directory", true, "root directory to crawl"); OPTIONS.addOption("f", "folder", true, "datacat folder"); @@ -108,13 +104,6 @@ // Print help. if (cl.hasOption("h") || args.length == 0) { this.printUsage(); - } - - // Log level (only used for this class's logger). - if (cl.hasOption("L")) { - final Level level = Level.parse(cl.getOptionValue("L")); - LOGGER.config("log level " + level); - LOGGER.setLevel(level); } // Root directory for file crawling. @@ -217,6 +206,18 @@ } config.setAcceptRuns(acceptRuns); } + + // Dry run. + if (cl.hasOption("D")) { + config.setDryRun(true); + } + + // List of paths. + if (!cl.getArgList().isEmpty()) { + for (String arg : cl.getArgList()) { + config.addPath(arg); + } + } // Dataset site (defaults to JLAB). Site site = Site.JLAB; @@ -226,22 +227,10 @@ LOGGER.config("dataset site " + site); config.setSite(site); - // Dry run. - if (cl.hasOption("D")) { - config.setDryRun(true); - } - // Data catalog URL. if (cl.hasOption("u")) { config.setDatacatUrl(cl.getOptionValue("u")); LOGGER.config("datacat URL " + config.datacatUrl()); - } - - // List of paths. - if (!cl.getArgList().isEmpty()) { - for (String arg : cl.getArgList()) { - config.addPath(arg); - } } } catch (final ParseException e) { Modified: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatHelper.java ============================================================================= --- java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatHelper.java (original) +++ java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatHelper.java Mon Jan 4 10:56:39 2016 @@ -14,6 +14,7 @@ import org.srs.datacat.client.Client; import org.srs.datacat.client.ClientBuilder; import org.srs.datacat.model.DatasetModel; +import org.srs.datacat.model.DatasetView.VersionId; import org.srs.datacat.shared.Dataset; import org.srs.datacat.shared.Provider; @@ -183,27 +184,37 @@ String dataType, String fileFormat) { - Provider provider = new Provider(); - List<DatasetModel> datasets = new ArrayList<DatasetModel>(); - + Provider provider = new Provider(); Dataset.Builder datasetBuilder = provider.getDatasetBuilder(); - datasetBuilder.versionId(1); - datasetBuilder.master(true); - datasetBuilder.name(file.getName()); - datasetBuilder.resource(file.getPath()); - datasetBuilder.size((Long) metadata.get("size")); - datasetBuilder.scanStatus("OK"); - datasetBuilder.dataType(dataType); - datasetBuilder.fileFormat(fileFormat); - datasetBuilder.site(site); + + // Set basic info on new dataset. + datasetBuilder.versionId(VersionId.valueOf("new")) + .master(true) + .name(file.getName()) + .resource(file.getPath()) + .dataType(dataType) + .fileFormat(fileFormat) + .site(site) + .scanStatus("OK"); + + // Set system metadata from the provided metadata map. if (metadata.get("eventCount") != null) { datasetBuilder.eventCount((Long) metadata.get("eventCount")); } - datasetBuilder.runMin((Long) metadata.get("runMin")); - datasetBuilder.runMax((Long) metadata.get("runMax")); - datasetBuilder.checksum((String) metadata.get("checksum")); - - // Create user metadata leaving out system metadata fields. + if (metadata.get("checksum") != null) { + datasetBuilder.checksum((String) metadata.get("checksum")); + } + if (metadata.get("runMin") != null) { + datasetBuilder.runMin((Long) metadata.get("runMin")); + } + if (metadata.get("runMax") != null) { + datasetBuilder.runMax((Long) metadata.get("runMax")); + } + if (metadata.get("size") != null) { + datasetBuilder.size((Long) metadata.get("size")); + } + + // Create user metadata, leaving out system metadata fields. Map<String, Object> userMetadata = new HashMap<String, Object>(); for (Entry<String, Object> metadataEntry : metadata.entrySet()) { if (!SYSTEM_METADATA.contains(metadataEntry.getKey())) { @@ -212,11 +223,7 @@ } datasetBuilder.versionMetadata(userMetadata); - // Build dataset and add to list. - DatasetModel dataset = datasetBuilder.build(); - datasets.add(dataset); - - return dataset; + return datasetBuilder.build(); } /**