Author: [log in to unmask] Date: Tue Jan 26 18:31:07 2016 New Revision: 4141 Log: Add class for adding single files to the datacat. Added: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java Added: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java ============================================================================= --- java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java (added) +++ java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java Tue Jan 26 18:31:07 2016 @@ -0,0 +1,148 @@ +package org.hps.crawler; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.srs.datacat.model.DatasetModel; + +/** + * Command line file crawler for populating the data catalog. + * + * @author Jeremy McCormick, SLAC + */ +public final class DatacatAddFile { + + /** + * Setup the logger. + */ + private static final Logger LOGGER = Logger.getLogger(DatacatCrawler.class.getPackage().getName()); + + private List<File> paths; + + /** + * Command line options for the crawler. + */ + private static final Options OPTIONS = new Options(); + + /** + * Statically define the command options. + */ + static { + OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)"); + OPTIONS.addOption("f", "folder", true, "datacat folder"); + OPTIONS.addOption("s", "site", true, "datacat site"); + OPTIONS.addOption("u", "base-url", true, "provide a base URL of the datacat server"); + } + + /** + * Main method. + * + * @param args the command line arguments + */ + public static void main(final String[] args) { + new DatacatAddFile().parse(args).run(); + } + + /** + * The crawler configuration. + */ + private CrawlerConfig config; + + /** + * The options parser. + */ + private final DefaultParser parser = new DefaultParser(); + + /** + * Parse command line options. + * + * @param args the command line arguments + * @return this object (for method chaining) + */ + private DatacatAddFile parse(final String[] args) { + + LOGGER.config("parsing command line options"); + + this.config = new CrawlerConfig(); + + try { + final CommandLine cl = this.parser.parse(OPTIONS, args); + + // Print help. + if (cl.hasOption("h") || args.length == 0) { + this.printUsage(); + } + + // Datacat folder. + if (cl.hasOption("f")) { + config.setDatacatFolder(cl.getOptionValue("f")); + LOGGER.config("set datacat folder to " + config.folder()); + } else { + throw new RuntimeException("The -f argument with the datacat folder is required."); + } + + // Dry run. + if (cl.hasOption("D")) { + config.setDryRun(true); + } + + // List of paths. + if (!cl.getArgList().isEmpty()) { + paths = new ArrayList<File>(); + for (String arg : cl.getArgList()) { + paths.add(new File(arg)); + } + } + + if (this.paths.isEmpty()) { + throw new RuntimeException("Missing at least one file to process."); + } + + // Dataset site (defaults to JLAB). + Site site = Site.JLAB; + if (cl.hasOption("s")) { + site = Site.valueOf(cl.getOptionValue("s")); + } + LOGGER.config("dataset site " + site); + config.setSite(site); + + // Data catalog URL. + if (cl.hasOption("u")) { + config.setDatacatUrl(cl.getOptionValue("u")); + LOGGER.config("datacat URL " + config.datacatUrl()); + } + + } catch (final ParseException e) { + throw new RuntimeException("Error parsing options.", e); + } + + LOGGER.info("Done parsing command line options."); + + return this; + } + + /** + * Print the usage statement for this tool to the console and then exit the program. + */ + private void printUsage() { + final HelpFormatter help = new HelpFormatter(); + help.printHelp(70, "DatacatAddFile [options] path1 path2 ...", "", OPTIONS, ""); + System.exit(0); + } + + /** + * Run the job. + */ + private void run() { + List<DatasetModel> datasets = DatacatHelper.createDatasets(paths, config.folder(), config.site().toString()); + DatacatHelper.addDatasets(datasets, config.folder(), config.datacatUrl()); + LOGGER.info("added " + datasets.size() + " datasets"); + } +}