Author: [log in to unmask]
Date: Tue Jan 26 18:31:07 2016
New Revision: 4141
Log:
Add class for adding single files to the datacat.
Added:
java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
Added: java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java
=============================================================================
--- java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java (added)
+++ java/branches/jeremy-dev/crawler/src/main/java/org/hps/crawler/DatacatAddFile.java Tue Jan 26 18:31:07 2016
@@ -0,0 +1,148 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.srs.datacat.model.DatasetModel;
+
+/**
+ * Command line file crawler for populating the data catalog.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public final class DatacatAddFile {
+
+ /**
+ * Setup the logger.
+ */
+ private static final Logger LOGGER = Logger.getLogger(DatacatCrawler.class.getPackage().getName());
+
+ private List<File> paths;
+
+ /**
+ * Command line options for the crawler.
+ */
+ private static final Options OPTIONS = new Options();
+
+ /**
+ * Statically define the command options.
+ */
+ static {
+ OPTIONS.addOption("h", "help", false, "print help and exit (overrides all other arguments)");
+ OPTIONS.addOption("f", "folder", true, "datacat folder");
+ OPTIONS.addOption("s", "site", true, "datacat site");
+ OPTIONS.addOption("u", "base-url", true, "provide a base URL of the datacat server");
+ }
+
+ /**
+ * Main method.
+ *
+ * @param args the command line arguments
+ */
+ public static void main(final String[] args) {
+ new DatacatAddFile().parse(args).run();
+ }
+
+ /**
+ * The crawler configuration.
+ */
+ private CrawlerConfig config;
+
+ /**
+ * The options parser.
+ */
+ private final DefaultParser parser = new DefaultParser();
+
+ /**
+ * Parse command line options.
+ *
+ * @param args the command line arguments
+ * @return this object (for method chaining)
+ */
+ private DatacatAddFile parse(final String[] args) {
+
+ LOGGER.config("parsing command line options");
+
+ this.config = new CrawlerConfig();
+
+ try {
+ final CommandLine cl = this.parser.parse(OPTIONS, args);
+
+ // Print help.
+ if (cl.hasOption("h") || args.length == 0) {
+ this.printUsage();
+ }
+
+ // Datacat folder.
+ if (cl.hasOption("f")) {
+ config.setDatacatFolder(cl.getOptionValue("f"));
+ LOGGER.config("set datacat folder to " + config.folder());
+ } else {
+ throw new RuntimeException("The -f argument with the datacat folder is required.");
+ }
+
+ // Dry run.
+ if (cl.hasOption("D")) {
+ config.setDryRun(true);
+ }
+
+ // List of paths.
+ if (!cl.getArgList().isEmpty()) {
+ paths = new ArrayList<File>();
+ for (String arg : cl.getArgList()) {
+ paths.add(new File(arg));
+ }
+ }
+
+ if (this.paths.isEmpty()) {
+ throw new RuntimeException("Missing at least one file to process.");
+ }
+
+ // Dataset site (defaults to JLAB).
+ Site site = Site.JLAB;
+ if (cl.hasOption("s")) {
+ site = Site.valueOf(cl.getOptionValue("s"));
+ }
+ LOGGER.config("dataset site " + site);
+ config.setSite(site);
+
+ // Data catalog URL.
+ if (cl.hasOption("u")) {
+ config.setDatacatUrl(cl.getOptionValue("u"));
+ LOGGER.config("datacat URL " + config.datacatUrl());
+ }
+
+ } catch (final ParseException e) {
+ throw new RuntimeException("Error parsing options.", e);
+ }
+
+ LOGGER.info("Done parsing command line options.");
+
+ return this;
+ }
+
+ /**
+ * Print the usage statement for this tool to the console and then exit the program.
+ */
+ private void printUsage() {
+ final HelpFormatter help = new HelpFormatter();
+ help.printHelp(70, "DatacatAddFile [options] path1 path2 ...", "", OPTIONS, "");
+ System.exit(0);
+ }
+
+ /**
+ * Run the job.
+ */
+ private void run() {
+ List<DatasetModel> datasets = DatacatHelper.createDatasets(paths, config.folder(), config.site().toString());
+ DatacatHelper.addDatasets(datasets, config.folder(), config.datacatUrl());
+ LOGGER.info("added " + datasets.size() + " datasets");
+ }
+}
|