Author: [log in to unmask]
Date: Tue Sep 29 13:13:41 2015
New Revision: 3738
Log:
Miscellaneous updates to data crawler.
Added:
java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java
java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java
Modified:
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
Added: java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java Tue Sep 29 13:13:41 2015
@@ -0,0 +1,30 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This is a metadata reader for AIDA files.
+ * <p>
+ * It currently only gets the run number from the file name.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class AidaMetadataReader implements FileMetadataReader {
+
+ /**
+ * Get the metadata for an AIDA file; the run is stored as both "runMin" and "runMax".
+ * @param file the AIDA file, whose name supplies the run number
+ * @return the metadata for the AIDA file
+ */
+ @Override
+ public Map<String, Object> getMetadata(final File file) throws IOException {
+ final Map<String, Object> metadata = new HashMap<String, Object>();
+ final int run = CrawlerFileUtilities.getRunFromFileName(file);
+ metadata.put("runMin", run); // lower and upper run bounds are the same single run
+ metadata.put("runMax", run);
+ return metadata;
+ }
+}
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java Tue Sep 29 13:13:41 2015
@@ -108,7 +108,7 @@
this.formats.add(format);
return this;
}
-
+
/**
* Get the database connection parameters.
*
@@ -162,7 +162,7 @@
Integer maxDepth() {
return maxDepth;
}
-
+
/**
* Get the root directory for the file search.
*
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java Tue Sep 29 13:13:41 2015
@@ -2,14 +2,21 @@
import java.io.File;
+/**
+ * File utilities for crawler.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
public class CrawlerFileUtilities {
-
- static boolean isHpsFile(File file) {
- return file.getName().startsWith("hps");
- }
-
- static int getRunFromFileName(File file) {
- String name = file.getName();
- return Integer.parseInt(name.substring(4, 8));
+
+ /**
+ * Get run number from file name assuming it looks like "hps_001234".
+ * The six characters at indices 4-9 of the name are parsed as a decimal integer.
+ * @param file the file
+ * @return the run number
+ */
+ static int getRunFromFileName(final File file) {
+ final String name = file.getName();
+ return Integer.parseInt(name.substring(4, 10)); // skips the 4-character prefix, e.g. "hps_"
}
}
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Tue Sep 29 13:13:41 2015
@@ -152,7 +152,7 @@
/**
* Main method.
- *
+ *
* @param args the command line arguments
*/
public static void main(final String[] args) {
@@ -309,9 +309,9 @@
// List of run numbers.
if (cl.hasOption("r")) {
- Set<Integer> acceptRuns = new HashSet<Integer>();
- for (String arg : cl.getOptionValues("r")) {
- acceptRuns.add(Integer.parseInt(arg));
+ final Set<Integer> acceptRuns = new HashSet<Integer>();
+ for (final String arg : cl.getOptionValues("r")) {
+ acceptRuns.add(Integer.parseInt(arg));
}
config.setAcceptRuns(acceptRuns);
}
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java Tue Sep 29 13:13:41 2015
@@ -17,6 +17,9 @@
*/
class DatacatUtilities {
+ /**
+ * Static map of strings to dataset file formats.
+ */
static Map<String, DatasetFileFormat> formatMap = new HashMap<String, DatasetFileFormat>();
static {
for (final DatasetFileFormat format : DatasetFileFormat.values()) {
@@ -24,6 +27,14 @@
}
}
+ /**
+ * Add a file to the data catalog.
+ *
+ * @param datacatClient the data catalog client
+ * @param folder the target folder in the data catalog
+ * @param file the file with the full path
+ * @param metadata the file's metadata
+ */
static void addFile(final DatacatClient datacatClient, final String folder, final File file,
final Map<String, Object> metadata) {
final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
@@ -52,12 +63,19 @@
return response;
}
+ /**
+ * Create metadata for a file.
+ *
+ * @param file the file
+ * @return the metadata for the file
+ */
static Map<String, Object> createMetadata(final File file) {
final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file);
final DatasetDataType dataType = DatacatUtilities.getDataType(file);
final FileMetadataReader reader = DatacatUtilities.getFileMetaDataReader(fileFormat, dataType);
if (reader == null) {
- throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + dataType.name() + ".");
+ throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type "
+ + dataType.name() + ".");
}
Map<String, Object> metadata;
try {
@@ -68,6 +86,12 @@
return metadata;
}
+ /**
+ * Get the data type for a file.
+ *
+ * @param file the file
+ * @return the file's data type
+ */
static DatasetDataType getDataType(final File file) {
final DatasetFileFormat fileFormat = getFileFormat(file);
DatasetDataType dataType = null;
@@ -79,7 +103,6 @@
} else if (fileFormat.equals(DatasetFileFormat.LCIO)) {
dataType = DatasetDataType.RECON;
} else if (fileFormat.equals(DatasetFileFormat.ROOT)) {
- // FIXME: This should probably open the file and determine what it contains.
if (file.getName().contains("_dqm")) {
dataType = DatasetDataType.DQM;
} else if (file.getName().contains("_dst")) {
@@ -94,6 +117,12 @@
return dataType;
}
+ /**
+ * Get the file format of a file.
+ *
+ * @param pathname the file
+ * @return the file format of the file
+ */
static DatasetFileFormat getFileFormat(final File pathname) {
String name = pathname.getName();
if (name.contains(DatasetFileFormat.EVIO.extension()) && !name.endsWith(DatasetFileFormat.EVIO.extension())) {
@@ -103,6 +132,13 @@
return formatMap.get(extension);
}
+ /**
+ * Get a metadata reader for a given combination of file format and data type.
+ *
+ * @param fileFormat the file format
+ * @param dataType the data type
+ * @return the file metadata reader
+ */
static FileMetadataReader getFileMetaDataReader(final DatasetFileFormat fileFormat, final DatasetDataType dataType) {
FileMetadataReader reader = null;
if (fileFormat.equals(DatasetFileFormat.LCIO)) {
@@ -111,19 +147,20 @@
reader = new EvioMetadataReader();
} else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DST)) {
reader = new RootDstMetadataReader();
+ } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DQM)) {
+ reader = new RootDqmMetadataReader();
+ } else if (fileFormat.equals(DatasetFileFormat.AIDA)) {
+ reader = new AidaMetadataReader();
}
return reader;
}
- static String getFolder(final String rootDir, final File file) {
- String stripDir = rootDir;
- if (!stripDir.endsWith("/")) {
- stripDir += "/";
- }
- final String folder = file.getParentFile().getPath().replace(stripDir, "");
- return folder;
- }
-
+ /**
+ * Strip the file number from an EVIO file name.
+ *
+ * @param name the EVIO file name
+ * @return the file name stripped of the file number
+ */
static String stripEvioFileNumber(final String name) {
String strippedName = name;
if (!name.endsWith(DatasetFileFormat.EVIO.extension())) {
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java Tue Sep 29 13:13:41 2015
@@ -16,25 +16,28 @@
import org.jlab.coda.jevio.EvioReader;
import org.lcsim.util.log.DefaultLogFormatter;
import org.lcsim.util.log.LogUtil;
-
+
/**
* Reads metadata from EVIO files.
- *
+ *
* @author Jeremy McCormick, SLAC
*/
public class EvioMetadataReader implements FileMetadataReader {
-
+
+ /**
+ * Class logger.
+ */
private static Logger LOGGER = LogUtil.create(EvioMetadataReader.class, new DefaultLogFormatter(), Level.ALL);
-
+
/**
* Get the EVIO file metadata.
- *
+ *
* @param file the EVIO file
* @return the metadata map of key and value pairs
*/
@Override
- public Map<String, Object> getMetadata(File file) throws IOException {
-
+ public Map<String, Object> getMetadata(final File file) throws IOException {
+
Date startDate = null;
Date endDate = null;
int badEventCount = 0;
@@ -47,18 +50,18 @@
Integer endEvent = null;
Integer startEvent = null;
Long lastTimestamp = null;
-
+
EvioReader evioReader = null;
try {
evioReader = EvioFileUtilities.open(file, false);
- } catch (EvioException e) {
+ } catch (final EvioException e) {
throw new IOException(e);
}
-
- int fileNumber = EvioFileUtilities.getSequenceFromName(file);
-
+
+ final int fileNumber = EvioFileUtilities.getSequenceFromName(file);
+
EvioEvent evioEvent = null;
-
+
while (true) {
try {
evioEvent = evioReader.parseNextEvent();
@@ -114,22 +117,22 @@
}
++eventCount;
}
- }
-
+ }
+
// Set end date from last valid timestamp.
if (endDate == null) {
endDate = new Date(lastTimestamp);
LOGGER.info("set end date to " + endDate + " from last timestamp " + lastTimestamp);
}
-
+
// Set end event number.
if (eventIdData != null) {
endEvent = eventIdData[0];
LOGGER.info("set end event " + endEvent);
}
-
- Map<String, Object> metaDataMap = new HashMap<String, Object>();
-
+
+ final Map<String, Object> metaDataMap = new HashMap<String, Object>();
+
metaDataMap.put("runMin", run);
metaDataMap.put("runMax", run);
metaDataMap.put("eventCount", eventCount);
@@ -142,7 +145,7 @@
metaDataMap.put("endEvent", endEvent);
metaDataMap.put("hasEnd", hasEnd ? 1 : 0);
metaDataMap.put("hasPrestart", hasPrestart ? 1 : 0);
-
+
return metaDataMap;
- }
+ }
}
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java Tue Sep 29 13:13:41 2015
@@ -14,7 +14,7 @@
* Filter files on their format.
* <p>
* Only files matching the format will be accepted by the file visitor.
- *
+ *
* @author Jeremy McCormick, SLAC
*/
public class FileFormatFilter implements FileFilter {
@@ -23,18 +23,18 @@
* Setup logger.
*/
private static final Logger LOGGER = LogUtil.create(FileFormatFilter.class, new DefaultLogFormatter(), Level.ALL);
-
+
/**
* The file format.
*/
- private Set<DatasetFileFormat> formats;
-
+ private final Set<DatasetFileFormat> formats;
+
/**
* Create a new filter with the given format.
- *
+ *
* @param format the file format
*/
- FileFormatFilter(Set<DatasetFileFormat> formats) {
+ FileFormatFilter(final Set<DatasetFileFormat> formats) {
if (formats == null) {
throw new IllegalArgumentException("The formats collection is null.");
}
@@ -43,18 +43,18 @@
}
this.formats = formats;
}
-
+
/**
* Returns <code>true</code> if the file should be accepted, e.g. it matches the filer's format.
- *
+ *
* @param pathname the file's full path
*/
@Override
- public boolean accept(File pathname) {
+ public boolean accept(final File pathname) {
LOGGER.info(pathname.getPath());
- DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname);
+ final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname);
if (fileFormat != null) {
- LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name());
+ LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name());
return formats.contains(fileFormat);
} else {
LOGGER.info("rejected file " + pathname.getPath() + " with unknown format");
Added: java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java (added)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java Tue Sep 29 13:13:41 2015
@@ -0,0 +1,30 @@
+package org.hps.crawler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This is a metadata reader for ROOT DQM files.
+ * <p>
+ * It currently only gets the run number from the file name.
+ *
+ * @author Jeremy McCormick, SLAC
+ */
+public class RootDqmMetadataReader implements FileMetadataReader {
+
+ /**
+ * Get the metadata for a ROOT DQM file; the run is stored as both "runMin" and "runMax".
+ * @param file the ROOT DQM file, whose name supplies the run number
+ * @return the metadata for a ROOT DQM file
+ */
+ @Override
+ public Map<String, Object> getMetadata(final File file) throws IOException {
+ final Map<String, Object> metadata = new HashMap<String, Object>();
+ final int run = CrawlerFileUtilities.getRunFromFileName(file);
+ metadata.put("runMin", run); // lower and upper run bounds are the same single run
+ metadata.put("runMax", run);
+ return metadata;
+ }
+}
Modified: java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java
=============================================================================
--- java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java (original)
+++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java Tue Sep 29 13:13:41 2015
@@ -2,7 +2,6 @@
import hep.io.root.RootClassNotFound;
import hep.io.root.RootFileReader;
-import hep.io.root.interfaces.TBranch;
import hep.io.root.interfaces.TLeafElement;
import hep.io.root.interfaces.TObjArray;
import hep.io.root.interfaces.TTree;
@@ -16,33 +15,34 @@
* This is a very simple metadata reader for ROOT DST files.
* <p>
* It currently only sets the standard metadata for event count and run number.
- *
+ *
* @author Jeremy McCormick, SLAC
*/
public class RootDstMetadataReader implements FileMetadataReader {
/**
* Get the metadata for a ROOT DST file.
- *
+ *
* @return the metadata for a ROOT DST file
*/
- public Map<String, Object> getMetadata(File file) throws IOException {
- Map<String, Object> metadata = new HashMap<String, Object>();
+ @Override
+ public Map<String, Object> getMetadata(final File file) throws IOException {
+ final Map<String, Object> metadata = new HashMap<String, Object>();
RootFileReader rootReader = null;
- long eventCount = 0;
+ long eventCount = 0;
int runMin = 0;
int runMax = 0;
long size = 0;
try {
rootReader = new RootFileReader(file.getAbsolutePath());
- TTree tree = (TTree) rootReader.get("HPS_Event");
- //TBranch branch = tree.getBranch("Event");
+ final TTree tree = (TTree) rootReader.get("HPS_Event");
+ // TBranch branch = tree.getBranch("Event");
eventCount = tree.getEntries();
size = tree.getTotBytes();
- TObjArray leaves = tree.getLeaves();
-
- for (Object object : leaves) {
- TLeafElement leaf = (TLeafElement) object;
+ final TObjArray leaves = tree.getLeaves();
+
+ for (final Object object : leaves) {
+ final TLeafElement leaf = (TLeafElement) object;
if ("run_number".equals(leaf.getName())) {
runMin = (int) leaf.getWrappedValue(0);
runMax = (int) leaf.getWrappedValue(0);
@@ -55,11 +55,11 @@
if (rootReader != null) {
rootReader.close();
}
- }
+ }
metadata.put("eventCount", eventCount);
metadata.put("runMin", runMin);
metadata.put("runMax", runMax);
metadata.put("size", size);
return metadata;
- }
+ }
}
|