Author: [log in to unmask] Date: Tue Sep 29 13:13:41 2015 New Revision: 3738 Log: Miscellaneous updates to data crawler. Added: java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java Added: java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/AidaMetadataReader.java Tue Sep 29 13:13:41 2015 @@ -0,0 +1,30 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * This is a metadata reader for AIDA files. + * <p> + * It currently only gets the run number from the file name. + * + * @author Jeremy McCormick, SLAC + */ +public class AidaMetadataReader implements FileMetadataReader { + + /** + * Get the metadata for an AIDA file. 
+ * + * @return the metadata for an AIDA file + */ + @Override + public Map<String, Object> getMetadata(final File file) throws IOException { + final Map<String, Object> metadata = new HashMap<String, Object>(); + final int run = CrawlerFileUtilities.getRunFromFileName(file); + metadata.put("runMin", run); + metadata.put("runMax", run); + return metadata; + } +} Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerConfig.java Tue Sep 29 13:13:41 2015 @@ -108,7 +108,7 @@ this.formats.add(format); return this; } - + /** * Get the database connection parameters. * @@ -162,7 +162,7 @@ Integer maxDepth() { return maxDepth; } - + /** * Get the root directory for the file search. * Modified: java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/CrawlerFileUtilities.java Tue Sep 29 13:13:41 2015 @@ -2,14 +2,21 @@ import java.io.File; +/** + * File utilities for crawler. + * + * @author Jeremy McCormick, SLAC + */ public class CrawlerFileUtilities { - - static boolean isHpsFile(File file) { - return file.getName().startsWith("hps"); - } - - static int getRunFromFileName(File file) { - String name = file.getName(); - return Integer.parseInt(name.substring(4, 8)); + + /** + * Get run number from file name assuming it looks like "hps_001234". 
+ * + * @param file the file + * @return the run number + */ + static int getRunFromFileName(final File file) { + final String name = file.getName(); + return Integer.parseInt(name.substring(4, 10)); } } Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatCrawler.java Tue Sep 29 13:13:41 2015 @@ -152,7 +152,7 @@ /** * Main method. - * + * * @param args the command line arguments */ public static void main(final String[] args) { @@ -309,9 +309,9 @@ // List of run numbers. if (cl.hasOption("r")) { - Set<Integer> acceptRuns = new HashSet<Integer>(); - for (String arg : cl.getOptionValues("r")) { - acceptRuns.add(Integer.parseInt(arg)); + final Set<Integer> acceptRuns = new HashSet<Integer>(); + for (final String arg : cl.getOptionValues("r")) { + acceptRuns.add(Integer.parseInt(arg)); } config.setAcceptRuns(acceptRuns); } Modified: java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/DatacatUtilities.java Tue Sep 29 13:13:41 2015 @@ -17,6 +17,9 @@ */ class DatacatUtilities { + /** + * Static map of strings to dataset file formats. + */ static Map<String, DatasetFileFormat> formatMap = new HashMap<String, DatasetFileFormat>(); static { for (final DatasetFileFormat format : DatasetFileFormat.values()) { @@ -24,6 +27,14 @@ } } + /** + * Add a file to the data catalog. 
+ * + * @param datacatClient the data catalog client + * @param folder the target folder in the data catalog + * @param file the file with the full path + * @param metadata the file's meta data + */ static void addFile(final DatacatClient datacatClient, final String folder, final File file, final Map<String, Object> metadata) { final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file); @@ -52,12 +63,19 @@ return response; } + /** + * Create metadata for a file. + * + * @param file the file + * @return the metadata for the file + */ static Map<String, Object> createMetadata(final File file) { final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(file); final DatasetDataType dataType = DatacatUtilities.getDataType(file); final FileMetadataReader reader = DatacatUtilities.getFileMetaDataReader(fileFormat, dataType); if (reader == null) { - throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + dataType.name() + "."); + throw new RuntimeException("No metadata reader found for format " + fileFormat.name() + " and type " + + dataType.name() + "."); } Map<String, Object> metadata; try { @@ -68,6 +86,12 @@ return metadata; } + /** + * Get the data type for a file. + * + * @param file the file + * @return the file's data type + */ static DatasetDataType getDataType(final File file) { final DatasetFileFormat fileFormat = getFileFormat(file); DatasetDataType dataType = null; @@ -79,7 +103,6 @@ } else if (fileFormat.equals(DatasetFileFormat.LCIO)) { dataType = DatasetDataType.RECON; } else if (fileFormat.equals(DatasetFileFormat.ROOT)) { - // FIXME: This should probably open the file and determine what it contains. if (file.getName().contains("_dqm")) { dataType = DatasetDataType.DQM; } else if (file.getName().contains("_dst")) { @@ -94,6 +117,12 @@ return dataType; } + /** + * Get the file format of a file. 
+ * + * @param pathname the file + * @return the file format of the file + */ static DatasetFileFormat getFileFormat(final File pathname) { String name = pathname.getName(); if (name.contains(DatasetFileFormat.EVIO.extension()) && !name.endsWith(DatasetFileFormat.EVIO.extension())) { @@ -103,6 +132,13 @@ return formatMap.get(extension); } + /** + * Get a metadata reader for a given combination of file format and data type. + * + * @param fileFormat the file format + * @param dataType the data type + * @return the file metadata reader + */ static FileMetadataReader getFileMetaDataReader(final DatasetFileFormat fileFormat, final DatasetDataType dataType) { FileMetadataReader reader = null; if (fileFormat.equals(DatasetFileFormat.LCIO)) { @@ -111,19 +147,20 @@ reader = new EvioMetadataReader(); } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DST)) { reader = new RootDstMetadataReader(); + } else if (fileFormat.equals(DatasetFileFormat.ROOT) && dataType.equals(DatasetDataType.DQM)) { + reader = new RootDqmMetadataReader(); + } else if (fileFormat.equals(DatasetFileFormat.AIDA)) { + reader = new AidaMetadataReader(); } return reader; } - static String getFolder(final String rootDir, final File file) { - String stripDir = rootDir; - if (!stripDir.endsWith("/")) { - stripDir += "/"; - } - final String folder = file.getParentFile().getPath().replace(stripDir, ""); - return folder; - } - + /** + * Strip the file number from an EVIO file name. 
+ * + * @param name the EVIO file name + * @return the file name stripped of the file number + */ static String stripEvioFileNumber(final String name) { String strippedName = name; if (!name.endsWith(DatasetFileFormat.EVIO.extension())) { Modified: java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/EvioMetadataReader.java Tue Sep 29 13:13:41 2015 @@ -16,25 +16,28 @@ import org.jlab.coda.jevio.EvioReader; import org.lcsim.util.log.DefaultLogFormatter; import org.lcsim.util.log.LogUtil; - + /** * Reads metadata from EVIO files. - * + * * @author Jeremy McCormick, SLAC */ public class EvioMetadataReader implements FileMetadataReader { - + + /** + * Class logger. + */ private static Logger LOGGER = LogUtil.create(EvioMetadataReader.class, new DefaultLogFormatter(), Level.ALL); - + /** * Get the EVIO file metadata. - * + * * @param file the EVIO file * @return the metadata map of key and value pairs */ @Override - public Map<String, Object> getMetadata(File file) throws IOException { - + public Map<String, Object> getMetadata(final File file) throws IOException { + Date startDate = null; Date endDate = null; int badEventCount = 0; @@ -47,18 +50,18 @@ Integer endEvent = null; Integer startEvent = null; Long lastTimestamp = null; - + EvioReader evioReader = null; try { evioReader = EvioFileUtilities.open(file, false); - } catch (EvioException e) { + } catch (final EvioException e) { throw new IOException(e); } - - int fileNumber = EvioFileUtilities.getSequenceFromName(file); - + + final int fileNumber = EvioFileUtilities.getSequenceFromName(file); + EvioEvent evioEvent = null; - + while (true) { try { evioEvent = evioReader.parseNextEvent(); @@ -114,22 +117,22 @@ } ++eventCount; } - } - + } + // Set end date from last valid timestamp. 
if (endDate == null) { endDate = new Date(lastTimestamp); LOGGER.info("set end date to " + endDate + " from last timestamp " + lastTimestamp); } - + // Set end event number. if (eventIdData != null) { endEvent = eventIdData[0]; LOGGER.info("set end event " + endEvent); } - - Map<String, Object> metaDataMap = new HashMap<String, Object>(); - + + final Map<String, Object> metaDataMap = new HashMap<String, Object>(); + metaDataMap.put("runMin", run); metaDataMap.put("runMax", run); metaDataMap.put("eventCount", eventCount); @@ -142,7 +145,7 @@ metaDataMap.put("endEvent", endEvent); metaDataMap.put("hasEnd", hasEnd ? 1 : 0); metaDataMap.put("hasPrestart", hasPrestart ? 1 : 0); - + return metaDataMap; - } + } } Modified: java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/FileFormatFilter.java Tue Sep 29 13:13:41 2015 @@ -14,7 +14,7 @@ * Filter files on their format. * <p> * Only files matching the format will be accepted by the file visitor. - * + * * @author Jeremy McCormick, SLAC */ public class FileFormatFilter implements FileFilter { @@ -23,18 +23,18 @@ * Setup logger. */ private static final Logger LOGGER = LogUtil.create(FileFormatFilter.class, new DefaultLogFormatter(), Level.ALL); - + /** * The file format. */ - private Set<DatasetFileFormat> formats; - + private final Set<DatasetFileFormat> formats; + /** * Create a new filter with the given format. - * + * * @param format the file format */ - FileFormatFilter(Set<DatasetFileFormat> formats) { + FileFormatFilter(final Set<DatasetFileFormat> formats) { if (formats == null) { throw new IllegalArgumentException("The formats collection is null."); } @@ -43,18 +43,18 @@ } this.formats = formats; } - + /** * Returns <code>true</code> if the file should be accepted, e.g. 
it matches the filter's format. - * + * * @param pathname the file's full path */ @Override - public boolean accept(File pathname) { + public boolean accept(final File pathname) { LOGGER.info(pathname.getPath()); - DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname); + final DatasetFileFormat fileFormat = DatacatUtilities.getFileFormat(pathname); if (fileFormat != null) { - LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name()); + LOGGER.info("file " + pathname.getPath() + " has format " + fileFormat.name()); return formats.contains(fileFormat); } else { LOGGER.info("rejected file " + pathname.getPath() + " with unknown format"); Added: java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java (added) +++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDqmMetadataReader.java Tue Sep 29 13:13:41 2015 @@ -0,0 +1,30 @@ +package org.hps.crawler; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * This is a metadata reader for ROOT DQM files. + * <p> + * It currently only gets the run number from the file name. + * + * @author Jeremy McCormick, SLAC + */ +public class RootDqmMetadataReader implements FileMetadataReader { + + /** + * Get the metadata for a ROOT DQM file. 
+ * + * @return the metadata for a ROOT DQM file + */ + @Override + public Map<String, Object> getMetadata(final File file) throws IOException { + final Map<String, Object> metadata = new HashMap<String, Object>(); + final int run = CrawlerFileUtilities.getRunFromFileName(file); + metadata.put("runMin", run); + metadata.put("runMax", run); + return metadata; + } +} Modified: java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java ============================================================================= --- java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java (original) +++ java/trunk/crawler/src/main/java/org/hps/crawler/RootDstMetadataReader.java Tue Sep 29 13:13:41 2015 @@ -2,7 +2,6 @@ import hep.io.root.RootClassNotFound; import hep.io.root.RootFileReader; -import hep.io.root.interfaces.TBranch; import hep.io.root.interfaces.TLeafElement; import hep.io.root.interfaces.TObjArray; import hep.io.root.interfaces.TTree; @@ -16,33 +15,34 @@ * This is a very simple metadata reader for ROOT DST files. * <p> * It currently only sets the standard metadata for event count and run number. - * + * * @author Jeremy McCormick, SLAC */ public class RootDstMetadataReader implements FileMetadataReader { /** * Get the metadata for a ROOT DST file. 
- * + * * @return the metadata for a ROOT DST file */ - public Map<String, Object> getMetadata(File file) throws IOException { - Map<String, Object> metadata = new HashMap<String, Object>(); + @Override + public Map<String, Object> getMetadata(final File file) throws IOException { + final Map<String, Object> metadata = new HashMap<String, Object>(); RootFileReader rootReader = null; - long eventCount = 0; + long eventCount = 0; int runMin = 0; int runMax = 0; long size = 0; try { rootReader = new RootFileReader(file.getAbsolutePath()); - TTree tree = (TTree) rootReader.get("HPS_Event"); - //TBranch branch = tree.getBranch("Event"); + final TTree tree = (TTree) rootReader.get("HPS_Event"); + // TBranch branch = tree.getBranch("Event"); eventCount = tree.getEntries(); size = tree.getTotBytes(); - TObjArray leaves = tree.getLeaves(); - - for (Object object : leaves) { - TLeafElement leaf = (TLeafElement) object; + final TObjArray leaves = tree.getLeaves(); + + for (final Object object : leaves) { + final TLeafElement leaf = (TLeafElement) object; if ("run_number".equals(leaf.getName())) { runMin = (int) leaf.getWrappedValue(0); runMax = (int) leaf.getWrappedValue(0); @@ -55,11 +55,11 @@ if (rootReader != null) { rootReader.close(); } - } + } metadata.put("eventCount", eventCount); metadata.put("runMin", runMin); metadata.put("runMax", runMax); metadata.put("size", size); return metadata; - } + } }