Print

Print


Author: [log in to unmask]
Date: Wed May 27 19:03:58 2015
New Revision: 3046

Log:
Add class for extracting run information from spreadsheet export.

Added:
    java/trunk/conditions/src/main/java/org/hps/conditions/run/RunRange.java

Added: java/trunk/conditions/src/main/java/org/hps/conditions/run/RunRange.java
 =============================================================================
--- java/trunk/conditions/src/main/java/org/hps/conditions/run/RunRange.java	(added)
+++ java/trunk/conditions/src/main/java/org/hps/conditions/run/RunRange.java	Wed May 27 19:03:58 2015
@@ -0,0 +1,270 @@
+package org.hps.conditions.run;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.csv.CSVRecord;
+
+/**
+ * Used with the {@link RunSpreadsheet} to find ranges of runs where columns have the same values so they can be assigned
+ * a conditions record with a run start and end range.
+ * <p>
+ * Bad rows such as ones without run numbers or with invalid data values are skipped and not included in a range.
+ * 
+ * @author Jeremy McCormick
+ */
+public final class RunRange {
+
+    /**
+     * Find run ranges for conditions data given a set of column names and the full run spreadsheet.
+     * <p>
+     * Consecutive valid records with identical values in the given columns are merged into one range; an
+     * invalid record closes the range that is currently open.
+     *
+     * @param runSheet the run spreadsheet data (from CSV file)
+     * @param columnNames the names of the columns
+     * @return the list of run ranges, which is empty if no valid records were found
+     */
+    public static final List<RunRange> findRunRanges(final RunSpreadsheet runSheet, final Set<String> columnNames) {
+        final List<RunRange> ranges = new ArrayList<RunRange>();
+
+        final Iterator<CSVRecord> it = runSheet.getRecords().iterator();
+        if (!it.hasNext()) {
+            // No records at all; return early instead of calling next() on an exhausted iterator.
+            return ranges;
+        }
+
+        // The first record is consumed without being examined, as before.
+        // NOTE(review): presumably it is a non-data row of the export -- confirm against RunSpreadsheet.
+        it.next();
+
+        RunRange range = null;
+        while (it.hasNext()) {
+            final CSVRecord record = it.next();
+            if (!isValidRecord(record, columnNames)) {
+                // An invalid record closes the open range, if there is one.
+                if (range != null) {
+                    ranges.add(range);
+                    range = null;
+                }
+                continue;
+            }
+            if (range != null && !range.inRange(record)) {
+                // The values changed, so the current range is complete.
+                ranges.add(range);
+                range = null;
+            }
+            if (range == null) {
+                range = new RunRange(columnNames);
+            }
+            // Update the range from the current record.
+            range.update(record);
+        }
+
+        // Add the range that was still open when the records ran out; it used to be dropped.
+        if (range != null) {
+            ranges.add(range);
+        }
+
+        return ranges;
+    }
+
+    /**
+     * Return <code>true</code> if the <code>CSVRecord</code> is valid, which means it has a run number and
+     * non-blank data in the columns used by this range.
+     *
+     * @param record the <code>CSVRecord</code> to check
+     * @param columnNames the names of the columns
+     * @return <code>true</code> if record is valid
+     */
+    private static boolean isValidRecord(final CSVRecord record, final Set<String> columnNames) {
+        try {
+            // Check if run number can be parsed.
+            Integer.parseInt(record.get("run"));
+        } catch (final NumberFormatException e) {
+            // Run number is bad.
+            return false;
+        }
+        for (final String columnName : columnNames) {
+            // Check that required column data is not null, blank, or empty string.
+            final String value = record.get(columnName);
+            if (value == null || value.trim().isEmpty()) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * The names of the columns.
+     */
+    private final Set<String> columnNames;
+
+    /**
+     * The end of the run range (the highest run number seen so far).
+     */
+    private int runEnd = Integer.MIN_VALUE;
+
+    /**
+     * The start of the run range (the lowest run number seen so far).
+     */
+    private int runStart = Integer.MAX_VALUE;
+
+    /**
+     * The mapping of column names to values.
+     */
+    private final Map<String, String> values = new LinkedHashMap<String, String>();
+
+    /**
+     * Create a new range.
+     *
+     * @param columnNames the names of the columns
+     * @throws IllegalArgumentException if <code>columnNames</code> is <code>null</code>
+     */
+    RunRange(final Set<String> columnNames) {
+        if (columnNames == null) {
+            throw new IllegalArgumentException("columnNames is null");
+        }
+        this.columnNames = columnNames;
+    }
+
+    /**
+     * Get the last run number in the range.
+     *
+     * @return the last run number in the range
+     */
+    public int getRunEnd() {
+        return runEnd;
+    }
+
+    /**
+     * Get the first run number in the range.
+     *
+     * @return the first run number in the range
+     */
+    public int getRunStart() {
+        return runStart;
+    }
+
+    /**
+     * Get the value of a field by column name.
+     *
+     * @param columnName the column name
+     * @return the value of the field
+     */
+    public String getValue(String columnName) {
+        return this.values.get(columnName);
+    }
+
+    /**
+     * Get the raw values of the data.
+     *
+     * @return the raw data values
+     */
+    public Collection<String> getValues() {
+        return this.values.values();
+    }
+
+    /**
+     * Get the names of the columns used by this range.
+     *
+     * @return the names of the columns
+     */
+    public Set<String> getColumnNames() {
+        return this.columnNames;
+    }
+
+    /**
+     * Return <code>true</code> if the record is in the range, e.g. its data values are the same.
+     *
+     * @param record the <code>CSVRecord</code> containing the run data
+     * @return <code>true</code> if the record is in range
+     */
+    private boolean inRange(final CSVRecord record) {
+        for (final String columnName : columnNames) {
+            if (!record.get(columnName).equals(this.values.get(columnName))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Update the range from a record; column values are captured from the first record only.
+     * @param record the <code>CSVRecord</code> with the run's data
+     */
+    private void update(final CSVRecord record) {
+        final int run = Integer.parseInt(record.get("run"));
+        if (run < runStart) {
+            this.runStart = run;
+        }
+        if (run > runEnd) {
+            this.runEnd = run;
+        }
+        if (values.isEmpty()) {
+            for (final String columnName : columnNames) {
+                this.values.put(columnName, record.get(columnName));
+            }
+        }
+    }
+
+    /**
+     * Convert this object to a string.
+     *
+     * @return this object converted to a string
+     */
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        sb.append("RunRange { ");
+        sb.append("runStart: ").append(this.runStart).append(", ");
+        sb.append("runEnd: ").append(this.runEnd).append(", ");
+        for (final String columnName : columnNames) {
+            sb.append(columnName).append(": ").append(values.get(columnName)).append(", ");
+        }
+        sb.setLength(sb.length() - 2);
+        sb.append(" }");
+        return sb.toString();
+    }
+
+    /**
+     * Get the list of unique values from a set of run ranges.
+     *
+     * @param ranges the run ranges
+     * @return the list of unique values from the field values
+     */
+    public static List<Collection<String>> getUniqueValues(List<RunRange> ranges) {
+        final List<Collection<String>> uniqueValuesList = new ArrayList<Collection<String>>();
+        for (final RunRange range : ranges) {
+            final Collection<String> values = range.getValues();
+            if (!contains(values, uniqueValuesList)) {
+                uniqueValuesList.add(values);
+            }
+        }
+        return uniqueValuesList;
+    }
+
+    /**
+     * Return <code>true</code> if the values are already in the unique values list.
+     * <p>
+     * Collections match only when they have the same size and equal elements in the same iteration order.
+     *
+     * @param values the list of field values for a row
+     * @param uniqueValuesList the unique values list
+     * @return <code>true</code> if the values are already in the unique values list
+     */
+    private static boolean contains(Collection<String> values, List<Collection<String>> uniqueValuesList) {
+        final List<String> candidate = new ArrayList<String>(values);
+        for (final Collection<String> uniqueValues : uniqueValuesList) {
+            if (candidate.equals(new ArrayList<String>(uniqueValues))) {
+                return true;
+            }
+        }
+        return false;
+    }
+}