/*
 * Commit metadata preserved from the notification header that preceded this source:
 *   Author: [log in to unmask]
 *   Date: Wed May 27 19:03:58 2015
 *   New Revision: 3046
 *   Log: Add class for extracting run information from spreadsheet export.
 *   Added: java/trunk/conditions/src/main/java/org/hps/conditions/run/RunRange.java
 */
package org.hps.conditions.run;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.csv.CSVRecord;

/**
 * Used with the {@link RunSpreadsheet} to find ranges of runs where columns have the same values so they can be assigned
 * a conditions record with a run start and end range.
 * <p>
 * Bad rows such as ones without run numbers or with invalid data values are skipped and not included in a range. A bad
 * row also terminates the range that was being built when it was encountered.
 *
 * @author Jeremy McCormick
 */
public final class RunRange {

    /**
     * The name of the column holding the run number.
     */
    private static final String RUN_COLUMN = "run";

    /**
     * Find run ranges for conditions data given a set of column names and the full run spreadsheet.
     * <p>
     * Records are processed in iteration order. Consecutive valid records whose values agree in every named column are
     * merged into a single range; a record with different values, or an invalid record, closes the current range.
     *
     * @param runSheet the run spreadsheet data (from CSV file)
     * @param columnNames the names of the columns
     * @return the list of run ranges (empty if there are no valid records)
     * @throws IllegalArgumentException if <code>runSheet</code> or <code>columnNames</code> is <code>null</code>
     */
    public static final List<RunRange> findRunRanges(final RunSpreadsheet runSheet, final Set<String> columnNames) {
        if (runSheet == null) {
            throw new IllegalArgumentException("runSheet is null");
        }
        if (columnNames == null) {
            throw new IllegalArgumentException("columnNames is null");
        }

        final List<RunRange> ranges = new ArrayList<RunRange>();

        // The range currently being accumulated, or null if none is open.
        RunRange range = null;

        // Every record is examined, including the first one. A leading row that is not real run data
        // (e.g. one without a parseable run number) is rejected by the validity check like any other bad row.
        final Iterator<CSVRecord> it = runSheet.getRecords().iterator();
        while (it.hasNext()) {
            final CSVRecord record = it.next();

            if (!isValidRecord(record, columnNames)) {
                // An invalid record closes the open range (if any) and is itself skipped.
                if (range != null) {
                    ranges.add(range);
                    range = null;
                }
                continue;
            }

            if (range != null && !range.inRange(record)) {
                // Values changed: close the open range so a new one is started below.
                ranges.add(range);
                range = null;
            }

            if (range == null) {
                range = new RunRange(columnNames);
            }

            // Extend the open range with this record's run number.
            range.update(record);
        }

        // Do not lose the range that was still open when the records ran out.
        if (range != null) {
            ranges.add(range);
        }

        return ranges;
    }

    /**
     * Return <code>true</code> if the <code>CSVRecord</code> is valid, which means it has a run number and
     * data in the columns used by this range.
     *
     * @param record the <code>CSVRecord</code> to check
     * @param columnNames the names of the columns
     * @return <code>true</code> if record is valid
     */
    private static boolean isValidRecord(final CSVRecord record, final Set<String> columnNames) {
        try {
            // Check if run number can be parsed (a null value also raises NumberFormatException).
            Integer.parseInt(record.get(RUN_COLUMN));
        } catch (final NumberFormatException e) {
            // Run number is bad.
            return false;
        }
        for (final String columnName : columnNames) {
            // Check that required column data is not null, blank, or empty string.
            final String value = record.get(columnName);
            if (value == null || value.trim().length() == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Get the list of unique values from a set of run ranges.
     * <p>
     * The result keeps the order in which each distinct combination of values first appears in <code>ranges</code>.
     *
     * @param ranges the run ranges
     * @return the list of unique values from the field values
     */
    public static List<Collection<String>> getUniqueValues(List<RunRange> ranges) {
        final List<Collection<String>> uniqueValuesList = new ArrayList<Collection<String>>();
        for (final RunRange range : ranges) {
            final Collection<String> values = range.getValues();
            if (!contains(values, uniqueValuesList)) {
                uniqueValuesList.add(values);
            }
        }
        return uniqueValuesList;
    }

    /**
     * Return <code>true</code> if the values are already in the unique values list.
     * <p>
     * Two collections match only if they have the same number of elements and equal elements at every position.
     *
     * @param values the list of field values for a row
     * @param uniqueValuesList the unique values list
     * @return <code>true</code> if the values are already in the unique values list
     */
    private static boolean contains(Collection<String> values, List<Collection<String>> uniqueValuesList) {
        // List.equals is ordered, size-aware and null-safe, unlike a hand-rolled paired-iterator walk.
        final List<String> candidate = new ArrayList<String>(values);
        for (final Collection<String> uniqueValues : uniqueValuesList) {
            if (candidate.equals(new ArrayList<String>(uniqueValues))) {
                return true;
            }
        }
        return false;
    }

    /**
     * The names of the columns.
     */
    private final Set<String> columnNames;

    /**
     * The end of the run range (highest run number seen); <code>Integer.MIN_VALUE</code> until the first update.
     */
    private int runEnd = Integer.MIN_VALUE;

    /**
     * The start of the run range (lowest run number seen); <code>Integer.MAX_VALUE</code> until the first update.
     */
    private int runStart = Integer.MAX_VALUE;

    /**
     * The mapping of column names to values.
     */
    private final Map<String, String> values = new LinkedHashMap<String, String>();

    /**
     * Create a new range.
     *
     * @param columnNames the names of the columns
     * @throws IllegalArgumentException if <code>columnNames</code> is <code>null</code>
     */
    RunRange(final Set<String> columnNames) {
        if (columnNames == null) {
            throw new IllegalArgumentException("columnNames is null");
        }
        this.columnNames = columnNames;
    }

    /**
     * Get the last run number in the range.
     *
     * @return the last run number in the range
     */
    public int getRunEnd() {
        return runEnd;
    }

    /**
     * Get the first run number in the range.
     *
     * @return the first run number in the range
     */
    public int getRunStart() {
        return runStart;
    }

    /**
     * Get the value of a field by column name.
     *
     * @param columnName the column name
     * @return the value of the field, or <code>null</code> if there is no value stored for that column
     */
    public String getValue(String columnName) {
        return this.values.get(columnName);
    }

    /**
     * Get the raw values of the data.
     *
     * @return the raw data values
     */
    public Collection<String> getValues() {
        return this.values.values();
    }

    /**
     * Get the names of the columns used by this range.
     *
     * @return the names of the columns
     */
    public Set<String> getColumnNames() {
        return this.columnNames;
    }

    /**
     * Return <code>true</code> if the record is in the range, e.g. its data values are the same.
     *
     * @param record the <code>CSVRecord</code> containing the run data
     * @return <code>true</code> if the record is in range
     */
    private boolean inRange(final CSVRecord record) {
        for (final String columnName : columnNames) {
            if (!record.get(columnName).equals(this.values.get(columnName))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Update the range from a record.
     * <p>
     * Widens the run number bounds to include the record's run and, on the first update only, captures the record's
     * values for this range's columns.
     *
     * @param record the <code>CSVRecord</code> with the run's data
     */
    private void update(final CSVRecord record) {
        final int run = Integer.parseInt(record.get(RUN_COLUMN));
        this.runStart = Math.min(this.runStart, run);
        this.runEnd = Math.max(this.runEnd, run);
        if (values.isEmpty()) {
            // The first record defines the values that identify this range.
            for (final String columnName : columnNames) {
                this.values.put(columnName, record.get(columnName));
            }
        }
    }

    /**
     * Convert this object to a string.
     *
     * @return this object converted to a string
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("RunRange { ");
        sb.append("runStart: ").append(this.runStart).append(", ");
        sb.append("runEnd: ").append(this.runEnd).append(", ");
        for (final String columnName : columnNames) {
            sb.append(columnName).append(": ").append(values.get(columnName)).append(", ");
        }
        // Strip the trailing ", " separator (always present because of the runEnd entry).
        sb.setLength(sb.length() - 2);
        sb.append(" }");
        return sb.toString();
    }
}