use of org.broadinstitute.hellbender.utils.tsv.DataLine in project gatk by broadinstitute.
the class CombineReadCounts method readCountFileReader.
/**
 * Creates a read-count file reader given the input file and the expected target collection.
 * <p>
 * The header of the input must contain the target name column, the three coordinate columns
 * (contig, start and end), or both; otherwise a format exception is thrown while the header
 * is processed.
 * </p>
 * @param file the input file.
 * @param targets the expected targets in the input file.
 * @return never {@code null}.
 * @throws UserException.CouldNotReadInputFile if an {@link IOException} occurs while opening the reader.
 */
private TableReader<ReadCountRecord> readCountFileReader(final File file, final TargetCollection<Target> targets) {
    try {
        return new TableReader<ReadCountRecord>(file) {

            // Whether the header declares the target name column.
            private boolean hasName;

            // Whether the header declares all of the contig, start and end columns.
            private boolean hasCoordinates;

            // Number of count columns, as reported by readCountColumnNames.
            private int countColumnCount;

            // Position within the header of each count column, in the order returned by readCountColumnNames.
            private int[] countColumnIndexes;

            @Override
            public void processColumns(final TableColumnCollection columns) {
                hasCoordinates = columns.containsAll(TargetTableColumn.CONTIG.toString(), TargetTableColumn.START.toString(), TargetTableColumn.END.toString());
                hasName = columns.contains(TargetTableColumn.NAME.toString());
                if (!hasCoordinates && !hasName) {
                    throw formatException("header contain neither coordinates nor target name columns");
                }
                final List<String> countColumnNames = readCountColumnNames(columns);
                countColumnCount = countColumnNames.size();
                countColumnIndexes = new int[countColumnCount];
                for (int i = 0; i < countColumnCount; i++) {
                    countColumnIndexes[i] = columns.indexOf(countColumnNames.get(i));
                }
            }

            @Override
            protected ReadCountRecord createRecord(final DataLine dataLine) {
                final double[] counts = new double[countColumnCount];
                final Target target = createTarget(dataLine);
                for (int i = 0; i < counts.length; i++) {
                    counts[i] = dataLine.getDouble(countColumnIndexes[i]);
                }
                return new ReadCountRecord(target, counts);
            }

            /**
             * Extracts the target object out of a data input line.
             * <p>
             * When the name column is present the target is looked up by name; a name unknown to
             * the target collection yields a new target built from the line itself. When only
             * coordinates are present, a target with exactly those coordinates must exist in the
             * target collection.
             * </p>
             * @param dataLine the input data line.
             * @return never {@code null}.
             */
            private Target createTarget(final DataLine dataLine) {
                if (hasName) {
                    final String name = dataLine.get(TargetTableColumn.NAME);
                    final Target target = targets.target(name);
                    // Parsed once and reused below; null when the file has no coordinate columns.
                    final SimpleInterval interval = createInterval(dataLine);
                    if (target == null) {
                        return new Target(name, interval);
                    } else if (interval != null && !interval.equals(target.getInterval())) {
                        throw new UserException.BadInput(String.format("invalid target '%s' coordinates: expected %s but found %s", name, target.getInterval(), interval));
                    } else {
                        return target;
                    }
                } else {
                    // hasCoordinates must be true.
                    final SimpleInterval interval = createInterval(dataLine);
                    // NOTE(review): targets(interval) presumably returns every target overlapping the
                    // interval -- confirm against TargetCollection. Selecting only among exact matches
                    // avoids rejecting the line when an arbitrary, non-matching element is returned first.
                    return targets.targets(interval).stream()
                            .filter(candidate -> interval.equals(candidate.getInterval()))
                            .findFirst()
                            .orElseThrow(() -> formatException("target not found with coordinates " + interval));
                }
            }

            /**
             * Extract the interval out of a data line.
             * @param dataLine the input data line.
             * @return {@code null} if the interval cannot be determined from the input file alone.
             */
            private SimpleInterval createInterval(final DataLine dataLine) {
                if (hasCoordinates) {
                    return new SimpleInterval(dataLine.get(TargetTableColumn.CONTIG), dataLine.getInt(TargetTableColumn.START), dataLine.getInt(TargetTableColumn.END));
                } else {
                    return null;
                }
            }
        };
    } catch (final IOException ex) {
        throw new UserException.CouldNotReadInputFile(file, ex);
    }
}
use of org.broadinstitute.hellbender.utils.tsv.DataLine in project gatk by broadinstitute.
the class ReadCountCollectionUtils method createReadCountRecordTableWriterWithoutIntervals.
/**
 * Creates a table writer whose lines start with the target name and omit the target interval.
 * <p>
 * For each record the target name is appended first and the record then appends its counts
 * through {@code appendCountsTo}.
 * </p>
 * @param writer where to output the table formatted content.
 * @param columnNames names of all the columns of the output table, in order.
 * @return the new table writer.
 * @throws IOException propagated from the {@code TableWriter} constructor.
 */
private static TableWriter<ReadCountRecord> createReadCountRecordTableWriterWithoutIntervals(final Writer writer, List<String> columnNames) throws IOException {
    return new TableWriter<ReadCountRecord>(writer, new TableColumnCollection(columnNames)) {
        @Override
        protected void composeLine(final ReadCountRecord record, final DataLine dataLine) {
            // Name column first, then the per-column counts.
            dataLine.append(record.getTarget().getName());
            record.appendCountsTo(dataLine);
        }
    };
}
use of org.broadinstitute.hellbender.utils.tsv.DataLine in project gatk-protected by broadinstitute.
the class ReadCountCollectionUtils method writerWithIntervals.
/**
 * Builds a table writer that emits, for every record, the target interval (contig, start, end),
 * the target name and then the record counts.
 * @param writer where to output the table formatted content.
 * @param countColumnNames list of count column names.
 * @return never {@code null}.
 * @throws IOException if there is some low level IO problem creating the writer.
 * @throws IllegalArgumentException if {@code countColumnNames} is {@code null}, contains
 * {@code null} or a non valid count column name (e.g. a reserved word).
 */
public static TableWriter<ReadCountRecord> writerWithIntervals(final Writer writer, final List<String> countColumnNames) throws IOException {
    // Fixed leading columns followed by the caller-provided count columns.
    final List<String> header = new ArrayList<>();
    header.add(TargetTableColumn.CONTIG.toString());
    header.add(TargetTableColumn.START.toString());
    header.add(TargetTableColumn.END.toString());
    header.add(TargetTableColumn.NAME.toString());
    header.addAll(Utils.nonNull(countColumnNames));

    return new TableWriter<ReadCountRecord>(writer, new TableColumnCollection(header)) {
        @Override
        protected void composeLine(final ReadCountRecord record, final DataLine dataLine) {
            final SimpleInterval location = record.getTarget().getInterval();
            // A target without an interval cannot be written in this layout.
            if (location == null) {
                throw new IllegalStateException("invalid combination of targets with and without intervals defined");
            }
            dataLine.append(location.getContig());
            dataLine.append(location.getStart());
            dataLine.append(location.getEnd());
            dataLine.append(record.getTarget().getName());
            record.appendCountsTo(dataLine);
        }
    };
}
use of org.broadinstitute.hellbender.utils.tsv.DataLine in project gatk-protected by broadinstitute.
the class ReadCountCollectionUtils method createReadCountRecordTableWriterWithoutIntervals.
/**
 * Builds a table writer for read-count records that does not output target intervals.
 * <p>
 * Each composed line consists of the target name followed by whatever the record appends
 * through {@code appendCountsTo}.
 * </p>
 * @param writer where to output the table formatted content.
 * @param columnNames names of all the columns of the output table, in order.
 * @return the new table writer.
 * @throws IOException propagated from the {@code TableWriter} constructor.
 */
private static TableWriter<ReadCountRecord> createReadCountRecordTableWriterWithoutIntervals(final Writer writer, List<String> columnNames) throws IOException {
    final TableColumnCollection header = new TableColumnCollection(columnNames);
    return new TableWriter<ReadCountRecord>(writer, header) {
        @Override
        protected void composeLine(final ReadCountRecord record, final DataLine dataLine) {
            // The target name leads the line; the counts follow.
            dataLine.append(record.getTarget().getName());
            record.appendCountsTo(dataLine);
        }
    };
}
use of org.broadinstitute.hellbender.utils.tsv.DataLine in project gatk-protected by broadinstitute.
the class XHMMSegmentCallerBaseIntegrationTest method writeChainInTempFile.
/**
 * Writes the given chain into a temporary tab-separated file.
 * <p>
 * The table has the contig, start, end and name columns followed by one column per element
 * of {@code chain.data}, named {@code SAMPLE_0}, {@code SAMPLE_1}, and so on. One line is
 * written per element of {@code chain.targets}.
 * </p>
 * @param chain the chain whose targets and per-sample data are written.
 * @return the temporary file that was written.
 * @throws UncheckedIOException if an {@link IOException} occurs while writing.
 */
public static File writeChainInTempFile(final XHMMData chain) {
    final File result = createTempFile("chain-", ".tab");

    // Header: the four target columns followed by one synthetic sample column per data series.
    final int sampleCount = chain.data.size();
    final List<String> header = new ArrayList<>(sampleCount + 4);
    header.add(TargetTableColumn.CONTIG.toString());
    header.add(TargetTableColumn.START.toString());
    header.add(TargetTableColumn.END.toString());
    header.add(TargetTableColumn.NAME.toString());
    for (int sample = 0; sample < sampleCount; sample++) {
        header.add("SAMPLE_" + sample);
    }

    try (final TableWriter<Integer> writer = new TableWriter<Integer>(result, new TableColumnCollection(header)) {
        @Override
        protected void composeLine(final Integer record, final DataLine dataLine) {
            // The record is the index of the target (row) being written.
            dataLine.append(chain.targets.get(record).getContig())
                    .append(chain.targets.get(record).getStart())
                    .append(chain.targets.get(record).getEnd())
                    .append(chain.targets.get(record).getName());
            for (final List<Double> series : chain.data) {
                dataLine.append(series.get(record));
            }
        }
    }) {
        // One record per target, identified by its position in chain.targets.
        final int targetCount = chain.targets.size();
        final List<Integer> rowIndexes = new ArrayList<>();
        for (int row = 0; row < targetCount; row++) {
            rowIndexes.add(row);
        }
        writer.writeAllRecords(rowIndexes);
    } catch (final IOException ex) {
        throw new UncheckedIOException(ex);
    }
    return result;
}
Aggregations