use of htsjdk.samtools.util.IntervalList in project gatk-protected by broadinstitute.
the class HetPulldownCalculatorUnitTest method inputGetTumorHetPulldown.
@DataProvider(name = "inputGetTumorHetPulldown")
public Object[][] inputGetTumorHetPulldown() {
final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
normalHetIntervals.add(new Interval("1", 11522, 11522));
normalHetIntervals.add(new Interval("1", 12098, 12098));
normalHetIntervals.add(new Interval("1", 14630, 14630));
normalHetIntervals.add(new Interval("2", 14689, 14689));
normalHetIntervals.add(new Interval("2", 14982, 14982));
return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.
the class CollectTargetedMetrics method doWork.
/**
* Asserts that files are readable and writable and then fires off an
* HsMetricsCalculator instance to do the real work.
*/
@Override
protected Object doWork() {
for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (PER_TARGET_COVERAGE != null)
IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());
ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
}
final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord record : reader) {
collector.acceptRecord(record, null);
progress.record(record);
}
// Write the output file
final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
collector.finish();
collector.addAllLevelsToFile(metrics);
metrics.write(OUTPUT);
CloserUtil.close(reader);
return null;
}
use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.
the class RnaSeqMetricsCollector method makeOverlapDetector.
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile) {
OverlapDetector<Interval> ribosomalSequenceOverlapDetector = new OverlapDetector<>(0, 0);
if (ribosomalIntervalsFile != null) {
final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
try {
SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), ribosomalIntervals.getHeader().getSequenceDictionary());
} catch (SequenceUtil.SequenceListsDifferException e) {
throw new UserException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
}
final IntervalList uniquedRibosomalIntervals = ribosomalIntervals.uniqued();
final List<Interval> intervals = uniquedRibosomalIntervals.getIntervals();
ribosomalSequenceOverlapDetector.addAll(intervals, intervals);
}
return ribosomalSequenceOverlapDetector;
}
use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.
the class IntervalListScatterer method scatter.
public List<IntervalList> scatter(final IntervalList sourceIntervalList, final int scatterCount, final boolean isUniqued) {
Utils.validateArg(scatterCount >= 1, "scatterCount < 1");
final IntervalList uniquedList = isUniqued ? sourceIntervalList : sourceIntervalList.uniqued();
final long idealSplitLength = deduceIdealSplitLength(uniquedList, scatterCount);
final List<IntervalList> accumulatedIntervalLists = new ArrayList<>();
IntervalList runningIntervalList = new IntervalList(uniquedList.getHeader());
final ArrayDeque<Interval> intervalQueue = new ArrayDeque<>(uniquedList.getIntervals());
while (!intervalQueue.isEmpty() && accumulatedIntervalLists.size() < scatterCount - 1) {
final Interval interval = intervalQueue.pollFirst();
final long projectedSize = runningIntervalList.getBaseCount() + interval.length();
if (projectedSize <= idealSplitLength) {
runningIntervalList.add(interval);
} else {
switch(mode) {
case INTERVAL_SUBDIVISION:
final int amountToConsume = (int) (idealSplitLength - runningIntervalList.getBaseCount());
final Interval left = new Interval(interval.getContig(), interval.getStart(), interval.getStart() + amountToConsume - 1, interval.isNegativeStrand(), interval.getName());
final Interval right = new Interval(interval.getContig(), interval.getStart() + amountToConsume, interval.getEnd(), interval.isNegativeStrand(), interval.getName());
runningIntervalList.add(left);
// Push back the excess back onto our queue for reconsideration.
intervalQueue.addFirst(right);
break;
case BALANCING_WITHOUT_INTERVAL_SUBDIVISION:
if (runningIntervalList.getIntervals().isEmpty()) {
runningIntervalList.add(interval);
} else {
// Push this interval into the next scatter; re-inject it into the queue, then advance the scatter.
intervalQueue.addFirst(interval);
accumulatedIntervalLists.add(runningIntervalList.uniqued());
runningIntervalList = new IntervalList(uniquedList.getHeader());
}
break;
}
}
if (runningIntervalList.getBaseCount() >= idealSplitLength) {
accumulatedIntervalLists.add(runningIntervalList.uniqued());
runningIntervalList = new IntervalList(uniquedList.getHeader());
}
}
// Flush the remaining intervals into the last split.
while (!intervalQueue.isEmpty()) {
runningIntervalList.add(intervalQueue.pollFirst());
}
if (!runningIntervalList.getIntervals().isEmpty()) {
accumulatedIntervalLists.add(runningIntervalList.uniqued());
}
return accumulatedIntervalLists;
}
use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.
the class IntervalListTools method doWork.
@Override
protected Object doWork() {
// Check inputs
for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
for (final File f : SECOND_INPUT) IOUtil.assertFileIsReadable(f);
if (OUTPUT != null) {
if (SCATTER_COUNT == 1) {
IOUtil.assertFileIsWritable(OUTPUT);
} else {
IOUtil.assertDirectoryIsWritable(OUTPUT);
}
}
// Read in the interval lists and apply any padding
final List<IntervalList> lists = openIntervalLists(INPUT);
// same for the second list
final List<IntervalList> secondLists = openIntervalLists(SECOND_INPUT);
if (UNIQUE && !SORT) {
LOG.warn("UNIQUE=true requires sorting but SORT=false was specified. Results will be sorted!");
}
final IntervalList result = ACTION.act(lists, secondLists);
if (SCATTER_COUNT > 1) {
// Scattering requires a uniqued, sorted interval list. We want to do this up front (before BREAKING AT BANDS)
SORT = true;
UNIQUE = true;
}
if (INVERT) {
// no need to sort, since return will be sorted by definition.
SORT = false;
UNIQUE = true;
}
final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;
//only get unique if this has been asked unless inverting (since the invert will return a unique list)
List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();
if (BREAK_BANDS_AT_MULTIPLES_OF > 0) {
finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF);
}
// Decide on a PG ID and make a program group
final SAMFileHeader header = result.getHeader();
final Set<String> pgs = new HashSet<>();
for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId());
for (int i = 1; i < Integer.MAX_VALUE; ++i) {
if (!pgs.contains(String.valueOf(i))) {
final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
pg.setCommandLine(getCommandLine());
pg.setProgramName(getClass().getSimpleName());
header.addProgramRecord(pg);
break;
}
}
// Add any comments
if (COMMENT != null) {
for (final String comment : COMMENT) {
header.addComment(comment);
}
}
final IntervalList output = new IntervalList(header);
for (final Interval i : finalIntervals) {
output.add(i);
}
final List<IntervalList> resultIntervals;
if (OUTPUT != null) {
if (SCATTER_COUNT == 1) {
output.write(OUTPUT);
resultIntervals = Arrays.asList(output);
} else {
final List<IntervalList> scattered = writeScatterIntervals(output);
LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
if (scattered.size() != SCATTER_COUNT) {
LOG.warn(String.format("Requested scatter width of %s, but only emitted %s. (This may be an expected consequence of running in %s mode.)", SCATTER_COUNT, scattered.size(), IntervalListScatterer.Mode.BALANCING_WITHOUT_INTERVAL_SUBDIVISION));
}
resultIntervals = scattered;
}
} else {
resultIntervals = Arrays.asList(output);
}
long totalUniqueBaseCount = 0;
long intervalCount = 0;
for (final IntervalList finalInterval : resultIntervals) {
totalUniqueBaseCount = finalInterval.getUniqueBaseCount();
intervalCount += finalInterval.size();
}
LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
return null;
}
Aggregations