Search in sources :

Example 21 with IntervalList

use of htsjdk.samtools.util.IntervalList in project gatk-protected by broadinstitute.

the class HetPulldownCalculatorUnitTest method inputGetTumorHetPulldown.

@DataProvider(name = "inputGetTumorHetPulldown")
public Object[][] inputGetTumorHetPulldown() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 11522, 11522));
    normalHetIntervals.add(new Interval("1", 12098, 12098));
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));
    normalHetIntervals.add(new Interval("2", 14982, 14982));
    return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Interval(htsjdk.samtools.util.Interval) DataProvider(org.testng.annotations.DataProvider)

Example 22 with IntervalList

use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.

the class CollectTargetedMetrics method doWork.

/**
     * Asserts that files are readable and writable and then fires off an
     * HsMetricsCalculator instance to do the real work.
     */
@Override
protected Object doWork() {
    for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null)
        IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
    // Validate that the targets and baits have the same references as the reads file
    SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
    SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());
    ReferenceSequenceFile ref = null;
    if (REFERENCE_SEQUENCE != null) {
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
    }
    final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord record : reader) {
        collector.acceptRecord(record, null);
        progress.record(record);
    }
    // Write the output file
    final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
    collector.finish();
    collector.addAllLevelsToFile(metrics);
    metrics.write(OUTPUT);
    CloserUtil.close(reader);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile)

Example 23 with IntervalList

use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.

the class RnaSeqMetricsCollector method makeOverlapDetector.

public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile) {
    OverlapDetector<Interval> ribosomalSequenceOverlapDetector = new OverlapDetector<>(0, 0);
    if (ribosomalIntervalsFile != null) {
        final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
        try {
            SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), ribosomalIntervals.getHeader().getSequenceDictionary());
        } catch (SequenceUtil.SequenceListsDifferException e) {
            throw new UserException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
        }
        final IntervalList uniquedRibosomalIntervals = ribosomalIntervals.uniqued();
        final List<Interval> intervals = uniquedRibosomalIntervals.getIntervals();
        ribosomalSequenceOverlapDetector.addAll(intervals, intervals);
    }
    return ribosomalSequenceOverlapDetector;
}
Also used : SequenceUtil(htsjdk.samtools.util.SequenceUtil) IntervalList(htsjdk.samtools.util.IntervalList) UserException(org.broadinstitute.hellbender.exceptions.UserException) OverlapDetector(htsjdk.samtools.util.OverlapDetector) Interval(htsjdk.samtools.util.Interval)

Example 24 with IntervalList

use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.

the class IntervalListScatterer method scatter.

public List<IntervalList> scatter(final IntervalList sourceIntervalList, final int scatterCount, final boolean isUniqued) {
    Utils.validateArg(scatterCount >= 1, "scatterCount < 1");
    final IntervalList uniquedList = isUniqued ? sourceIntervalList : sourceIntervalList.uniqued();
    final long idealSplitLength = deduceIdealSplitLength(uniquedList, scatterCount);
    final List<IntervalList> accumulatedIntervalLists = new ArrayList<>();
    IntervalList runningIntervalList = new IntervalList(uniquedList.getHeader());
    final ArrayDeque<Interval> intervalQueue = new ArrayDeque<>(uniquedList.getIntervals());
    while (!intervalQueue.isEmpty() && accumulatedIntervalLists.size() < scatterCount - 1) {
        final Interval interval = intervalQueue.pollFirst();
        final long projectedSize = runningIntervalList.getBaseCount() + interval.length();
        if (projectedSize <= idealSplitLength) {
            runningIntervalList.add(interval);
        } else {
            switch(mode) {
                case INTERVAL_SUBDIVISION:
                    final int amountToConsume = (int) (idealSplitLength - runningIntervalList.getBaseCount());
                    final Interval left = new Interval(interval.getContig(), interval.getStart(), interval.getStart() + amountToConsume - 1, interval.isNegativeStrand(), interval.getName());
                    final Interval right = new Interval(interval.getContig(), interval.getStart() + amountToConsume, interval.getEnd(), interval.isNegativeStrand(), interval.getName());
                    runningIntervalList.add(left);
                    // Push back the excess back onto our queue for reconsideration.
                    intervalQueue.addFirst(right);
                    break;
                case BALANCING_WITHOUT_INTERVAL_SUBDIVISION:
                    if (runningIntervalList.getIntervals().isEmpty()) {
                        runningIntervalList.add(interval);
                    } else {
                        // Push this interval into the next scatter; re-inject it into the queue, then advance the scatter.
                        intervalQueue.addFirst(interval);
                        accumulatedIntervalLists.add(runningIntervalList.uniqued());
                        runningIntervalList = new IntervalList(uniquedList.getHeader());
                    }
                    break;
            }
        }
        if (runningIntervalList.getBaseCount() >= idealSplitLength) {
            accumulatedIntervalLists.add(runningIntervalList.uniqued());
            runningIntervalList = new IntervalList(uniquedList.getHeader());
        }
    }
    // Flush the remaining intervals into the last split.
    while (!intervalQueue.isEmpty()) {
        runningIntervalList.add(intervalQueue.pollFirst());
    }
    if (!runningIntervalList.getIntervals().isEmpty()) {
        accumulatedIntervalLists.add(runningIntervalList.uniqued());
    }
    return accumulatedIntervalLists;
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) Interval(htsjdk.samtools.util.Interval)

Example 25 with IntervalList

use of htsjdk.samtools.util.IntervalList in project gatk by broadinstitute.

the class IntervalListTools method doWork.

@Override
protected Object doWork() {
    // Check inputs
    for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
    for (final File f : SECOND_INPUT) IOUtil.assertFileIsReadable(f);
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            IOUtil.assertFileIsWritable(OUTPUT);
        } else {
            IOUtil.assertDirectoryIsWritable(OUTPUT);
        }
    }
    // Read in the interval lists and apply any padding
    final List<IntervalList> lists = openIntervalLists(INPUT);
    // same for the second list
    final List<IntervalList> secondLists = openIntervalLists(SECOND_INPUT);
    if (UNIQUE && !SORT) {
        LOG.warn("UNIQUE=true requires sorting but SORT=false was specified.  Results will be sorted!");
    }
    final IntervalList result = ACTION.act(lists, secondLists);
    if (SCATTER_COUNT > 1) {
        // Scattering requires a uniqued, sorted interval list.  We want to do this up front (before BREAKING AT BANDS)
        SORT = true;
        UNIQUE = true;
    }
    if (INVERT) {
        // no need to sort, since return will be sorted by definition.
        SORT = false;
        UNIQUE = true;
    }
    final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
    final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;
    //only get unique if this has been asked unless inverting (since the invert will return a unique list)
    List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();
    if (BREAK_BANDS_AT_MULTIPLES_OF > 0) {
        finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF);
    }
    // Decide on a PG ID and make a program group
    final SAMFileHeader header = result.getHeader();
    final Set<String> pgs = new HashSet<>();
    for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId());
    for (int i = 1; i < Integer.MAX_VALUE; ++i) {
        if (!pgs.contains(String.valueOf(i))) {
            final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
            pg.setCommandLine(getCommandLine());
            pg.setProgramName(getClass().getSimpleName());
            header.addProgramRecord(pg);
            break;
        }
    }
    // Add any comments
    if (COMMENT != null) {
        for (final String comment : COMMENT) {
            header.addComment(comment);
        }
    }
    final IntervalList output = new IntervalList(header);
    for (final Interval i : finalIntervals) {
        output.add(i);
    }
    final List<IntervalList> resultIntervals;
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            output.write(OUTPUT);
            resultIntervals = Arrays.asList(output);
        } else {
            final List<IntervalList> scattered = writeScatterIntervals(output);
            LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
            if (scattered.size() != SCATTER_COUNT) {
                LOG.warn(String.format("Requested scatter width of %s, but only emitted %s.  (This may be an expected consequence of running in %s mode.)", SCATTER_COUNT, scattered.size(), IntervalListScatterer.Mode.BALANCING_WITHOUT_INTERVAL_SUBDIVISION));
            }
            resultIntervals = scattered;
        }
    } else {
        resultIntervals = Arrays.asList(output);
    }
    long totalUniqueBaseCount = 0;
    long intervalCount = 0;
    for (final IntervalList finalInterval : resultIntervals) {
        totalUniqueBaseCount = finalInterval.getUniqueBaseCount();
        intervalCount += finalInterval.size();
    }
    LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
    return null;
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Aggregations

IntervalList (htsjdk.samtools.util.IntervalList)37 Interval (htsjdk.samtools.util.Interval)23 File (java.io.File)18 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 UserException (org.broadinstitute.hellbender.exceptions.UserException)7 SAMFileHeader (htsjdk.samtools.SAMFileHeader)6 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)6 IOException (java.io.IOException)6 List (java.util.List)6 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)6 Nucleotide (org.broadinstitute.hellbender.utils.Nucleotide)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 DataProvider (org.testng.annotations.DataProvider)6 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)5 Test (org.testng.annotations.Test)5 htsjdk.samtools (htsjdk.samtools)4 QueryInterval (htsjdk.samtools.QueryInterval)4 DuplicateReadFilter (htsjdk.samtools.filter.DuplicateReadFilter)4 NotPrimaryAlignmentFilter (htsjdk.samtools.filter.NotPrimaryAlignmentFilter)4 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)4