Search in sources :

Example 6 with Interval

use of htsjdk.samtools.util.Interval in project gatk by broadinstitute.

the class BedToIntervalList method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        final SAMFileHeader header = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY);
        final IntervalList intervalList = new IntervalList(header);
        /**
             * NB: BED is zero-based, but a BEDCodec by default (since it is returns tribble Features) has an offset of one,
             * so it returns 1-based starts.  Ugh.  Set to zero.
             */
        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(BEDCodec.StartOffset.ZERO), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(logger, (int) 1e6);
        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            /**
                 * NB: BED is zero-based, so we need to add one here to make it one-based.  Please observe we set the start
                 * offset to zero when creating the BEDCodec.
                 */
            final int start = bedFeature.getStart() + 1;
            /**
                 * NB: BED is 0-based OPEN (which, for the end is equivalent to 1-based closed).
                 */
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            String name = bedFeature.getName();
            if (name.isEmpty())
                name = null;
            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);
            // Do some validation
            if (null == sequenceRecord) {
                throw new GATKException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new GATKException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new GATKException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if (end < 1) {
                throw new GATKException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new GATKException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new GATKException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }
            final Interval interval = new Interval(sequenceName, start, end, bedFeature.getStrand() == Strand.POSITIVE, name);
            intervalList.add(interval);
            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);
        // Sort and write the output
        intervalList.uniqued().write(OUTPUT);
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
    return null;
}
Also used : ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) IntervalList(htsjdk.samtools.util.IntervalList) BEDFeature(htsjdk.tribble.bed.BEDFeature) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BEDCodec(htsjdk.tribble.bed.BEDCodec) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Interval(htsjdk.samtools.util.Interval)

Example 7 with Interval

use of htsjdk.samtools.util.Interval in project gatk by broadinstitute.

the class IntervalUtilsUnitTest method testIntervalFileToListNegativeOneLength.

// TODO - remove once a corrected version of the exome interval list is released.
@Test(dataProvider = "negativeOneLengthIntervalTestData")
public void testIntervalFileToListNegativeOneLength(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd) throws Exception {
    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    // Need one good interval or else a UserException.MalformedFile( is thrown if no intervals
    picardIntervals.add(new Interval(contig, 1, 2, true, "dummyname0"));
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname1"));
    final File picardIntervalFile = createTempFile("testIntervalFileToListNegativeOneLength", ".intervals");
    picardIntervals.write(picardIntervalFile);
    IntervalUtils.intervalFileToList(genomeLocParser, picardIntervalFile.getAbsolutePath());
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 8 with Interval

use of htsjdk.samtools.util.Interval in project gatk-protected by broadinstitute.

the class SplitIntervals method onTraversalStart.

@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // in general dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
    // to use the sequence dict from a bam or vcf is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary) : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
Also used : IntStream(java.util.stream.IntStream) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) IntervalListScatterer(org.broadinstitute.hellbender.tools.picard.interval.IntervalListScatterer) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) DecimalFormat(java.text.DecimalFormat) IntervalList(htsjdk.samtools.util.IntervalList) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) IntervalArgumentCollection(org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) GATKTool(org.broadinstitute.hellbender.engine.GATKTool) Interval(htsjdk.samtools.util.Interval) List(java.util.List) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IntervalListScatterer(org.broadinstitute.hellbender.tools.picard.interval.IntervalListScatterer) IntervalList(htsjdk.samtools.util.IntervalList) DecimalFormat(java.text.DecimalFormat) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) File(java.io.File) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Interval(htsjdk.samtools.util.Interval)

Example 9 with Interval

use of htsjdk.samtools.util.Interval in project gatk by broadinstitute.

the class IntervalUtils method intervalFileToList.

/**
     * Read a file of genome locations to process. The file may be in Picard
     * or GATK interval format.
     *
     * @param glParser   GenomeLocParser
     * @param fileName  interval file
     * @return List<GenomeLoc> List of Genome Locs that have been parsed from file
     */
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String fileName) {
    Utils.nonNull(glParser, "glParser is null");
    Utils.nonNull(fileName, "file name is null");
    final File inputFile = new File(fileName);
    final List<GenomeLoc> ret = new ArrayList<>();
    /**
         * First try to read the file as a Picard interval file since that's well structured --
         * we'll fail quickly if it's not a valid file.
         */
    boolean isPicardInterval = false;
    try {
        // Note: Picard will skip over intervals with contigs not in the sequence dictionary
        final IntervalList il = IntervalList.fromFile(inputFile);
        isPicardInterval = true;
        for (final Interval interval : il.getIntervals()) {
            // https://github.com/broadinstitute/gatk/issues/2089
            if (interval.getStart() - interval.getEnd() == 1) {
                logger.warn("Ignoring possibly incorrectly converted length 1 interval : " + interval);
            } else if (glParser.isValidGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true)) {
                ret.add(glParser.createGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true));
            } else {
                throw new UserException(inputFile.getAbsolutePath() + " has an invalid interval : " + interval);
            }
        }
    }// if that didn't work, try parsing file as a GATK interval file
     catch (final Exception e) {
        if (// definitely a picard file, but we failed to parse
        isPicardInterval) {
            throw new UserException.CouldNotReadInputFile(inputFile, e);
        } else {
            try (XReadLines reader = new XReadLines(new File(fileName))) {
                for (final String line : reader) {
                    if (!line.trim().isEmpty()) {
                        ret.add(glParser.parseGenomeLoc(line));
                    }
                }
            } catch (final IOException e2) {
                throw new UserException.CouldNotReadInputFile(inputFile, e2);
            }
        }
    }
    if (ret.isEmpty()) {
        throw new UserException.MalformedFile(new File(fileName), "It contains no intervals.");
    }
    return ret;
}
Also used : IOException(java.io.IOException) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) XReadLines(org.broadinstitute.hellbender.utils.text.XReadLines) IntervalList(htsjdk.samtools.util.IntervalList) UserException(org.broadinstitute.hellbender.exceptions.UserException) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval)

Example 10 with Interval

use of htsjdk.samtools.util.Interval in project gatk-protected by broadinstitute.

the class HetPulldownCalculatorUnitTest method inputGetTumorHetPulldown15.

@DataProvider(name = "inputGetTumorHetPulldownMin15")
public Object[][] inputGetTumorHetPulldown15() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));
    return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Interval(htsjdk.samtools.util.Interval) DataProvider(org.testng.annotations.DataProvider)

Aggregations

Interval (htsjdk.samtools.util.Interval)24 IntervalList (htsjdk.samtools.util.IntervalList)23 File (java.io.File)10 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)8 SAMFileHeader (htsjdk.samtools.SAMFileHeader)6 DataProvider (org.testng.annotations.DataProvider)6 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)5 Test (org.testng.annotations.Test)5 QueryInterval (htsjdk.samtools.QueryInterval)4 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)4 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)3 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)3 SAMFileWriter (htsjdk.samtools.SAMFileWriter)2 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)2 SAMRecord (htsjdk.samtools.SAMRecord)2 SAMRecordSetBuilder (htsjdk.samtools.SAMRecordSetBuilder)2 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)2 MetricsFile (htsjdk.samtools.metrics.MetricsFile)2 RuntimeIOException (htsjdk.samtools.util.RuntimeIOException)2 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)2