Search in sources :

Example 1 with ReferenceSequence

use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.

the class ClipReads method onTraversalStart.

/**
     * The initialize function.
     */
@Override
public void onTraversalStart() {
    if (qTrimmingThreshold >= 0) {
        logger.info(String.format("Creating Q-score clipper with threshold %d", qTrimmingThreshold));
    }
    //
    if (clipSequencesArgs != null) {
        int i = 0;
        for (String toClip : clipSequencesArgs) {
            i++;
            ReferenceSequence rs = new ReferenceSequence("CMDLINE-" + i, -1, StringUtil.stringToBytes(toClip));
            addSeqToClip(rs.getName(), rs.getBases());
        }
    }
    if (clipSequenceFile != null) {
        ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(clipSequenceFile));
        while (true) {
            ReferenceSequence rs = rsf.nextSequence();
            if (rs == null)
                break;
            else {
                addSeqToClip(rs.getName(), rs.getBases());
            }
        }
    }
    //
    if (cyclesToClipArg != null) {
        cyclesToClip = new ArrayList<>();
        for (String range : cyclesToClipArg.split(",")) {
            try {
                String[] elts = range.split("-");
                int start = Integer.parseInt(elts[0]) - 1;
                int stop = Integer.parseInt(elts[1]) - 1;
                if (start < 0)
                    throw new Exception();
                if (stop < start)
                    throw new Exception();
                logger.info(String.format("Creating cycle clipper %d-%d", start, stop));
                cyclesToClip.add(new MutablePair<>(start, stop));
            } catch (Exception e) {
                throw new RuntimeException("Badly formatted cyclesToClip argument: " + cyclesToClipArg);
            }
        }
    }
    final boolean presorted = EnumSet.of(ClippingRepresentation.WRITE_NS, ClippingRepresentation.WRITE_NS_Q0S, ClippingRepresentation.WRITE_Q0S).contains(clippingRepresentation);
    outputBam = createSAMWriter(OUTPUT, presorted);
    accumulator = new ClippingData(sequencesToClip);
    try {
        outputStats = STATSOUTPUT == null ? null : new PrintStream(STATSOUTPUT);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(STATSOUTPUT, e);
    }
}
Also used : PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) File(java.io.File)

Example 2 with ReferenceSequence

use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.

the class ReferenceFileSource method getAllReferenceBases.

public Map<String, ReferenceBases> getAllReferenceBases() throws IOException {
    try (ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(referencePath))) {
        Map<String, ReferenceBases> bases = new LinkedHashMap<>();
        ReferenceSequence seq;
        while ((seq = referenceSequenceFile.nextSequence()) != null) {
            String name = seq.getName();
            bases.put(name, new ReferenceBases(seq.getBases(), new SimpleInterval(name, 1, seq.length())));
        }
        return bases;
    }
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) File(java.io.File) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) LinkedHashMap(java.util.LinkedHashMap)

Example 3 with ReferenceSequence

use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.

the class ReferenceHadoopSource method getReferenceBases.

@Override
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval) {
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(IOUtils.getPath(referencePath));
    ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
    return new ReferenceBases(sequence.getBases(), interval);
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence)

Example 4 with ReferenceSequence

use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.

the class SinglePassSamProgram method makeItSo.

public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<SinglePassSamProgram> programs) {
    // Setup the standard inputs
    IOUtil.assertFileIsReadable(input);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    // Optionally load up the reference sequence and double check sequence dictionaries
    final ReferenceSequenceFileWalker walker;
    if (referenceSequence == null) {
        walker = null;
    } else {
        IOUtil.assertFileIsReadable(referenceSequence);
        walker = new ReferenceSequenceFileWalker(referenceSequence);
        if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
            SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), walker.getSequenceDictionary());
        }
    }
    // Check on the sort order of the BAM file
    {
        final SortOrder sort = in.getFileHeader().getSortOrder();
        if (sort != SortOrder.coordinate) {
            if (assumeSorted) {
                logger.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
            } else {
                throw new UserException("File " + input.getAbsolutePath() + " should be coordinate sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be coordinate sorted you may pass ASSUME_SORTED=true");
            }
        }
    }
    // Call the abstract setup method!
    boolean anyUseNoRefReads = false;
    for (final SinglePassSamProgram program : programs) {
        program.setup(in.getFileHeader(), input);
        anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
    }
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord rec : in) {
        final ReferenceSequence ref;
        if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
            ref = null;
        } else {
            ref = walker.get(rec.getReferenceIndex());
        }
        for (final SinglePassSamProgram program : programs) {
            program.acceptRead(rec, ref);
        }
        progress.record(rec);
        // See if we need to terminate early?
        if (stopAfter > 0 && progress.getCount() >= stopAfter) {
            break;
        }
        // And see if we're into the unmapped reads at the end
        if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
            break;
        }
    }
    CloserUtil.close(in);
    for (final SinglePassSamProgram program : programs) {
        program.finish();
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker)

Example 5 with ReferenceSequence

use of htsjdk.samtools.reference.ReferenceSequence in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testCachingIndexedFastaReaderTwoStage.

// Tests grabbing sequences around a middle cached value.
@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
    final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
    final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
    SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
    int middleStart = (contig.getSequenceLength() - querySize) / 2;
    int middleStop = middleStart + querySize;
    logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query", contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
    for (int i = 0; i < contig.getSequenceLength(); i += 10) {
        int start = i;
        int stop = start + querySize;
        if (stop <= contig.getSequenceLength()) {
            ReferenceSequence grabMiddle = caching.getSubsequenceAt(contig.getSequenceName(), middleStart, middleStop);
            ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
            ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
            Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
            Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
            Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
        }
    }
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence)14 ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile)5 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)3 SamReader (htsjdk.samtools.SamReader)3 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)3 File (java.io.File)3 UserException (org.broadinstitute.hellbender.exceptions.UserException)3 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)3 SAMRecord (htsjdk.samtools.SAMRecord)2 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases)2 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)2 Test (org.testng.annotations.Test)2 SAMException (htsjdk.samtools.SAMException)1 SortOrder (htsjdk.samtools.SAMFileHeader.SortOrder)1 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)1 SecondaryAlignmentFilter (htsjdk.samtools.filter.SecondaryAlignmentFilter)1 MetricsFile (htsjdk.samtools.metrics.MetricsFile)1