Search in sources :

Example 1 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class IntervalUtils method getContigSizes.

/**
     * Returns a map of contig names with their sizes.
     * @param reference The reference for the intervals.
     * @return A map of contig names with their sizes.
     */
public static Map<String, Integer> getContigSizes(final File reference) {
    final ReferenceSequenceFile referenceSequenceFile = createReference(reference);
    final List<GenomeLoc> locs = GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSequenceFile.getSequenceDictionary()).toList();
    final Map<String, Integer> lengths = new LinkedHashMap<>();
    for (final GenomeLoc loc : locs) {
        lengths.put(loc.getContig(), loc.size());
    }
    return lengths;
}
Also used : ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile)

Example 2 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class ClipReads method onTraversalStart.

/**
     * The initialize function.
     */
@Override
public void onTraversalStart() {
    if (qTrimmingThreshold >= 0) {
        logger.info(String.format("Creating Q-score clipper with threshold %d", qTrimmingThreshold));
    }
    //
    if (clipSequencesArgs != null) {
        int i = 0;
        for (String toClip : clipSequencesArgs) {
            i++;
            ReferenceSequence rs = new ReferenceSequence("CMDLINE-" + i, -1, StringUtil.stringToBytes(toClip));
            addSeqToClip(rs.getName(), rs.getBases());
        }
    }
    if (clipSequenceFile != null) {
        ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(clipSequenceFile));
        while (true) {
            ReferenceSequence rs = rsf.nextSequence();
            if (rs == null)
                break;
            else {
                addSeqToClip(rs.getName(), rs.getBases());
            }
        }
    }
    //
    if (cyclesToClipArg != null) {
        cyclesToClip = new ArrayList<>();
        for (String range : cyclesToClipArg.split(",")) {
            try {
                String[] elts = range.split("-");
                int start = Integer.parseInt(elts[0]) - 1;
                int stop = Integer.parseInt(elts[1]) - 1;
                if (start < 0)
                    throw new Exception();
                if (stop < start)
                    throw new Exception();
                logger.info(String.format("Creating cycle clipper %d-%d", start, stop));
                cyclesToClip.add(new MutablePair<>(start, stop));
            } catch (Exception e) {
                throw new RuntimeException("Badly formatted cyclesToClip argument: " + cyclesToClipArg);
            }
        }
    }
    final boolean presorted = EnumSet.of(ClippingRepresentation.WRITE_NS, ClippingRepresentation.WRITE_NS_Q0S, ClippingRepresentation.WRITE_Q0S).contains(clippingRepresentation);
    outputBam = createSAMWriter(OUTPUT, presorted);
    accumulator = new ClippingData(sequencesToClip);
    try {
        outputStats = STATSOUTPUT == null ? null : new PrintStream(STATSOUTPUT);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(STATSOUTPUT, e);
    }
}
Also used : PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) File(java.io.File)

Example 3 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class ReferenceFileSource method getAllReferenceBases.

public Map<String, ReferenceBases> getAllReferenceBases() throws IOException {
    try (ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(referencePath))) {
        Map<String, ReferenceBases> bases = new LinkedHashMap<>();
        ReferenceSequence seq;
        while ((seq = referenceSequenceFile.nextSequence()) != null) {
            String name = seq.getName();
            bases.put(name, new ReferenceBases(seq.getBases(), new SimpleInterval(name, 1, seq.length())));
        }
        return bases;
    }
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) File(java.io.File) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) LinkedHashMap(java.util.LinkedHashMap)

Example 4 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class ReferenceHadoopSource method getReferenceBases.

@Override
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval) {
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(IOUtils.getPath(referencePath));
    ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
    return new ReferenceBases(sequence.getBases(), interval);
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence)

Example 5 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class ReorderSam method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SAMSequenceDictionary refDict = reference.getSequenceDictionary();
    if (refDict == null) {
        CloserUtil.close(in);
        throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
    }
    printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
    printDictionary("Reference", refDict);
    Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
    // has to be after we create the newOrder
    SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
    outHeader.setSequenceDictionary(refDict);
    logger.info("Writing reads...");
    if (in.hasIndex()) {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
            // write the reads in contig order
            for (final SAMSequenceRecord contig : refDict.getSequences()) {
                final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
                writeReads(out, it, newOrder, contig.getSequenceName());
            }
            // don't forget the unmapped reads
            writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
        }
    } else {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
            writeReads(out, in.iterator(), newOrder, "All reads");
        }
    }
    // cleanup
    CloserUtil.close(in);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Aggregations

ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile)20 File (java.io.File)9 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 SAMFileWriter (htsjdk.samtools.SAMFileWriter)5 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)5 ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence)5 FileNotFoundException (java.io.FileNotFoundException)5 IOException (java.io.IOException)4 UserException (org.broadinstitute.hellbender.exceptions.UserException)4 CachingIndexedFastaSequenceFile (org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile)4 SamReader (htsjdk.samtools.SamReader)3 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)3 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)2 SAMRecord (htsjdk.samtools.SAMRecord)2 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)2 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)2 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)2 ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases)2 BeforeClass (org.testng.annotations.BeforeClass)2 TwoBitBufferedReferenceSequenceFile (au.edu.wehi.idsv.picard.TwoBitBufferedReferenceSequenceFile)1