Search in sources :

Example 31 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class PlotSegmentedCopyRatio method doWork.

@Override
protected Object doWork() {
    checkRegularReadableUserFiles();
    //get sample name from input files (consistency check is performed)
    final String sampleName = getSampleName();
    //load contig names and lengths from the sequence dictionary into a LinkedHashMap
    final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
    Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
    final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
        throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
    }, LinkedHashMap::new));
    Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
    logger.info("Contigs above length threshold: " + contigLengthMap.toString());
    //check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
    validateContigs(contigLengthMap);
    //generate the plots
    final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
    final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
    writeSegmentedCopyRatioPlots(sampleName, contigNames, contigLengths);
    return "SUCCESS";
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Resource(org.broadinstitute.hellbender.utils.io.Resource) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceUtils(org.broadinstitute.hellbender.utils.reference.ReferenceUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 32 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class MultiVariantDataSource method validateAllSequenceDictionaries.

/**
     * GATKTool only validates individual feature dictionaries against the reference dictionary, so cross-validate
     * all of the dictionaries against each other here by ensuring that each contig found in any dictionary has the
     * same length (and md5, when a value is present for that contig in both dictionaries) in every other dictionary
     * in which its present.
     */
private void validateAllSequenceDictionaries() {
    final Map<String, FeatureDataSource<VariantContext>> contigMap = new HashMap<>();
    featureDataSources.forEach(ds -> {
        final SAMSequenceDictionary dictionary = ds.getSequenceDictionary();
        if (dictionary == null) {
            logger.warn("A sequence dictionary is required for each input when using multiple inputs, and one could" + " not be obtained for feature input: " + ds.getName() + ". The input may not exist or may not have a valid header");
        } else {
            dictionary.getSequences().forEach(sourceSequence -> {
                final String sourceSequenceName = sourceSequence.getSequenceName();
                final FeatureDataSource<VariantContext> previousDataSource = contigMap.getOrDefault(sourceSequenceName, null);
                if (previousDataSource != null) {
                    final SAMSequenceDictionary previousDictionary = previousDataSource.getSequenceDictionary();
                    final SAMSequenceRecord previousSequence = previousDictionary.getSequence(sourceSequenceName);
                    validateSequenceDictionaryRecords(ds.getName(), dictionary, sourceSequence, previousDataSource.getName(), previousDictionary, previousSequence);
                } else {
                    contigMap.put(sourceSequenceName, ds);
                }
            });
        }
    });
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 33 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class ReorderSam method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SAMSequenceDictionary refDict = reference.getSequenceDictionary();
    if (refDict == null) {
        CloserUtil.close(in);
        throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
    }
    printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
    printDictionary("Reference", refDict);
    Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
    // has to be after we create the newOrder
    SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
    outHeader.setSequenceDictionary(refDict);
    logger.info("Writing reads...");
    if (in.hasIndex()) {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
            // write the reads in contig order
            for (final SAMSequenceRecord contig : refDict.getSequences()) {
                final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
                writeReads(out, it, newOrder, contig.getSequenceName());
            }
            // don't forget the unmapped reads
            writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
        }
    } else {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
            writeReads(out, in.iterator(), newOrder, "All reads");
        }
    }
    // cleanup
    CloserUtil.close(in);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 34 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class UpdateVCFSequenceDictionary method apply.

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {
    // Validate each variant against the source dictionary manually
    SAMSequenceRecord samSeqRec = sourceDictionary.getSequence(vc.getContig());
    if (samSeqRec == null) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that is not present in the provided dictionary", vc.getID(), vc.getContig()));
    } else if (vc.getEnd() > samSeqRec.getSequenceLength()) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that ends at a position (%d) that exceeds the length of that sequence (%d) in the provided dictionary", vc.getID(), vc.getContig(), vc.getEnd(), samSeqRec.getSequenceLength()));
    }
    vcfWriter.add(vc);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException)

Example 35 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class IndexUtils method getSamSequenceDictionaryFromIndex.

private static SAMSequenceDictionary getSamSequenceDictionaryFromIndex(final Index index) {
    final List<String> seqNames = index.getSequenceNames();
    if (seqNames == null || seqNames.isEmpty()) {
        return null;
    }
    final SAMSequenceDictionary dict = new SAMSequenceDictionary();
    //use UNKNOWN_SEQUENCE_LENGTH to indicate contigs that will not be compared by length (see SequenceDictionaryUtils.sequenceRecordsAreEquivalent)
    seqNames.forEach(seqName -> dict.addSequence(new SAMSequenceRecord(seqName, SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)));
    return dict;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Aggregations

SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)72 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)35 Test (org.testng.annotations.Test)26 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)24 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 File (java.io.File)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 DataProvider (org.testng.annotations.DataProvider)8 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 QueryInterval (htsjdk.samtools.QueryInterval)5 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4