Search in sources :

Example 21 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class PlotACNVResults method doWork.

@Override
protected Object doWork() {
    checkRegularReadableUserFiles();
    //get sample name from input files (consistency check is performed)
    final String sampleName = getSampleName();
    //load contig names and lengths from the sequence dictionary into a LinkedHashMap
    final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
    Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
    final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
        throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
    }, LinkedHashMap::new));
    Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
    logger.info("Contigs above length threshold: " + contigLengthMap.toString());
    //check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
    validateContigs(contigLengthMap);
    //generate the plots
    final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
    final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
    writeSegmentedAlleleFractionPlot(sampleName, contigNames, contigLengths);
    return "SUCCESS";
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Resource(org.broadinstitute.hellbender.utils.io.Resource) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceUtils(org.broadinstitute.hellbender.utils.reference.ReferenceUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 22 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class PlotSegmentedCopyRatio method doWork.

@Override
protected Object doWork() {
    checkRegularReadableUserFiles();
    //get sample name from input files (consistency check is performed)
    final String sampleName = getSampleName();
    //load contig names and lengths from the sequence dictionary into a LinkedHashMap
    final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
    Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
    final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
        throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
    }, LinkedHashMap::new));
    Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
    logger.info("Contigs above length threshold: " + contigLengthMap.toString());
    //check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
    validateContigs(contigLengthMap);
    //generate the plots
    final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
    final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
    writeSegmentedCopyRatioPlots(sampleName, contigNames, contigLengths);
    return "SUCCESS";
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Resource(org.broadinstitute.hellbender.utils.io.Resource) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceUtils(org.broadinstitute.hellbender.utils.reference.ReferenceUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 23 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class IntervalUtils method intervalIsOnDictionaryContig.

/**
     * Determines whether the provided interval is within the bounds of its assigned contig according to the provided dictionary
     *
     * @param interval interval to check
     * @param dictionary dictionary to use to validate contig bounds
     * @return true if the interval's contig exists in the dictionary, and the interval is within its bounds, otherwise false
     */
public static boolean intervalIsOnDictionaryContig(final SimpleInterval interval, final SAMSequenceDictionary dictionary) {
    Utils.nonNull(interval);
    Utils.nonNull(dictionary);
    final SAMSequenceRecord contigRecord = dictionary.getSequence(interval.getContig());
    if (contigRecord == null) {
        return false;
    }
    return interval.getEnd() <= contigRecord.getSequenceLength();
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 24 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SequenceDictionaryUtils method commonContigsAreInSameRelativeOrder.

/**
     * Returns true if the common contigs in dict1 and dict2 are in the same relative order, without regard to
     * absolute index position. This is accomplished by getting the common contigs in both dictionaries, sorting
     * these according to their indices, and then walking through the sorted list to ensure that each ordered contig
     * is equivalent
     *
     * @param commonContigs names of the contigs common to both dictionaries
     * @param dict1 first SAMSequenceDictionary
     * @param dict2 second SAMSequenceDictionary
     * @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false
     */
private static boolean commonContigsAreInSameRelativeOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
    final List<SAMSequenceRecord> list1 = getSequencesOfName(commonContigs, dict1);
    final List<SAMSequenceRecord> list2 = getSequencesOfName(commonContigs, dict2);
    list1.sort(SEQUENCE_INDEX_ORDER);
    list2.sort(SEQUENCE_INDEX_ORDER);
    for (int i = 0; i < list1.size(); i++) {
        SAMSequenceRecord elt1 = list1.get(i);
        SAMSequenceRecord elt2 = list2.get(i);
        if (!elt1.getSequenceName().equals(elt2.getSequenceName()))
            return false;
    }
    return true;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 25 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SequenceDictionaryUtils method findDisequalCommonContigs.

/**
     * Returns a List(x,y) that contains two disequal sequence records among the common contigs in both dicts.  Returns
     * null if all common contigs are equivalent
     *
     * @param commonContigs
     * @param dict1
     * @param dict2
     * @return
     */
private static List<SAMSequenceRecord> findDisequalCommonContigs(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
    for (String name : commonContigs) {
        SAMSequenceRecord elt1 = dict1.getSequence(name);
        SAMSequenceRecord elt2 = dict2.getSequence(name);
        if (!sequenceRecordsAreEquivalent(elt1, elt2))
            return Arrays.asList(elt1, elt2);
    }
    return null;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Aggregations

SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)72 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)35 Test (org.testng.annotations.Test)26 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)24 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 File (java.io.File)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 DataProvider (org.testng.annotations.DataProvider)8 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 QueryInterval (htsjdk.samtools.QueryInterval)5 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4