Search in sources :

Example 71 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SimpleIntervalUnitTest method expandWithinContigTestData.

@DataProvider(name = "ExpandWithinContigData")
public Object[][] expandWithinContigTestData() {
    final int CONTIG_LENGTH = 10000;
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", CONTIG_LENGTH)));
    return new Object[][] { { new SimpleInterval("1", 5, 10), 0, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 5, 10) }, { new SimpleInterval("1", 5, 10), 1, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 4, 11) }, { new SimpleInterval("1", 1, 10), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 20) }, { new SimpleInterval("1", 10, 20), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 30) }, { new SimpleInterval("1", 10, 20), 9, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 29) }, { new SimpleInterval("1", 30, 40), 5, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 25, 45) }, { new SimpleInterval("1", CONTIG_LENGTH - 10, CONTIG_LENGTH), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH) }, { new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH - 10), 11, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 31, CONTIG_LENGTH) }, { new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH - 10), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 30, CONTIG_LENGTH) } };
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DataProvider(org.testng.annotations.DataProvider)

Example 72 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk-protected by broadinstitute.

the class PlotSegmentedCopyRatio method doWork.

@Override
protected Object doWork() {
    checkRegularReadableUserFiles();
    //get sample name from input files (consistency check is performed)
    final String sampleName = getSampleName();
    //load contig names and lengths from the sequence dictionary into a LinkedHashMap
    final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
    Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
    final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
        throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
    }, LinkedHashMap::new));
    Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
    logger.info("Contigs above length threshold: " + contigLengthMap.toString());
    //check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
    validateContigs(contigLengthMap);
    //generate the plots
    final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
    final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
    writeSegmentedCopyRatioPlots(sampleName, contigNames, contigLengths);
    return "SUCCESS";
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Resource(org.broadinstitute.hellbender.utils.io.Resource) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceUtils(org.broadinstitute.hellbender.utils.reference.ReferenceUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 73 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class TandemRepeatUnitTest method testUsingVC.

@Test
public void testUsingVC() {
    // - [ref] / ATC from 20-20
    final String insLoc = "chr1";
    final int insLocStart = 2;
    final int insLocStop = 2;
    final byte[] refBytes = "GTATCATCATCGGA".getBytes();
    final Allele nullR = Allele.create("A", true);
    final Allele atc = Allele.create("AATC", false);
    // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
    final VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR, atc)).make();
    // we test that the interval from which the ReferenceContext is constructed does not need to exactly overlap
    // the VariantContext.  The annotation should be able to handle this.
    final SimpleInterval interval = new SimpleInterval(insLoc, insLocStart + 3, insLocStop + 4);
    final SimpleInterval interval1 = new SimpleInterval(insLoc, 1, refBytes.length);
    final ReferenceBases ref1 = new ReferenceBases(refBytes, interval1);
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(insLoc, refBytes.length)));
    final ReferenceContext ref = new ReferenceContext(ReferenceDataSource.of(ref1, dict), interval, 20, 20);
    final InfoFieldAnnotation ann = new TandemRepeat();
    final Map<String, Object> a = ann.annotate(ref, vc, null);
    Assert.assertEquals(a.size(), 3);
    Assert.assertEquals(a.get(GATKVCFConstants.STR_PRESENT_KEY), true);
    Assert.assertEquals(a.get(GATKVCFConstants.REPEAT_UNIT_KEY), "ATC");
    //3 repeats in the reference, 4 in the variant
    Assert.assertEquals(a.get(GATKVCFConstants.REPEATS_PER_ALLELE_KEY), Arrays.asList(3, 4));
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 74 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class TandemRepeatUnitTest method testUsingVCNoRepeat.

@Test
public void testUsingVCNoRepeat() {
    // - [ref] / ATC from 20-20
    final String insLoc = "chr1";
    final int insLocStart = 6;
    final int insLocStop = 6;
    final byte[] refBytes = "GTATCATCATCGGA".getBytes();
    final Allele nullR = Allele.create("A", true);
    final Allele atc = Allele.create("AATC", false);
    // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
    final VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR, atc)).make();
    final SimpleInterval interval = new SimpleInterval(insLoc, insLocStart, insLocStop);
    final SimpleInterval interval1 = new SimpleInterval(insLoc, 1, refBytes.length);
    final ReferenceBases ref1 = new ReferenceBases(refBytes, interval1);
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(insLoc, refBytes.length)));
    final ReferenceContext ref = new ReferenceContext(ReferenceDataSource.of(ref1, dict), interval, 0, 20);
    final InfoFieldAnnotation ann = new TandemRepeat();
    final Map<String, Object> a = ann.annotate(ref, vc, null);
    Assert.assertTrue(a.isEmpty());
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 75 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class TandemRepeatUnitTest method testUsingVCNotIndel.

@Test
public void testUsingVCNotIndel() {
    // - [ref] / ATC from 20-20
    String insLoc = "chr1";
    int insLocStart = 2;
    int insLocStop = 2;
    byte[] refBytes = "GTATCATCATCGGA".getBytes();
    Allele nullR = Allele.create("A", true);
    Allele atc = Allele.create("C", false);
    // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
    VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR, atc)).make();
    final SimpleInterval interval = new SimpleInterval("chr1", insLocStart, insLocStop);
    final String contigName = "chr1";
    final SimpleInterval interval1 = new SimpleInterval(contigName, 1, refBytes.length);
    final ReferenceBases ref1 = new ReferenceBases(refBytes, interval1);
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(contigName, refBytes.length)));
    final ReferenceContext ref = new ReferenceContext(ReferenceDataSource.of(ref1, dict), interval, 0, 20);
    final InfoFieldAnnotation ann = new TandemRepeat();
    final Map<String, Object> a = ann.annotate(ref, vc, null);
    Assert.assertTrue(a.isEmpty());
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)110 Test (org.testng.annotations.Test)41 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)37 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)37 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)35 File (java.io.File)31 UserException (org.broadinstitute.hellbender.exceptions.UserException)24 VariantContext (htsjdk.variant.variantcontext.VariantContext)23 Argument (org.broadinstitute.barclay.argparser.Argument)21 Collectors (java.util.stream.Collectors)20 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)20 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)18 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)17 VCFHeader (htsjdk.variant.vcf.VCFHeader)16 IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)14 List (java.util.List)14 JavaRDD (org.apache.spark.api.java.JavaRDD)14 Broadcast (org.apache.spark.broadcast.Broadcast)12 StreamSupport (java.util.stream.StreamSupport)11