Search in sources :

Example 66 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk-protected by broadinstitute.

the class AnnotateVcfWithBamDepth method apply.

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
    final MutableInt depth = new MutableInt(0);
    for (final GATKRead read : readsContext) {
        if (!read.failsVendorQualityCheck() && !read.isDuplicate() && !read.isUnmapped() && read.getEnd() > read.getStart() && new SimpleInterval(read).contains(vc)) {
            depth.increment();
        }
    }
    vcfWriter.add(new VariantContextBuilder(vc).attribute(POOLED_BAM_DEPTH_ANNOTATION_NAME, depth.intValue()).make());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) MutableInt(org.apache.commons.lang.mutable.MutableInt) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 67 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk-protected by broadinstitute.

the class CalculateMixingFractions method apply.

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
    if (!isBiallelicSingletonHetSnp(vc)) {
        return;
    }
    final Optional<String> variantSample = StreamSupport.stream(vc.getGenotypes().spliterator(), false).filter(genotype -> genotype.isHet()).map(genotype -> genotype.getSampleName()).findFirst();
    if (!variantSample.isPresent()) {
        return;
    }
    final List<GATKRead> reads = new ArrayList<>();
    final List<Integer> offsets = new ArrayList<>();
    for (final GATKRead read : readsContext) {
        if (read.failsVendorQualityCheck()) {
            continue;
        }
        final AlignmentStateMachine asm = new AlignmentStateMachine(read);
        while (asm.stepForwardOnGenome() != null && asm.getGenomePosition() < vc.getStart()) {
        }
        if (asm.getGenomePosition() == vc.getStart()) {
            reads.add(read);
            offsets.add(asm.getReadOffset());
        }
    }
    final ReadPileup pileup = new ReadPileup(vc, reads, offsets);
    final byte altBase = vc.getAlternateAllele(0).getBases()[0];
    final long altCount = StreamSupport.stream(pileup.spliterator(), false).filter(pe -> pe.getBase() == altBase).count();
    final long totalCount = pileup.size();
    sampleCounts.get(variantSample.get()).addCounts(altCount, totalCount);
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) htsjdk.variant.vcf(htsjdk.variant.vcf) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) AlignmentStateMachine(org.broadinstitute.hellbender.utils.locusiterator.AlignmentStateMachine) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Collectors(java.util.stream.Collectors) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) File(java.io.File) org.broadinstitute.hellbender.engine(org.broadinstitute.hellbender.engine) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) StreamSupport(java.util.stream.StreamSupport) MutableLong(org.apache.commons.lang.mutable.MutableLong) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) AlignmentStateMachine(org.broadinstitute.hellbender.utils.locusiterator.AlignmentStateMachine)

Example 68 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSet method readList.

public List<GATKRead> readList() {
    if (readList == null) {
        final SAMFileHeader header = artificialSAMFileHeader();
        readList = new ArrayList<>(readCigars.length);
        final List<String> haplotypes = haplotypesStrings();
        int count = 0;
        for (final String descr : readCigars) {
            String sequence;
            if (descr.matches("^\\d+:\\d+:.+$")) {
                final String[] parts = descr.split(":");
                int allele = Integer.parseInt(parts[0]);
                int offset = Integer.parseInt(parts[1]);
                final String cigar = parts[2];
                final String base = allele == 0 ? reference : haplotypes.get(allele - 1);
                sequence = applyCigar(base, cigar, offset, false);
                final GATKRead samRecord = ArtificialReadUtils.createArtificialRead(header, "read_" + count, 0, 1, sequence.getBytes(), Arrays.copyOf(bq, sequence.length()));
                readList.add(samRecord);
            } else if (descr.matches("^\\*:\\d+:\\d+$")) {
                int readCount = Integer.parseInt(descr.split(":")[1]);
                int readLength = Integer.parseInt(descr.split(":")[2]);
                readList.addAll(generateSamRecords(haplotypes, readCount, readLength, header, count));
            } else {
                sequence = descr;
                final GATKRead samRecord = ArtificialReadUtils.createArtificialRead(header, "read_" + count, 0, 1, sequence.getBytes(), Arrays.copyOf(bq, sequence.length()));
                readList.add(samRecord);
            }
            count = readList.size();
        }
    }
    return readList;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 69 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSetUnitTest method testAssemblyRegionsDataSet.

@Test(dataProvider = "activeRegionTestDataSets")
@SuppressWarnings("fallthrough")
public void testAssemblyRegionsDataSet(final AssemblyRegionTestDataSet as, final int kmerSize, final int readLength, final String variation, final int readCount, final int regionSize, final byte bq, final byte iq, final byte dq) {
    Assert.assertNotNull(as);
    Assert.assertEquals(as.assemblyResultSet().getMaximumKmerSize(), kmerSize);
    final List<GATKRead> reads = as.readList();
    Assert.assertEquals(reads.size(), readCount);
    for (final GATKRead r : reads) {
        Assert.assertEquals(r.getLength(), readLength);
    }
    final List<Haplotype> haplotypes = as.haplotypeList();
    final List<Civar> haplotypeCivars = Civar.fromCharSequence(variation).optionalizeAll().unroll();
    Assert.assertEquals(haplotypes.size(), haplotypeCivars.size());
    Assert.assertTrue(haplotypeCivars.size() > 1);
    int variants = 0;
    for (int i = 0; i < variation.length(); i++) {
        final char c = variation.charAt(i);
        switch(c) {
            case 'W':
            case 'T':
            case 'C':
            case 'D':
            case 'I':
                variants++;
            default:
        }
    }
    Assert.assertEquals(haplotypes.size(), (int) Math.pow(2, variants));
    final Map<String, Integer> haplotypeNumberByString = new HashMap<>();
    for (int i = 0; i < haplotypes.size(); i++) {
        final Haplotype hap = haplotypes.get(i);
        final Civar civar = haplotypeCivars.get(i);
        Assert.assertEquals(hap.getBaseString(), civar.applyTo(as.getReference()));
        if (i == 0) {
            Assert.assertEquals(hap.getBaseString(), as.getReference());
        } else {
            Assert.assertNotEquals(hap.getBaseString(), as.getReference());
        }
        Assert.assertFalse(haplotypeNumberByString.containsKey(hap.getBaseString()));
        haplotypeNumberByString.put(hap.getBaseString(), i);
    }
    final int[] hapReadsNotInReference = new int[haplotypes.size()];
    for (int i = 0; i < readCount; i++) {
        final GATKRead r = as.readList().get(i);
        final int hapNumber = i % haplotypes.size();
        final int offset = i % (haplotypes.get(hapNumber).length() - readLength);
        Assert.assertEquals(getReadString(r), haplotypes.get(hapNumber).getBaseString().substring(offset, offset + readLength));
        if (!as.getReference().contains(getReadString(r))) {
            hapReadsNotInReference[hapNumber]++;
        }
    }
    Assert.assertEquals(hapReadsNotInReference[0], 0);
    for (int i = 1; i < hapReadsNotInReference.length; i++) {
        Assert.assertNotEquals(hapReadsNotInReference[i], 0);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 70 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk-protected by broadinstitute.

the class HaplotypeCallerGenotypingEngineUnitTest method testAddMiscellaneousAllele.

@Test(dataProvider = "AddMiscellaneousDataProvider", enabled = false)
public void testAddMiscellaneousAllele(final String readBases, final int readOffset, final String ref, final int refOffset, final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
    final byte baseQual = (byte) 30;
    final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
    final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
    final Locatable loc = new SimpleInterval("20", refOffset, refOffset);
    final ReadPileup pileup = new ReadPileup(loc, Collections.singletonList(read), readOffset);
    final VariantContextBuilder vcb = new VariantContextBuilder();
    final GenotypeBuilder gb = new GenotypeBuilder();
    final List<String> alleleStrings = new ArrayList<>(1 + alternatives.length);
    alleleStrings.add(referenceAllele);
    alleleStrings.addAll(Arrays.asList(alternatives));
    gb.AD(new int[] { 1 });
    gb.DP(1);
    gb.PL(likelihoods);
    vcb.alleles(alleleStrings);
    vcb.loc("20", refOffset, refOffset + referenceAllele.length() - 1);
    vcb.genotypes(gb.make());
    final VariantContext vc = vcb.make();
    // GenotypingEngine.addMiscellaneousAllele(vc,pileup,ref.getBytes(),0);
    final VariantContext updatedVc = null;
    final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
    Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
    final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
    for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
        Assert.assertEquals(updatedLikelihoodsArray[i], expected[i], 0.0001);
    }
    Allele altAllele = null;
    for (final Allele allele : updatedVc.getAlleles()) if (allele.isSymbolic() && allele.getBaseString().equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME))
        altAllele = allele;
    Assert.assertNotNull(altAllele);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)457 Test (org.testng.annotations.Test)286 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)163 SAMFileHeader (htsjdk.samtools.SAMFileHeader)87 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)59 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)40 ArrayList (java.util.ArrayList)34 Collectors (java.util.stream.Collectors)34 List (java.util.List)30 Cigar (htsjdk.samtools.Cigar)29 File (java.io.File)28 java.util (java.util)28 DataProvider (org.testng.annotations.DataProvider)28 JavaRDD (org.apache.spark.api.java.JavaRDD)26 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)26 Assert (org.testng.Assert)25 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)24 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 Argument (org.broadinstitute.barclay.argparser.Argument)18 UserException (org.broadinstitute.hellbender.exceptions.UserException)18