use of htsjdk.variant.variantcontext.Allele in project gatk-protected by broadinstitute.
the class XHMMSegmentGenotyperIntegrationTest method assertVariantsInfoFieldsAreConsistent.
private void assertVariantsInfoFieldsAreConsistent(final List<VariantContext> variants) {
for (final VariantContext variant : variants) {
final int expectedAN = variant.getGenotypes().size();
final int[] expectedAC = new int[CopyNumberTriState.values().length];
final List<Allele> alleles = variant.getAlleles();
for (final Genotype genotype : variant.getGenotypes()) {
Assert.assertEquals(genotype.getAlleles().size(), 1);
final int alleleIndex = alleles.indexOf(genotype.getAllele(0));
Assert.assertTrue(alleleIndex >= 0);
expectedAC[alleleIndex]++;
}
Assert.assertEquals(variant.getAlleles(), CopyNumberTriStateAllele.ALL_ALLELES);
Assert.assertTrue(variant.hasAttribute(XHMMSegmentGenotyper.NUMBER_OF_TARGETS_KEY));
Assert.assertTrue(variant.hasAttribute(VCFConstants.ALLELE_COUNT_KEY));
Assert.assertTrue(variant.hasAttribute(VCFConstants.END_KEY));
Assert.assertTrue(variant.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
Assert.assertTrue(variant.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY));
final int expectedANAnotherWay = IntStream.of(expectedAC).sum();
Assert.assertEquals(expectedANAnotherWay, expectedAN);
Assert.assertEquals(variant.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, -1), expectedAN);
final double[] expectedAF = IntStream.of(expectedAC).mapToDouble(c -> c / (double) expectedAN).toArray();
final double[] observedAFWithoutRef = variant.getAttributeAsList(VCFConstants.ALLELE_FREQUENCY_KEY).stream().mapToDouble(o -> Double.parseDouble(String.valueOf(o))).toArray();
Assert.assertEquals(observedAFWithoutRef.length, expectedAF.length - 1);
for (int i = 0; i < observedAFWithoutRef.length; i++) {
Assert.assertEquals(observedAFWithoutRef[i], expectedAF[i + 1], 0.001);
}
final int[] observedACWithoutRef = variant.getAttributeAsList(VCFConstants.ALLELE_COUNT_KEY).stream().mapToInt(o -> Integer.parseInt(String.valueOf(o))).toArray();
Assert.assertEquals(observedACWithoutRef.length, expectedAC.length - 1);
for (int i = 0; i < observedACWithoutRef.length; i++) {
Assert.assertEquals(observedACWithoutRef[i], expectedAC[i + 1]);
}
Assert.assertEquals(variant.getAttributeAsInt(XHMMSegmentGenotyper.NUMBER_OF_TARGETS_KEY, -1), XHMMSegmentCallerBaseIntegrationTest.REALISTIC_TARGETS.targetCount(variant));
}
}
use of htsjdk.variant.variantcontext.Allele in project gatk-protected by broadinstitute.
the class XHMMSegmentGenotyperIntegrationTest method assertVariantSegmentsAreCovered.
private void assertVariantSegmentsAreCovered(final List<VariantContext> variants, final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> variantSegment : variantSegments) {
final Optional<VariantContext> match = variants.stream().filter(vc -> new SimpleInterval(vc).equals(variantSegment.getSegment().getInterval())).findFirst();
Assert.assertTrue(match.isPresent());
final VariantContext matchedVariant = match.get();
final Genotype genotype = matchedVariant.getGenotype(variantSegment.getSampleName());
final String discovery = genotype.getAnyAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString();
Assert.assertTrue(discovery.equals(XHMMSegmentGenotyper.DISCOVERY_TRUE));
final CopyNumberTriState call = variantSegment.getSegment().getCall();
final List<Allele> gt = genotype.getAlleles();
Assert.assertEquals(gt.size(), 1);
// The call may not be the same for case where the event-quality is relatively low.
if (variantSegment.getSegment().getEventQuality() > 10) {
Assert.assertEquals(CopyNumberTriStateAllele.valueOf(gt.get(0)).state, call, genotype.toString());
}
final String[] SQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.SOME_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final double someQual = variantSegment.getSegment().getSomeQuality();
Assert.assertEquals(Double.parseDouble(SQ[call == CopyNumberTriState.DELETION ? 0 : 1]), someQual, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());
final String[] LQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.START_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final double startQuality = variantSegment.getSegment().getStartQuality();
Assert.assertEquals(Double.parseDouble(LQ[call == CopyNumberTriState.DELETION ? 0 : 1]), startQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());
final String[] RQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.END_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final double endQuality = variantSegment.getSegment().getEndQuality();
Assert.assertEquals(Double.parseDouble(RQ[call == CopyNumberTriState.DELETION ? 0 : 1]), endQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());
// Check the PL.
final int[] PL = genotype.getPL();
final int observedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, PL[CopyNumberTriStateAllele.REF.index()] - PL[CopyNumberTriStateAllele.valueOf(call).index()]);
final double expectedCallPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleErrorRate(QualityUtils.qualToProb(variantSegment.getSegment().getExactQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
final double expectedRefPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleCorrectRate(QualityUtils.qualToProb(variantSegment.getSegment().getEventQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
final int expectedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, (int) Math.round(expectedRefPL - expectedCallPL));
Assert.assertTrue(Math.abs(observedGQFromPL - expectedGQFromPL) <= 1, genotype.toString() + " " + variantSegment.getSegment().getEventQuality());
}
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class EventMap method makeBlock.
/**
* Create a block substitution out of two variant contexts that start at the same position
*
* vc1 can be SNP, and vc2 can then be either a insertion or deletion.
* If vc1 is an indel, then vc2 must be the opposite type (vc1 deletion => vc2 must be an insertion)
*
* @param vc1 the first variant context we want to merge
* @param vc2 the second
* @return a block substitution that represents the composite substitution implied by vc1 and vc2
*/
protected VariantContext makeBlock(final VariantContext vc1, final VariantContext vc2) {
Utils.validateArg(vc1.getStart() == vc2.getStart(), () -> "vc1 and 2 must have the same start but got " + vc1 + " and " + vc2);
Utils.validateArg(vc1.isBiallelic(), "vc1 must be biallelic");
if (!vc1.isSNP()) {
Utils.validateArg((vc1.isSimpleDeletion() && vc2.isSimpleInsertion()) || (vc1.isSimpleInsertion() && vc2.isSimpleDeletion()), () -> "Can only merge single insertion with deletion (or vice versa) but got " + vc1 + " merging with " + vc2);
} else {
Utils.validateArg(!vc2.isSNP(), () -> "vc1 is " + vc1 + " but vc2 is a SNP, which implies there's been some terrible bug in the cigar " + vc2);
}
final Allele ref, alt;
final VariantContextBuilder b = new VariantContextBuilder(vc1);
if (vc1.isSNP()) {
// we have to repair the first base, so SNP case is special cased
if (vc1.getReference().equals(vc2.getReference())) {
// we've got an insertion, so we just update the alt to have the prev alt
ref = vc1.getReference();
alt = Allele.create(vc1.getAlternateAllele(0).getDisplayString() + vc2.getAlternateAllele(0).getDisplayString().substring(1), false);
} else {
// we're dealing with a deletion, so we patch the ref
ref = vc2.getReference();
alt = vc1.getAlternateAllele(0);
b.stop(vc2.getEnd());
}
} else {
final VariantContext insertion = vc1.isSimpleInsertion() ? vc1 : vc2;
final VariantContext deletion = vc1.isSimpleInsertion() ? vc2 : vc1;
ref = deletion.getReference();
alt = insertion.getAlternateAllele(0);
b.stop(deletion.getEnd());
}
return b.alleles(Arrays.asList(ref, alt)).make();
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class SelectVariants method subsetRecord.
/**
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
*
* @param vc the VariantContext record to subset
* @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
* @param removeUnusedAlternates removes alternate alleles with AC=0
* @return the subsetted VariantContext
*/
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
//subContextFromSamples() always decodes the vc, which is a fairly expensive operation. Avoid if possible
if (noSamplesSpecified && !removeUnusedAlternates) {
return vc;
}
// strip out the alternate alleles that aren't being used
final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);
// If no subsetting happened, exit now
if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
return vc;
}
// fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
final GenotypesContext oldGs = sub.getGenotypes();
GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs : AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
if (fractionGenotypes > 0) {
final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype : new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
newGC = GenotypesContext.create(new ArrayList<>(genotypes));
}
// since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
final VariantContextBuilder builder = new VariantContextBuilder(sub);
builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
builder.genotypes(newGC);
addAnnotations(builder, vc, sub.getSampleNames());
final VariantContext subset = builder.make();
return preserveAlleles ? subset : GATKVariantContextUtils.trimAlleles(subset, true, true);
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class ApplyVQSR method doAlleleSpecificFiltering.
/**
* Calculate the allele-specific filter status of vc
* @param vc
* @param recals
* @param builder is modified by adding attributes
* @return a String with the filter status for this site
*/
private String doAlleleSpecificFiltering(final VariantContext vc, final List<VariantContext> recals, final VariantContextBuilder builder) {
double bestLod = VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE;
final List<String> culpritStrings = new ArrayList<>();
final List<String> lodStrings = new ArrayList<>();
final List<String> AS_filterStrings = new ArrayList<>();
String[] prevCulpritList = null;
String[] prevLodList = null;
String[] prevASfiltersList = null;
//get VQSR annotations from previous run of ApplyRecalibration, if applicable
if (foundINDELTranches || foundSNPTranches) {
final String prevCulprits = vc.getAttributeAsString(GATKVCFConstants.AS_CULPRIT_KEY, "");
prevCulpritList = prevCulprits.isEmpty() ? new String[0] : prevCulprits.split(listPrintSeparator);
final String prevLodString = vc.getAttributeAsString(GATKVCFConstants.AS_VQS_LOD_KEY, "");
prevLodList = prevLodString.isEmpty() ? new String[0] : prevLodString.split(listPrintSeparator);
final String prevASfilters = vc.getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "");
prevASfiltersList = prevASfilters.isEmpty() ? new String[0] : prevASfilters.split(listPrintSeparator);
}
//for each allele in the current VariantContext
for (int altIndex = 0; altIndex < vc.getNAlleles() - 1; altIndex++) {
final Allele allele = vc.getAlternateAllele(altIndex);
//if the current allele is not part of this recalibration mode, add its annotations to the list and go to the next allele
if (!VariantDataManager.checkVariationClass(vc, allele, MODE)) {
updateAnnotationsWithoutRecalibrating(altIndex, prevCulpritList, prevLodList, prevASfiltersList, culpritStrings, lodStrings, AS_filterStrings);
continue;
}
//if the current allele does need to have recalibration applied...
//initialize allele-specific VQSR annotation data with values for spanning deletion
String alleleLodString = emptyFloatValue;
String alleleFilterString = emptyStringValue;
String alleleCulpritString = emptyStringValue;
//if it's not a spanning deletion, replace those allele strings with the real values
if (!GATKVCFConstants.isSpanningDeletion(allele)) {
VariantContext recalDatum = getMatchingRecalVC(vc, recals, allele);
if (recalDatum == null) {
throw new UserException("Encountered input allele which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants with flag -AS. First seen at: " + vc);
}
//compare VQSLODs for all alleles in the current mode for filtering later
final double lod = recalDatum.getAttributeAsDouble(GATKVCFConstants.VQS_LOD_KEY, VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE);
if (lod > bestLod)
bestLod = lod;
alleleLodString = String.format("%.4f", lod);
alleleFilterString = generateFilterString(lod);
alleleCulpritString = recalDatum.getAttributeAsString(GATKVCFConstants.CULPRIT_KEY, ".");
if (recalDatum != null) {
if (recalDatum.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY))
builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
if (recalDatum.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY))
builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
}
}
//append per-allele VQSR annotations
lodStrings.add(alleleLodString);
AS_filterStrings.add(alleleFilterString);
culpritStrings.add(alleleCulpritString);
}
// Annotate the new record with its VQSLOD, AS_FilterStatus, and the worst performing annotation
if (!AS_filterStrings.isEmpty())
builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeStringList(AS_filterStrings));
if (!lodStrings.isEmpty())
builder.attribute(GATKVCFConstants.AS_VQS_LOD_KEY, AnnotationUtils.encodeStringList(lodStrings));
if (!culpritStrings.isEmpty())
builder.attribute(GATKVCFConstants.AS_CULPRIT_KEY, AnnotationUtils.encodeStringList(culpritStrings));
return generateFilterStringFromAlleles(vc, bestLod);
}
Aggregations