use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method composeNonTruthOverlappingGenotype.
private Genotype composeNonTruthOverlappingGenotype(final VariantContext enclosingContext, final Genotype genotype) {
final GenotypeBuilder builder = new GenotypeBuilder(genotype.getSampleName());
if (genotype.isCalled()) {
GATKProtectedVariantContextUtils.setGenotypeQualityFromPLs(builder, genotype);
final int[] PL = genotype.getPL();
final int callAlleleIndex = GATKProtectedMathUtils.minIndex(PL);
final double quality = callQuality(genotype);
builder.alleles(Collections.singletonList(enclosingContext.getAlleles().get(callAlleleIndex)));
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, quality);
final boolean discovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(genotype, XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE));
if (callAlleleIndex != 0 && discovered) {
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, EvaluationClass.UNKNOWN_POSITIVE.acronym);
}
if (quality < filterArguments.minimumCalledSegmentQuality) {
builder.filter(EvaluationFilter.LowQuality.acronym);
} else {
builder.filter(EvaluationFilter.PASS);
}
} else {
/* assume it is REF */
/* TODO this is a hack to make Andrey's CODEX vcf work; and in general, VCFs that only include discovered
* variants and NO_CALL (".") on other samples. The idea is to force the evaluation tool to take it call
* as REF on all other samples. Otherwise, the effective allele frequency of the variant will be erroneously
* high and will be filtered. */
builder.alleles(Collections.singletonList(CopyNumberTriStateAllele.REF));
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, 100000);
builder.filter(EvaluationFilter.PASS);
}
return builder.make();
}
use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.
the class FilterApplyingVariantIterator method next.
/**
* Provides the next record from the underlying iterator after applying filter strings generated
* by the set of filters in use by the iterator.
*/
@Override
public VariantContext next() {
final VariantContext ctx = this.iterator.next();
final Set<String> filterStrings = new HashSet<>();
// Collect variant level filters
for (final VariantFilter filter : this.filters) {
final String val = filter.filter(ctx);
if (val != null)
filterStrings.add(val);
}
// Collect genotype level filters in a Map of Sample -> List<filter string>
final ListMap<String, String> gtFilterStrings = new ListMap<>();
final Set<String> variantSamples = new HashSet<>();
for (final Genotype gt : ctx.getGenotypes()) {
if (gt.isCalled() && !gt.isHomRef())
variantSamples.add(gt.getSampleName());
for (final GenotypeFilter filter : gtFilters) {
final String filterString = filter.filter(ctx, gt);
if (filterString != null)
gtFilterStrings.add(gt.getSampleName(), filterString);
}
}
// If all genotypes are filtered apply a site level filter
if (gtFilterStrings.keySet().containsAll(variantSamples)) {
filterStrings.add(ALL_GTS_FILTERED);
}
// Make a builder and set the site level filter appropriately
final VariantContextBuilder builder = new VariantContextBuilder(ctx);
if (filterStrings.isEmpty()) {
builder.passFilters();
} else {
builder.filters(filterStrings);
}
// Apply filters to the necessary genotypes
builder.noGenotypes();
final List<Genotype> newGenotypes = new ArrayList<>(ctx.getNSamples());
for (final Genotype gt : ctx.getGenotypes()) {
final GenotypeBuilder gtBuilder = new GenotypeBuilder(gt);
final List<String> filters = gtFilterStrings.get(gt.getSampleName());
if (filters == null || filters.isEmpty()) {
gtBuilder.filter(PASS_FILTER);
} else {
gtBuilder.filters(filters);
}
newGenotypes.add(gtBuilder.make());
}
builder.genotypes(newGenotypes);
return builder.make();
}
use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.
the class HomRefBlock method createHomRefGenotype.
// create a single Genotype with GQ and DP annotations
private Genotype createHomRefGenotype(String sampleName) {
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(getPloidy(), getRef()));
// clear all attributes
gb.noAD().noPL().noAttributes();
final int[] minPLs = getMinPLs();
gb.PL(minPLs);
gb.GQ(GATKVariantContextUtils.calculateGQFromPLs(minPLs));
gb.DP(getMedianDP());
gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());
return gb.make();
}
Aggregations