use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class SVVCFWriter method writeVCF.
/**
* FASTA and Broadcast references are both required because 2bit Broadcast references currently order their
* sequence dictionaries in a scrambled order, see https://github.com/broadinstitute/gatk/issues/2037.
*/
public static void writeVCF(final PipelineOptions pipelineOptions, final String vcfFileName, final String fastaReference, final JavaRDD<VariantContext> variantContexts, final Logger logger) {
final SAMSequenceDictionary referenceSequenceDictionary = new ReferenceMultiSource(pipelineOptions, fastaReference, ReferenceWindowFunctions.IDENTITY_FUNCTION).getReferenceSequenceDictionary(null);
final List<VariantContext> sortedVariantsList = sortVariantsByCoordinate(variantContexts.collect(), referenceSequenceDictionary);
logNumOfVarByTypes(sortedVariantsList, logger);
writeVariants(pipelineOptions, vcfFileName, sortedVariantsList, referenceSequenceDictionary);
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class GenotypingEngineUnitTest method init.
@BeforeTest
public void init() {
genotypingEngine = getGenotypingEngine();
final int deletionSize = refAllele.length() - altT.length();
final int start = 1;
final VariantContext deletionVC = new VariantContextBuilder("testDeletion", "1", start, start + deletionSize, allelesDel).make();
genotypingEngine.recordDeletion(deletionSize, deletionVC);
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class ApplyVQSRIntegrationTest method testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered.
@Test
public void testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered() throws Exception {
ArgumentsBuilder args = new ArgumentsBuilder();
File tempOut = createTempFile("testApplyRecalibrationSnpAndIndelTogetherExcludeFiltered", ".vcf");
args.add("--variant");
args.add(new File(getToolTestDataDir() + "VQSR.mixedTest.input.vcf"));
args.add("-L");
args.add("20:1000100-1000500");
args.add("-mode");
args.add("BOTH");
args.add("--excludeFiltered");
args.add("-ts_filter_level");
args.add("90.0");
args.add("-tranchesFile ");
args.add(getToolTestDataDir() + "VQSR.mixedTest.tranches");
args.add("-recalFile");
args.add(getToolTestDataDir() + "VQSR.mixedTest.recal.vcf");
args.addOutput(tempOut);
runCommandLine(args);
try (FeatureDataSource<VariantContext> featureSource = new FeatureDataSource<>(tempOut)) {
for (VariantContext feature : featureSource) {
// there should only be unfiltered records in the output VCF file
Assert.assertTrue(feature.isNotFiltered());
}
}
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method processInterval.
/**
* Processes a cluster of truth and called variants that may overlap over a genome region.
* <p>
* It returns the list of {@link VariantEvaluationContext} instances to be output for the given interval. These
* are already sorted by coordinates.
* </p>
* @param truthReader reader to the truth variants.
* @param callsReader reader to the called variants.
* @param interval the interval to analyze.
* @return never {@code null}.
*/
private List<VariantEvaluationContext> processInterval(final VCFFileReader truthReader, final VCFFileReader callsReader, final SimpleInterval interval, final TargetCollection<Target> targets) {
final List<VariantContext> truthVariants = variantQueryToList(truthReader, interval);
final List<VariantContext> callsVariants = variantQueryToList(callsReader, interval);
final List<VariantEvaluationContext> evaluatedVariants = new ArrayList<>(truthVariants.size() + callsVariants.size());
for (final VariantContext truth : truthVariants) {
// skip truth that does not overlap a single target.
if (targets.targetCount(truth) == 0) {
continue;
}
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> IntervalUtils.overlaps(truth, vc)).collect(Collectors.toList());
evaluatedVariants.add(composeTruthOverlappingVariantContext(truth, overlappingCalls, targets));
}
for (final VariantContext call : callsVariants) {
// skip call that does not overlap a single target (the user might want to call on a smaller set of targets)
if (targets.targetCount(call) == 0) {
continue;
}
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> IntervalUtils.overlaps(call, vc)).collect(Collectors.toList());
if (overlappingTruth.isEmpty()) {
evaluatedVariants.add(composeNonTruthOverlappingVariantContext(call, targets));
}
}
return evaluatedVariants.stream().sorted(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR).map(this::applyVariantContextFilters).collect(Collectors.toList());
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
Aggregations