Use of org.broadinstitute.hellbender.utils.genotyper.SampleList in project gatk by broadinstitute.
Class OrientationBiasFilterer, method createVCFHeader.
/**
 * Build the output VCF header for the Orientation Bias filter, starting from an existing header.
 *
 * <p>The result contains every metadata line of the input header (in input order) followed by the
 * FORMAT, FILTER and bookkeeping lines added here. Sample names are taken from the input header's
 * genotype samples.
 *
 * @param inputVCFHeader header to start from. Never {@code null}
 * @param commandLine command line used to run this tool; stored under the "command" key
 * @param transitions artifact modes; stored joined with "|". Never {@code null}
 * @return the newly built VCF header
 */
public static VCFHeader createVCFHeader(final VCFHeader inputVCFHeader, final String commandLine, final List<String> transitions) {
    Utils.nonNull(inputVCFHeader);
    Utils.nonNull(transitions);

    // A LinkedHashSet keeps the original lines first and the additions in the order they are added below.
    final Set<VCFHeaderLine> outputLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());

    // Per-genotype annotations written by the filter.
    outputLines.add(countAFormatLine(OrientationBiasFilterConstants.PRE_ADAPTER_METRIC_FIELD_NAME, VCFHeaderLineType.Float,
            "Measure (across entire bam file) of orientation bias for a given REF/ALT error."));
    outputLines.add(countAFormatLine(OrientationBiasFilterConstants.PRE_ADAPTER_METRIC_RC_FIELD_NAME, VCFHeaderLineType.Float,
            "Measure (across entire bam file) of orientation bias for the complement of a given REF/ALT error."));
    outputLines.add(countAFormatLine(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFHeaderLineType.Float,
            "Orientation bias p value for the given REF/ALT artifact or its complement."));
    outputLines.add(countAFormatLine(OrientationBiasFilterConstants.FOB, VCFHeaderLineType.Float,
            "Fraction of alt reads indicating orientation bias error (taking into account artifact mode complement)."));
    outputLines.add(countAFormatLine(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_ARTIFACT_MODE, VCFHeaderLineType.String,
            "Whether the variant can be one of the given REF/ALT artifact modes."));
    outputLines.add(countAFormatLine(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_RC_ARTIFACT_MODE, VCFHeaderLineType.String,
            "Whether the variant can be one of the given REF/ALT artifact mode complements."));
    outputLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));

    // Site-level filter plus a record of how the filter was configured and invoked.
    outputLines.add(new VCFFilterHeaderLine(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT,
            "Orientation bias (in one of the specified artifact mode(s) or complement) seen in one or more samples."));
    final String joinedTransitions = String.join("|", transitions);
    outputLines.add(new VCFSimpleHeaderLine("orientation_bias_artifact_modes", joinedTransitions,
            "The artifact modes that were used for orientation bias artifact filtering for this VCF"));
    outputLines.add(new VCFHeaderLine("command", commandLine));

    // Sample names are routed through an IndexedSampleList, exactly as before.
    final SampleList sampleList = new IndexedSampleList(inputVCFHeader.getGenotypeSamples());
    return new VCFHeader(outputLines, sampleList.asSetOfSamples());
}

/** Creates a FORMAT header line whose count is {@code VCFHeaderLineCount.A}. */
private static VCFFormatHeaderLine countAFormatLine(final String id, final VCFHeaderLineType type, final String description) {
    return new VCFFormatHeaderLine(id, VCFHeaderLineCount.A, type, description);
}
Use of org.broadinstitute.hellbender.utils.genotyper.SampleList in project gatk by broadinstitute.
Class PairHMMLikelihoodCalculationEngineUnitTest, method testComputeLikelihoods.
/**
 * Computes PairHMM likelihoods for a single read against two haplotypes that differ at one base
 * and checks that the first haplotype added scores higher than the second.
 *
 * <p>The test turns on {@code writeLikelihoodsToFile}; the flag is restored and the likelihoods
 * file is removed even when an assertion fails, so later tests are not affected.
 */
@Test
public void testComputeLikelihoods() {
    final LikelihoodEngineArgumentCollection LEAC = new LikelihoodEngineArgumentCollection();
    // Remember the global flag so it can be put back once the test is done.
    final boolean previousWriteFlag = PairHMMLikelihoodCalculationEngine.writeLikelihoodsToFile;
    PairHMMLikelihoodCalculationEngine.writeLikelihoodsToFile = true;
    try {
        final ReadLikelihoodCalculationEngine lce = new PairHMMLikelihoodCalculationEngine((byte) SAMUtils.MAX_PHRED_SCORE, new PairHMMNativeArguments(), PairHMM.Implementation.LOGLESS_CACHING, MathUtils.logToLog10(QualityUtils.qualToErrorProbLog10(LEAC.phredScaledGlobalReadMismappingRate)), PairHMMLikelihoodCalculationEngine.PCRErrorModel.CONSERVATIVE);
        try {
            final Map<String, List<GATKRead>> perSampleReadList = new HashMap<>();
            final int n = 10;
            final GATKRead read1 = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(n + "M"));
            read1.setMappingQuality(60);
            final String sample1 = "sample1";
            perSampleReadList.put(sample1, Arrays.asList(read1));
            final SampleList samples = new IndexedSampleList(sample1);
            final AssemblyResultSet assemblyResultSet = new AssemblyResultSet();
            final byte[] bases = Strings.repeat("A", n + 1).getBytes();
            final Haplotype hap1 = new Haplotype(bases, true);
            hap1.setGenomeLocation(read1);
            assemblyResultSet.add(hap1);
            // Work on a copy: editing 'bases' in place would also change the array handed to hap1.
            final byte[] basesModified = bases.clone();
            //different bases
            basesModified[5] = 'C';
            final Haplotype hap2 = new Haplotype(basesModified, false);
            //use same loc
            hap2.setGenomeLocation(read1);
            assemblyResultSet.add(hap2);
            final ReadLikelihoods<Haplotype> likes = lce.computeReadLikelihoods(assemblyResultSet, samples, perSampleReadList);
            final LikelihoodMatrix<Haplotype> mtx = likes.sampleMatrix(0);
            Assert.assertEquals(mtx.numberOfAlleles(), 2);
            Assert.assertEquals(mtx.numberOfReads(), 1);
            final double v1 = mtx.get(0, 0);
            final double v2 = mtx.get(1, 0);
            Assert.assertTrue(v1 > v2, "matching haplotype should have a higher likelihood");
        } finally {
            // Close the engine before the file is deleted, as the original sequence did.
            lce.close();
        }
    } finally {
        // Best-effort cleanup of the likelihoods file, then restore the global flag.
        new File(PairHMMLikelihoodCalculationEngine.LIKELIHOODS_FILENAME).delete();
        PairHMMLikelihoodCalculationEngine.writeLikelihoodsToFile = previousWriteFlag;
    }
}
Use of org.broadinstitute.hellbender.utils.genotyper.SampleList in project gatk by broadinstitute.
Class RandomLikelihoodCalculationEngineUnitTest, method testComputeLikelihoods.
/**
 * Runs the random likelihood engine on a single read against two haplotypes that differ at one
 * base and checks the shape of the result (two alleles, one read) and that both likelihoods are
 * negative.
 *
 * <p>The engine is closed even when an assertion fails.
 */
@Test
public void testComputeLikelihoods() {
    final ReadLikelihoodCalculationEngine lce = new RandomLikelihoodCalculationEngine();
    try {
        final Map<String, List<GATKRead>> perSampleReadList = new HashMap<>();
        final int n = 10;
        final GATKRead read1 = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(n + "M"));
        read1.setMappingQuality(60);
        final String sample1 = "sample1";
        perSampleReadList.put(sample1, Arrays.asList(read1));
        final SampleList samples = new IndexedSampleList(sample1);
        final AssemblyResultSet assemblyResultSet = new AssemblyResultSet();
        final byte[] bases = Strings.repeat("A", n + 1).getBytes();
        final Haplotype hap1 = new Haplotype(bases, true);
        hap1.setGenomeLocation(read1);
        assemblyResultSet.add(hap1);
        // Work on a copy: editing 'bases' in place would also change the array handed to hap1.
        final byte[] basesModified = bases.clone();
        //different bases
        basesModified[5] = 'C';
        final Haplotype hap2 = new Haplotype(basesModified, false);
        //use same loc
        hap2.setGenomeLocation(read1);
        assemblyResultSet.add(hap2);
        final ReadLikelihoods<Haplotype> likes = lce.computeReadLikelihoods(assemblyResultSet, samples, perSampleReadList);
        final LikelihoodMatrix<Haplotype> mtx = likes.sampleMatrix(0);
        Assert.assertEquals(mtx.numberOfAlleles(), 2);
        Assert.assertEquals(mtx.numberOfReads(), 1);
        final double v1 = mtx.get(0, 0);
        final double v2 = mtx.get(1, 0);
        Assert.assertTrue(v1 < 0);
        Assert.assertTrue(v2 < 0);
    } finally {
        lce.close();
    }
}
Use of org.broadinstitute.hellbender.utils.genotyper.SampleList in project gatk by broadinstitute.
Class GenotypeGVCFs, method onTraversalStart.
/**
 * Prepares the tool before traversal: validates the interval arguments, resolves the intervals,
 * creates the genotyping engine, annotation engine and merger, and sets up the VCF writer.
 */
@Override
public void onTraversalStart() {
    final VCFHeader header = getHeaderForVariants();

    // The "only output calls starting in intervals" option cannot be used without intervals.
    if (onlyOutputCallsStartingInIntervals && !hasIntervals()) {
        throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified.");
    }

    if (hasIntervals()) {
        intervals = intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary());
    } else {
        intervals = Collections.emptyList();
    }

    //todo should this be getSampleNamesInOrder?
    final SampleList sampleList = new IndexedSampleList(header.getGenotypeSamples());

    genotypingEngine = new MinimalGenotypingEngine(createUAC(), sampleList, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs));
    annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude, dbsnp.dbsnp, Collections.emptyList());
    merger = new ReferenceConfidenceVariantContextMerger();

    setupVCFWriter(header, sampleList);
}
Use of org.broadinstitute.hellbender.utils.genotyper.SampleList in project gatk-protected by broadinstitute.
Class GenotypeGVCFs, method onTraversalStart.
/**
 * Runs once before traversal. Rejects the "only output calls starting in intervals" option when no
 * intervals were supplied, then initialises the intervals, genotyping engine, annotation engine,
 * merger and VCF writer.
 */
@Override
public void onTraversalStart() {
    final VCFHeader variantsHeader = getHeaderForVariants();

    final boolean intervalsRequiredButMissing = onlyOutputCallsStartingInIntervals && !hasIntervals();
    if (intervalsRequiredButMissing) {
        throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified.");
    }

    // Fall back to an empty list when the user did not restrict the traversal.
    if (hasIntervals()) {
        intervals = intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary());
    } else {
        intervals = Collections.emptyList();
    }

    //todo should this be getSampleNamesInOrder?
    final SampleList genotypeSamples = new IndexedSampleList(variantsHeader.getGenotypeSamples());

    genotypingEngine = new MinimalGenotypingEngine(createUAC(), genotypeSamples, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs));
    annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude, dbsnp.dbsnp, Collections.emptyList());
    merger = new ReferenceConfidenceVariantContextMerger();

    setupVCFWriter(variantsHeader, genotypeSamples);
}
Aggregations