use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.
the class ConvertGSVariantsToSegmentsIntegrationTest method composeExpectedSegments.
/**
 * Builds the list of segment records expected for the genotypes found in a VCF file.
 * <p>
 * One record is produced per (variant, genotype) pair, in file order. For each genotype the
 * {@code CN}, {@code CNP} and {@code CNF} extended attributes are read and turned into a
 * {@link HiddenStateSegment} spanning the variant's interval.
 * </p>
 *
 * @param vcf the VCF file to read; it is opened without requiring an index.
 * @param targets the target collection used to count the targets overlapped by each variant.
 * @return the expected segment records, never {@code null}.
 * @throws IOException declared for callers; kept for interface compatibility.
 */
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    // try-with-resources guarantees the reader (and its underlying file handle) is released
    // even when parsing one of the attributes below throws.
    try (final VCFFileReader reader = new VCFFileReader(vcf, false)) {
        reader.iterator().forEachRemaining(vc -> {
            final int targetCount = targets.indexRange(vc).size();
            for (final Genotype genotype : vc.getGenotypes()) {
                final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
                // NOTE(review): this pattern removes only the literal two-character sequence "[]",
                // not individual '[' or ']' characters -- confirm that is the intent.
                final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(","))
                        .mapToDouble(Double::parseDouble)
                        .toArray();
                final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
                final CopyNumberTriState call = expectedCall(cn);
                final double exactLog10Prob = expectedExactLog10(call, cnp);
                final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
                        new SimpleInterval(vc),
                        targetCount,
                        Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
                        0.000,
                        call,
                        -10.0 * exactLog10Prob,
                        Double.NaN,
                        Double.NaN,
                        Double.NaN,
                        // CNP entry at the neutral copy number, normalized by the sum over all entries.
                        -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum));
                result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
            }
        });
    }
    return result;
}
use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.
the class GATKToolUnitTest method testFeaturesHeader.
/**
 * Verifies that the header a tool reports for its {@code --mask} feature input matches,
 * section by section, the header obtained by reading the same VCF file directly.
 */
@Test
public void testFeaturesHeader() throws Exception {
    final File bigHeaderVcf = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf");
    final TestGATKToolWithFeatures featureTool = new TestGATKToolWithFeatures();
    final CommandLineParser parser = new CommandLineArgumentParser(featureTool);
    parser.parseArguments(System.out, new String[] { "--mask", bigHeaderVcf.getCanonicalPath() });
    featureTool.onStartup();

    // The tool exposes the header as a plain Object; it must actually be a VCF header.
    final Object rawHeader = featureTool.getHeaderForFeatures(featureTool.mask);
    Assert.assertTrue(rawHeader instanceof VCFHeader);
    final VCFHeader toolHeader = (VCFHeader) rawHeader;

    // Read the file directly (no index required) and compare each header section.
    try (final VCFFileReader directReader = new VCFFileReader(bigHeaderVcf, false)) {
        final VCFHeader fileHeader = directReader.getFileHeader();
        Assert.assertEquals(toolHeader.getGenotypeSamples(), fileHeader.getGenotypeSamples());
        Assert.assertEquals(toolHeader.getInfoHeaderLines(), fileHeader.getInfoHeaderLines());
        Assert.assertEquals(toolHeader.getFormatHeaderLines(), fileHeader.getFormatHeaderLines());
        Assert.assertEquals(toolHeader.getFilterLines(), fileHeader.getFilterLines());
        Assert.assertEquals(toolHeader.getContigLines(), fileHeader.getContigLines());
        Assert.assertEquals(toolHeader.getOtherHeaderLines(), fileHeader.getOtherHeaderLines());
    }

    featureTool.doWork();
    featureTool.onShutdown();
}
use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.
the class GATKToolUnitTest method testBestSequenceDictionary_fromVariants.
/**
 * Verifies that, when the only input is a VCF carrying a sequence dictionary, the tool's
 * best available sequence dictionary is the one stored in that VCF's header.
 */
@Test
public void testBestSequenceDictionary_fromVariants() throws Exception {
    final File vcfWithDict = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_withSequenceDict.vcf");
    final GATKTool featureTool = new TestGATKToolWithFeatures();
    final CommandLineParser parser = new CommandLineArgumentParser(featureTool);
    parser.parseArguments(System.out, new String[] { "--mask", vcfWithDict.getCanonicalPath() });
    featureTool.onStartup();

    final SAMSequenceDictionary dictFromTool = featureTool.getBestAvailableSequenceDictionary();

    // Load the dictionary straight from the VCF header and check agreement in both directions.
    try (final VCFFileReader directReader = new VCFFileReader(vcfWithDict)) {
        final SAMSequenceDictionary dictFromFile = directReader.getFileHeader().getSequenceDictionary();
        dictFromTool.assertSameDictionary(dictFromFile);
        dictFromFile.assertSameDictionary(dictFromTool);
        Assert.assertEquals(dictFromTool, dictFromFile);
    }
}
use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.
the class SortVcfTest method validateSortingResults.
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 * <p>
 * Ordering is judged with the record comparator obtained from the output file's own header:
 * every record must compare greater than or equal to the one before it.
 * </p>
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    int variantContextCount = 0;
    // Manage the reader itself (not only its iterator) so the file handle is released even
    // when an ordering assertion fails part-way through the file.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false)) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        try (final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
            while (iterator.hasNext()) {
                final VariantContext outputContext = iterator.next();
                if (last != null) {
                    Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
                }
                last = outputContext;
                variantContextCount++;
            }
        }
    }
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.
the class SplitVcfsTest method testSplit.
/**
 * Runs the tool on a merged indel/SNP VCF and verifies that each input record shows up, in
 * input order, in the matching output: indels in the indel output and SNPs in the SNP output.
 * Both outputs must be fully consumed by the end of the input.
 */
@Test
public void testSplit() {
    final File indelOutputFile = new File(OUTPUT_DATA_PATH, "split-vcfs-test-indels-delete-me.vcf");
    final File snpOutputFile = new File(OUTPUT_DATA_PATH, "split-vcfs-test-snps-delete-me.vcf");
    final File input = new File(TEST_DATA_PATH, "CEUTrio-merged-indels-snps.vcf");
    indelOutputFile.deleteOnExit();
    snpOutputFile.deleteOnExit();
    final String[] args = new String[] { "--input", input.getAbsolutePath(), "--SNP_OUTPUT", snpOutputFile.getAbsolutePath(), "--INDEL_OUTPUT", indelOutputFile.getAbsolutePath() };
    runCommandLine(args);
    final Queue<String> indelContigPositions = AbstractVcfMergingClpTester.loadContigPositions(indelOutputFile);
    final Queue<String> snpContigPositions = AbstractVcfMergingClpTester.loadContigPositions(snpOutputFile);
    // try-with-resources releases the input reader even when one of the assertions below fails.
    try (final VCFFileReader reader = new VCFFileReader(input)) {
        for (final VariantContext inputContext : reader) {
            // The two checks are deliberately independent (not else-if), as in the original test.
            if (inputContext.isIndel()) {
                Assert.assertEquals(AbstractVcfMergingClpTester.getContigPosition(inputContext), indelContigPositions.poll());
            }
            if (inputContext.isSNP()) {
                Assert.assertEquals(AbstractVcfMergingClpTester.getContigPosition(inputContext), snpContigPositions.poll());
            }
        }
    }
    // We should have polled everything off the indel (snp) queues
    Assert.assertEquals(indelContigPositions.size(), 0);
    Assert.assertEquals(snpContigPositions.size(), 0);
}
Aggregations