use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SplitVcfs method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
final ProgressLogger progress = new ProgressLogger(logger, 10000);
final VCFFileReader fileReader = new VCFFileReader(INPUT);
final VCFHeader fileHeader = fileReader.getFileHeader();
final SAMSequenceDictionary sequenceDictionary = SEQUENCE_DICTIONARY != null ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary() : fileHeader.getSequenceDictionary();
if (CREATE_INDEX && sequenceDictionary == null) {
throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
}
final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setReferenceDictionary(sequenceDictionary).clearOptions();
if (CREATE_INDEX)
builder.setOption(Options.INDEX_ON_THE_FLY);
try (final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build()) {
snpWriter.writeHeader(fileHeader);
indelWriter.writeHeader(fileHeader);
int incorrectVariantCount = 0;
final CloseableIterator<VariantContext> iterator = fileReader.iterator();
while (iterator.hasNext()) {
final VariantContext context = iterator.next();
if (context.isIndel())
indelWriter.add(context);
else if (context.isSNP())
snpWriter.add(context);
else {
if (STRICT)
throw new IllegalStateException("Found a record with type " + context.getType().name());
else
incorrectVariantCount++;
}
progress.record(context.getContig(), context.getStart());
}
if (incorrectVariantCount > 0) {
logger.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
}
CloserUtil.close(iterator);
CloserUtil.close(fileReader);
}
return null;
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class PathSeqKmerSpark method runTool.
/** Get the list of distinct kmers in the reference, and write them to a file as a HopScotchSet. */
@Override
protected void runTool(final JavaSparkContext ctx) {
final SAMFileHeader hdr = getHeaderForReads();
SAMSequenceDictionary dict = null;
if (hdr != null)
dict = hdr.getSequenceDictionary();
final PipelineOptions options = getAuthenticatedGCSOptions();
final ReferenceMultiSource referenceMultiSource = getReference();
final List<SVKmer> kmerList = findKmers(ctx, KMER_SIZE, referenceMultiSource, options, dict);
final HopscotchSet<SVKmer> kmerSet = new HopscotchSet<>(kmerList);
final Output output = new Output(BucketUtils.createFile(OUTPUT_FILE));
final Kryo kryo = new Kryo();
kryo.setReferences(false);
kryo.writeClassAndObject(output, kmerSet);
output.close();
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class GATKToolUnitTest method testBestSequenceDictionary_fromVariants.
@Test
public void testBestSequenceDictionary_fromVariants() throws Exception {
final GATKTool tool = new TestGATKToolWithFeatures();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_withSequenceDict.vcf");
final String[] args = { "--mask", vcfFile.getCanonicalPath() };
clp.parseArguments(System.out, args);
tool.onStartup();
//read the dict back in and compare to vcf dict
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
try (final VCFFileReader reader = new VCFFileReader(vcfFile)) {
final SAMSequenceDictionary vcfDict = reader.getFileHeader().getSequenceDictionary();
toolDict.assertSameDictionary(vcfDict);
vcfDict.assertSameDictionary(toolDict);
Assert.assertEquals(toolDict, vcfDict);
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class GATKToolUnitTest method testBestSequenceDictionary_fromNothing.
@Test
public void testBestSequenceDictionary_fromNothing() throws Exception {
final GATKTool tool = new TestGATKToolWithNothing();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final String[] args = {};
clp.parseArguments(System.out, args);
tool.onStartup();
//read the dict back in and assert that it's null
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
Assert.assertNull(toolDict);
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class GATKToolUnitTest method testBestSequenceDictionary_fromReadsAndReference.
@Test
public void testBestSequenceDictionary_fromReadsAndReference() throws Exception {
final GATKTool tool = new TestGATKToolWithReads();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
final String fastaFile = hg19MiniReference;
final String[] args = { "-I", bamFile.getCanonicalPath(), "-R", fastaFile };
clp.parseArguments(System.out, args);
tool.onStartup();
//read the dict back in and compare to reference dict
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
final SAMSequenceDictionary fastaDict = new IndexedFastaSequenceFile(new File(fastaFile)).getSequenceDictionary();
toolDict.assertSameDictionary(fastaDict);
fastaDict.assertSameDictionary(toolDict);
Assert.assertEquals(toolDict, fastaDict);
}
Aggregations