use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SelectVariants method onTraversalStart.
/**
* Set up the VCF writer, the sample expressions and regexs, filters inputs, and the JEXL matcher
*
*/
@Override
public void onTraversalStart() {
final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
// Initialize VCF header lines
final Set<VCFHeaderLine> headerLines = createVCFHeaderLineList(vcfHeaders);
for (int i = 0; i < selectExpressions.size(); i++) {
// It's not necessary that the user supply select names for the JEXL expressions, since those
// expressions will only be needed for omitting records. Make up the select names here.
selectNames.add(String.format("select-%d", i));
}
jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);
// Prepare the sample names and types to be used by the corresponding filters
samples = createSampleNameInclusionList(vcfHeaders);
selectedTypes = createSampleTypeInclusionList();
// Look at the parameters to decide which analysis to perform
discordanceOnly = discordanceTrack != null;
if (discordanceOnly) {
logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
}
concordanceOnly = concordanceTrack != null;
if (concordanceOnly) {
logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
}
if (mendelianViolations) {
sampleDB = initializeSampleDB();
mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
}
selectRandomFraction = fractionRandom > 0;
if (selectRandomFraction) {
logger.info("Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track");
}
//TODO: this should be refactored/consolidated as part of
// https://github.com/broadinstitute/gatk/issues/121 and
// https://github.com/broadinstitute/gatk/issues/1116
Set<VCFHeaderLine> actualLines = null;
SAMSequenceDictionary sequenceDictionary = null;
if (hasReference()) {
File refFile = referenceArguments.getReferenceFile();
sequenceDictionary = this.getReferenceDictionary();
actualLines = VcfUtils.updateHeaderContigLines(headerLines, refFile, sequenceDictionary, suppressReferencePath);
} else {
sequenceDictionary = getHeaderForVariants().getSequenceDictionary();
if (null != sequenceDictionary) {
actualLines = VcfUtils.updateHeaderContigLines(headerLines, null, sequenceDictionary, suppressReferencePath);
} else {
actualLines = headerLines;
}
}
vcfWriter = createVCFWriter(outFile);
vcfWriter.writeHeader(new VCFHeader(actualLines, samples));
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class UpdateVCFSequenceDictionary method onTraversalStart.
@Override
public void onTraversalStart() {
VCFHeader inputHeader = getHeaderForVariants();
VCFHeader outputHeader = inputHeader == null ? new VCFHeader() : new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples());
getDefaultToolVCFHeaderLines().forEach(line -> outputHeader.addMetaDataLine(line));
sourceDictionary = getSequenceDictionaryFromInput(dictionarySource);
// Warn and require opt-in via -replace if we're about to clobber a valid sequence
// dictionary. Check the input file directly via the header rather than using the
// engine, since it might dig one up from an index.
SAMSequenceDictionary oldDictionary = inputHeader == null ? null : inputHeader.getSequenceDictionary();
if ((oldDictionary != null && !oldDictionary.getSequences().isEmpty()) && !replace) {
throw new CommandLineException.BadArgumentValue(String.format("The input variant file %s already contains a sequence dictionary. " + "Use %s to force the dictionary to be replaced.", getDrivingVariantsFeatureInput().getName(), REPLACE_ARGUMENT_NAME));
}
outputHeader.setSequenceDictionary(sourceDictionary);
vcfWriter = createVCFWriter(new File(outFile));
vcfWriter.writeHeader(outputHeader);
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class IndexUtils method getSamSequenceDictionaryFromIndex.
private static SAMSequenceDictionary getSamSequenceDictionaryFromIndex(final Index index) {
final List<String> seqNames = index.getSequenceNames();
if (seqNames == null || seqNames.isEmpty()) {
return null;
}
final SAMSequenceDictionary dict = new SAMSequenceDictionary();
//use UNKNOWN_SEQUENCE_LENGTH to indicate contigs that will not be compared by length (see SequenceDictionaryUtils.sequenceRecordsAreEquivalent)
seqNames.forEach(seqName -> dict.addSequence(new SAMSequenceRecord(seqName, SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)));
return dict;
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class AnnotateTargetsIntegrationTest method checkOutputFileContent.
private void checkOutputFileContent(final File outputFile, final boolean mustHaveGCContent, final boolean mustHaveRepeats) throws IOException {
try (final TargetTableReader outputReader = new TargetTableReader(outputFile);
final TargetTableReader inputReader = new TargetTableReader(TARGET_FILE);
final ReferenceFileSource reference = new ReferenceFileSource(REFERENCE)) {
final SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
Target outputTarget;
Target inputTarget;
do {
outputTarget = outputReader.readRecord();
inputTarget = inputReader.readRecord();
if (outputTarget == inputTarget) {
Assert.assertNull(outputTarget);
break;
}
Assert.assertNotNull(outputTarget, "too few targets in output");
Assert.assertNotNull(inputTarget, "too many targets in output");
Assert.assertEquals(outputTarget.getName(), inputTarget.getName());
Assert.assertEquals(outputTarget.getInterval(), inputTarget.getInterval());
final TargetAnnotationCollection annotations = outputTarget.getAnnotations();
if (mustHaveGCContent) {
Assert.assertTrue(annotations.hasAnnotation(TargetAnnotation.GC_CONTENT));
checkOutputGCContent(reference, outputTarget.getInterval(), annotations.getDouble(TargetAnnotation.GC_CONTENT));
} else {
Assert.assertFalse(annotations.hasAnnotation(TargetAnnotation.GC_CONTENT));
}
} while (true);
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class AnnotateTargetsIntegrationTest method createTargetFile.
@BeforeClass
public void createTargetFile() throws IOException {
final SAMSequenceDictionary referenceDictionary = resolveReferenceDictionary();
final List<SimpleInterval> targetIntervals = createRandomIntervals(referenceDictionary, NUMBER_OF_TARGETS, MIN_TARGET_SIZE, MAX_TARGET_SIZE, MEAN_TARGET_SIZE, TARGET_SIZE_STDEV);
final List<Target> targets = targetIntervals.stream().map(Target::new).collect(Collectors.toList());
TargetWriter.writeTargetsToFile(TARGET_FILE, targets);
final Index index = IndexFactory.createIndex(TARGET_FILE, new TargetCodec(), IndexFactory.IndexType.LINEAR);
final LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(TARGET_FILE_IDX));
index.write(stream);
stream.close();
}
Aggregations