Search in sources :

Example 56 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SelectVariants method onTraversalStart.

/**
     * Set up the VCF writer, the sample expressions and regexs, filters inputs, and the JEXL matcher
     *
     */
@Override
public void onTraversalStart() {
    final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
    // Initialize VCF header lines
    final Set<VCFHeaderLine> headerLines = createVCFHeaderLineList(vcfHeaders);
    for (int i = 0; i < selectExpressions.size(); i++) {
        // It's not necessary that the user supply select names for the JEXL expressions, since those
        // expressions will only be needed for omitting records.  Make up the select names here.
        selectNames.add(String.format("select-%d", i));
    }
    jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);
    // Prepare the sample names and types to be used by the corresponding filters
    samples = createSampleNameInclusionList(vcfHeaders);
    selectedTypes = createSampleTypeInclusionList();
    // Look at the parameters to decide which analysis to perform
    discordanceOnly = discordanceTrack != null;
    if (discordanceOnly) {
        logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
    }
    concordanceOnly = concordanceTrack != null;
    if (concordanceOnly) {
        logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
    }
    if (mendelianViolations) {
        sampleDB = initializeSampleDB();
        mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
    }
    selectRandomFraction = fractionRandom > 0;
    if (selectRandomFraction) {
        logger.info("Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track");
    }
    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116
    Set<VCFHeaderLine> actualLines = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (hasReference()) {
        File refFile = referenceArguments.getReferenceFile();
        sequenceDictionary = this.getReferenceDictionary();
        actualLines = VcfUtils.updateHeaderContigLines(headerLines, refFile, sequenceDictionary, suppressReferencePath);
    } else {
        sequenceDictionary = getHeaderForVariants().getSequenceDictionary();
        if (null != sequenceDictionary) {
            actualLines = VcfUtils.updateHeaderContigLines(headerLines, null, sequenceDictionary, suppressReferencePath);
        } else {
            actualLines = headerLines;
        }
    }
    vcfWriter = createVCFWriter(outFile);
    vcfWriter.writeHeader(new VCFHeader(actualLines, samples));
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) MendelianViolation(org.broadinstitute.hellbender.utils.samples.MendelianViolation) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) File(java.io.File)

Example 57 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class UpdateVCFSequenceDictionary method onTraversalStart.

@Override
public void onTraversalStart() {
    VCFHeader inputHeader = getHeaderForVariants();
    VCFHeader outputHeader = inputHeader == null ? new VCFHeader() : new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(line -> outputHeader.addMetaDataLine(line));
    sourceDictionary = getSequenceDictionaryFromInput(dictionarySource);
    // Warn and require opt-in via -replace if we're about to clobber a valid sequence
    // dictionary. Check the input file directly via the header rather than using the
    // engine, since it might dig one up from an index.
    SAMSequenceDictionary oldDictionary = inputHeader == null ? null : inputHeader.getSequenceDictionary();
    if ((oldDictionary != null && !oldDictionary.getSequences().isEmpty()) && !replace) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file %s already contains a sequence dictionary. " + "Use %s to force the dictionary to be replaced.", getDrivingVariantsFeatureInput().getName(), REPLACE_ARGUMENT_NAME));
    }
    outputHeader.setSequenceDictionary(sourceDictionary);
    vcfWriter = createVCFWriter(new File(outFile));
    vcfWriter.writeHeader(outputHeader);
}
Also used : VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) File(java.io.File)

Example 58 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class IndexUtils method getSamSequenceDictionaryFromIndex.

private static SAMSequenceDictionary getSamSequenceDictionaryFromIndex(final Index index) {
    final List<String> seqNames = index.getSequenceNames();
    if (seqNames == null || seqNames.isEmpty()) {
        return null;
    }
    final SAMSequenceDictionary dict = new SAMSequenceDictionary();
    //use UNKNOWN_SEQUENCE_LENGTH to indicate contigs that will not be compared by length (see SequenceDictionaryUtils.sequenceRecordsAreEquivalent)
    seqNames.forEach(seqName -> dict.addSequence(new SAMSequenceRecord(seqName, SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)));
    return dict;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 59 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class AnnotateTargetsIntegrationTest method checkOutputFileContent.

private void checkOutputFileContent(final File outputFile, final boolean mustHaveGCContent, final boolean mustHaveRepeats) throws IOException {
    try (final TargetTableReader outputReader = new TargetTableReader(outputFile);
        final TargetTableReader inputReader = new TargetTableReader(TARGET_FILE);
        final ReferenceFileSource reference = new ReferenceFileSource(REFERENCE)) {
        final SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
        Target outputTarget;
        Target inputTarget;
        do {
            outputTarget = outputReader.readRecord();
            inputTarget = inputReader.readRecord();
            if (outputTarget == inputTarget) {
                Assert.assertNull(outputTarget);
                break;
            }
            Assert.assertNotNull(outputTarget, "too few targets in output");
            Assert.assertNotNull(inputTarget, "too many targets in output");
            Assert.assertEquals(outputTarget.getName(), inputTarget.getName());
            Assert.assertEquals(outputTarget.getInterval(), inputTarget.getInterval());
            final TargetAnnotationCollection annotations = outputTarget.getAnnotations();
            if (mustHaveGCContent) {
                Assert.assertTrue(annotations.hasAnnotation(TargetAnnotation.GC_CONTENT));
                checkOutputGCContent(reference, outputTarget.getInterval(), annotations.getDouble(TargetAnnotation.GC_CONTENT));
            } else {
                Assert.assertFalse(annotations.hasAnnotation(TargetAnnotation.GC_CONTENT));
            }
        } while (true);
    }
}
Also used : SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReferenceFileSource(org.broadinstitute.hellbender.engine.ReferenceFileSource)

Example 60 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class AnnotateTargetsIntegrationTest method createTargetFile.

@BeforeClass
public void createTargetFile() throws IOException {
    final SAMSequenceDictionary referenceDictionary = resolveReferenceDictionary();
    final List<SimpleInterval> targetIntervals = createRandomIntervals(referenceDictionary, NUMBER_OF_TARGETS, MIN_TARGET_SIZE, MAX_TARGET_SIZE, MEAN_TARGET_SIZE, TARGET_SIZE_STDEV);
    final List<Target> targets = targetIntervals.stream().map(Target::new).collect(Collectors.toList());
    TargetWriter.writeTargetsToFile(TARGET_FILE, targets);
    final Index index = IndexFactory.createIndex(TARGET_FILE, new TargetCodec(), IndexFactory.IndexType.LINEAR);
    final LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(TARGET_FILE_IDX));
    index.write(stream);
    stream.close();
}
Also used : LittleEndianOutputStream(htsjdk.tribble.util.LittleEndianOutputStream) TargetCodec(org.broadinstitute.hellbender.utils.codecs.TargetCodec) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Index(htsjdk.tribble.index.Index) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)110 Test (org.testng.annotations.Test)41 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)37 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)37 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)35 File (java.io.File)31 UserException (org.broadinstitute.hellbender.exceptions.UserException)24 VariantContext (htsjdk.variant.variantcontext.VariantContext)23 Argument (org.broadinstitute.barclay.argparser.Argument)21 Collectors (java.util.stream.Collectors)20 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)20 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)18 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)17 VCFHeader (htsjdk.variant.vcf.VCFHeader)16 IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)14 List (java.util.List)14 JavaRDD (org.apache.spark.api.java.JavaRDD)14 Broadcast (org.apache.spark.broadcast.Broadcast)12 StreamSupport (java.util.stream.StreamSupport)11