Search in sources :

Example 16 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class MergeVcfs method doWork.

@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 17 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SortVcf method collectFileReadersAndHeaders.

private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
    for (final File input : INPUT) {
        final VCFFileReader in = new VCFFileReader(input, false);
        final VCFHeader header = in.getFileHeader();
        final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
        if (dict == null || dict.isEmpty()) {
            if (null == samSequenceDictionary) {
                throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
            }
            header.setSequenceDictionary(samSequenceDictionary);
        } else {
            if (null == samSequenceDictionary) {
                samSequenceDictionary = dict;
            } else {
                try {
                    samSequenceDictionary.assertSameDictionary(dict);
                } catch (final AssertionError e) {
                    throw new IllegalArgumentException(e);
                }
            }
        }
        if (sampleList.isEmpty()) {
            sampleList.addAll(header.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(header.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
            }
        }
        inputReaders.add(in);
        inputHeaders.add(header);
    }
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 18 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class VariantRecalibrator method onTraversalStart.

//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalStart() {
    if (gatk3Compatibility) {
        // Temporary argument for validation of GATK4 implementation against GATK3 results:
        // Reset the RNG and draw a single int to align the RNG initial state with that used
        // by GATK3 to allow comparison of results with GATK3
        Utils.resetRandomGenerator();
        Utils.getRandomGenerator().nextInt();
    }
    dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);
    if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS)
        Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
    if (IGNORE_INPUT_FILTERS != null) {
        ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
    }
    try {
        tranchesStream = new PrintStream(TRANCHES_FILE);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
    }
    for (FeatureInput<VariantContext> variantFeature : resource) {
        dataManager.addTrainingSet(new TrainingSet(variantFeature));
    }
    if (!dataManager.checkHasTrainingSet()) {
        throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    if (!dataManager.checkHasTruthSet()) {
        throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/2112
    // https://github.com/broadinstitute/gatk/issues/121,
    // https://github.com/broadinstitute/gatk/issues/1116 and
    // Initialize VCF header lines
    Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
    VariantRecalibrationUtils.addVQSRStandardHeaderLines(hInfo);
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    if (hasReference()) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, referenceArguments.getReferenceFile(), sequenceDictionary, true);
    } else if (null != sequenceDictionary) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
    }
    recalWriter = createVCFWriter(new File(output));
    recalWriter.writeHeader(new VCFHeader(hInfo));
    for (int iii = 0; iii < REPLICATE * 2; iii++) {
        replicate.add(Utils.getRandomGenerator().nextDouble());
    }
}
Also used : PrintStream(java.io.PrintStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ExpandingArrayList(org.broadinstitute.hellbender.utils.collections.ExpandingArrayList) FileNotFoundException(java.io.FileNotFoundException) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 19 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SelectVariants method onTraversalStart.

/**
     * Set up the VCF writer, the sample expressions and regexs, filters inputs, and the JEXL matcher
     *
     */
@Override
public void onTraversalStart() {
    final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
    // Initialize VCF header lines
    final Set<VCFHeaderLine> headerLines = createVCFHeaderLineList(vcfHeaders);
    for (int i = 0; i < selectExpressions.size(); i++) {
        // It's not necessary that the user supply select names for the JEXL expressions, since those
        // expressions will only be needed for omitting records.  Make up the select names here.
        selectNames.add(String.format("select-%d", i));
    }
    jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);
    // Prepare the sample names and types to be used by the corresponding filters
    samples = createSampleNameInclusionList(vcfHeaders);
    selectedTypes = createSampleTypeInclusionList();
    // Look at the parameters to decide which analysis to perform
    discordanceOnly = discordanceTrack != null;
    if (discordanceOnly) {
        logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
    }
    concordanceOnly = concordanceTrack != null;
    if (concordanceOnly) {
        logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
    }
    if (mendelianViolations) {
        sampleDB = initializeSampleDB();
        mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
    }
    selectRandomFraction = fractionRandom > 0;
    if (selectRandomFraction) {
        logger.info("Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track");
    }
    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116
    Set<VCFHeaderLine> actualLines = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (hasReference()) {
        File refFile = referenceArguments.getReferenceFile();
        sequenceDictionary = this.getReferenceDictionary();
        actualLines = VcfUtils.updateHeaderContigLines(headerLines, refFile, sequenceDictionary, suppressReferencePath);
    } else {
        sequenceDictionary = getHeaderForVariants().getSequenceDictionary();
        if (null != sequenceDictionary) {
            actualLines = VcfUtils.updateHeaderContigLines(headerLines, null, sequenceDictionary, suppressReferencePath);
        } else {
            actualLines = headerLines;
        }
    }
    vcfWriter = createVCFWriter(outFile);
    vcfWriter.writeHeader(new VCFHeader(actualLines, samples));
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) MendelianViolation(org.broadinstitute.hellbender.utils.samples.MendelianViolation) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) File(java.io.File)

Example 20 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class UpdateVCFSequenceDictionary method onTraversalStart.

@Override
public void onTraversalStart() {
    VCFHeader inputHeader = getHeaderForVariants();
    VCFHeader outputHeader = inputHeader == null ? new VCFHeader() : new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(line -> outputHeader.addMetaDataLine(line));
    sourceDictionary = getSequenceDictionaryFromInput(dictionarySource);
    // Warn and require opt-in via -replace if we're about to clobber a valid sequence
    // dictionary. Check the input file directly via the header rather than using the
    // engine, since it might dig one up from an index.
    SAMSequenceDictionary oldDictionary = inputHeader == null ? null : inputHeader.getSequenceDictionary();
    if ((oldDictionary != null && !oldDictionary.getSequences().isEmpty()) && !replace) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file %s already contains a sequence dictionary. " + "Use %s to force the dictionary to be replaced.", getDrivingVariantsFeatureInput().getName(), REPLACE_ARGUMENT_NAME));
    }
    outputHeader.setSequenceDictionary(sourceDictionary);
    vcfWriter = createVCFWriter(new File(outFile));
    vcfWriter.writeHeader(outputHeader);
}
Also used : VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) File(java.io.File)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)110 Test (org.testng.annotations.Test)41 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)37 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)37 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)35 File (java.io.File)31 UserException (org.broadinstitute.hellbender.exceptions.UserException)24 VariantContext (htsjdk.variant.variantcontext.VariantContext)23 Argument (org.broadinstitute.barclay.argparser.Argument)21 Collectors (java.util.stream.Collectors)20 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)20 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)18 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)17 VCFHeader (htsjdk.variant.vcf.VCFHeader)16 IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)14 List (java.util.List)14 JavaRDD (org.apache.spark.api.java.JavaRDD)14 Broadcast (org.apache.spark.broadcast.Broadcast)12 StreamSupport (java.util.stream.StreamSupport)11