Search in sources :

Example 86 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfRegulomeDB method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VCFHeader header = in.getHeader();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
    header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    header.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Format: Position|Distance|Rank"));
    out.writeHeader(header);
    while (in.hasNext()) {
        List<String> regDataList = new ArrayList<String>();
        VariantContext ctx = in.next();
        progress.watch(ctx.getContig(), ctx.getStart());
        int start = Math.max(0, ctx.getStart() - this.extend);
        int end = ctx.getEnd() + this.extend;
        for (Iterator<RegData> iter = this.regDataTabixFileReader.iterator(ctx.getContig(), start, end); iter.hasNext(); ) {
            RegData curr = iter.next();
            if (this.acceptRegex != null && !this.acceptRegex.matcher(curr.rank).matches()) {
                continue;
            }
            String str = String.valueOf(curr.chromSart) + "|" + String.valueOf(Math.abs(curr.chromSart - (ctx.getStart() - 1))) + "|" + curr.rank;
            regDataList.add(str);
        }
        if (regDataList.isEmpty()) {
            out.add(ctx);
            continue;
        }
        VariantContextBuilder vcb = new VariantContextBuilder(ctx);
        vcb.attribute(this.infoTag, regDataList.toArray());
        out.add(vcb.make());
    }
    progress.finish();
    return 0;
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 87 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfRemoveGenotypeJs method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    try {
        this.script = super.compileJavascript(scriptExpr, scriptFile);
        final VCFHeader h2 = new VCFHeader(in.getHeader());
        if (!this.filterName.isEmpty()) {
            h2.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));
        }
        addMetaData(h2);
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        final Bindings bindings = this.script.getEngine().createBindings();
        bindings.put("header", in.getHeader());
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            bindings.put("variant", ctx);
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            List<Genotype> genotypes = new ArrayList<>();
            int countCalled = ctx.getNSamples();
            for (int i = 0; i < ctx.getNSamples(); ++i) {
                Genotype genotype = ctx.getGenotype(i);
                bindings.put("genotype", genotype);
                if (!genotype.isCalled() || genotype.isNoCall() || !genotype.isAvailable()) {
                    countCalled--;
                } else if (genotype.isCalled() && !super.evalJavaScriptBoolean(this.script, bindings)) {
                    if (!this.filterName.isEmpty()) {
                        if (!genotype.isFiltered()) {
                            genotype = new GenotypeBuilder(genotype).filters(this.filterName).make();
                        }
                    } else if (this.replaceByHomRef) {
                        List<Allele> homRefList = new ArrayList<>(genotype.getPloidy());
                        for (int p = 0; p < genotype.getPloidy(); ++p) {
                            homRefList.add(ctx.getReference());
                        }
                        genotype = new GenotypeBuilder(genotype).alleles(homRefList).make();
                    } else {
                        genotype = GenotypeBuilder.createMissing(genotype.getSampleName(), genotype.getPloidy());
                    }
                    countCalled--;
                }
                genotypes.add(genotype);
            }
            if (countCalled == 0 && this.removeCtxNoGenotype) {
                continue;
            }
            vcb.genotypes(genotypes);
            out.add(vcb.make());
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        this.script = null;
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Bindings(javax.script.Bindings) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine)

Example 88 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfSkatSlidingWindow method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.nJobs < 1) {
        this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
        LOG.info("setting njobs to " + this.nJobs);
    }
    VcfIterator r = null;
    try {
        final VCFHeader header;
        final SAMSequenceDictionary dict;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        try (final VCFFileReader vr = new VCFFileReader(vcfFile, true)) {
            header = vr.getFileHeader();
            dict = header.getSequenceDictionary();
        }
        if (dict == null || dict.isEmpty()) {
            throw new JvarkitException.VcfDictionaryMissing(vcfFile);
        }
        if (!this.limit_contigs.isEmpty()) {
            if (this.limit_contigs.stream().anyMatch(C -> dict.getSequence(C) == null)) {
                LOG.error("user contig missing in vcf dictionary.");
                return -1;
            }
        }
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        final Set<Pedigree.Person> samples = new HashSet<>(pedigree.getPersons());
        samples.removeIf(I -> !(I.isAffected() || I.isUnaffected()) || !header.getSampleNamesInOrder().contains(I.getId()));
        this.writer = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        final Consumer<SkatCallerResult> writeResult = (R) -> {
            synchronized (this.writer) {
                this.writer.println(R.toString());
            }
        };
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            if (!this.limit_contigs.isEmpty() && !this.limit_contigs.contains(ssr.getSequenceName())) {
                LOG.warning("skipping contig " + ssr.getSequenceName());
                continue;
            }
            LOG.info("contig " + ssr.getSequenceName());
            final ExecutorService executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>(this.nJobs));
            final List<Future<Integer>> results = new ArrayList<>(this.nJobs);
            for (int i = 0; i < this.nJobs; i++) {
                final int windowLen = Math.max(1, ssr.getSequenceLength() / this.nJobs);
                final SkatWorker caller = new SkatWorker(vcfFile, new Interval(ssr.getSequenceName(), i * windowLen, Math.min(ssr.getSequenceLength(), (i + 1) * windowLen)), samples, this.skat.build(), writeResult);
                results.add(executorService.submit(caller));
            }
            executorService.shutdown();
            executorService.awaitTermination(365, TimeUnit.DAYS);
            for (final Future<Integer> fl : results) {
                try {
                    if (fl.get() != 0) {
                        LOG.error("An error occured");
                        return -1;
                    }
                } catch (final Exception err) {
                    LOG.error(err);
                    return -1;
                }
            }
        }
        this.writer.flush();
        this.writer.close();
        this.writer = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(this.writer);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Callable(java.util.concurrent.Callable) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Future(java.util.concurrent.Future) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ExecutorService(java.util.concurrent.ExecutorService) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) List(java.util.List) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 89 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class SortVcfOnRef2 method sortvcf.

protected int sortvcf(BufferedReader in) throws IOException {
    if (this.refdict != null) {
        LOG.info("load dict from " + this.refdict);
        this.dict = SAMSequenceDictionaryExtractor.extractDictionary(this.refdict);
        if (this.dict == null) {
            LOG.error("cannot find sam sequence dictionary from " + refdict);
        }
    }
    final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
    final VCFHeader h2 = new VCFHeader(cah.header);
    if (this.dict != null) {
        h2.setSequenceDictionary(this.dict);
    } else {
        this.dict = h2.getSequenceDictionary();
        if (this.dict == null) {
            LOG.error("No internal sequence dictionay found in input");
            return -1;
        }
    }
    addMetaData(h2);
    if (this.dict.isEmpty()) {
        LOG.warn("SEQUENCE DICTIONARY IS EMPTY/NULL");
    }
    CloseableIterator<ChromPosLine> iter = null;
    SortingCollection<ChromPosLine> array = null;
    VariantContextWriter w = null;
    try {
        array = SortingCollection.newInstance(ChromPosLine.class, new VariantCodec(), new VariantComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        array.setDestructiveIteration(true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dict);
        String line;
        while ((line = in.readLine()) != null) {
            final ChromPosLine cpl = new ChromPosLine(line);
            progress.watch(cpl.tid, cpl.pos);
            array.add(cpl);
        }
        array.doneAdding();
        progress.finish();
        w = super.openVariantContextWriter(outputFile);
        w.writeHeader(h2);
        iter = array.iterator();
        while (iter.hasNext()) {
            w.add(cah.codec.decode(iter.next().line));
            if (w.checkError())
                break;
        }
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(iter);
        if (array != null)
            array.cleanup();
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) IOException(java.io.IOException)

Example 90 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

the class GenomicsDBImportIntegrationTest method testPreserveContigOrderingInHeader.

@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), new SimpleInterval("chr20", 17959479, 17959479), workspace, 0, false, 0);
    try (final GenomicsDBFeatureReader<VariantContext, PositionalBufferedStream> genomicsDBFeatureReader = new GenomicsDBFeatureReader<>(new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath(), new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath(), workspace, GenomicsDBConstants.DEFAULT_ARRAY_NAME, b38_reference_20_21, null, new BCF2Codec());
        final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader = AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true)) {
        final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader) genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
        final SAMSequenceDictionary dictionaryFromInputGVCF = ((VCFHeader) inputGVCFReader.getHeader()).getSequenceDictionary();
        Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) GenomicsDBFeatureReader(com.intel.genomicsdb.GenomicsDBFeatureReader) PositionalBufferedStream(htsjdk.tribble.readers.PositionalBufferedStream) BCF2Codec(htsjdk.variant.bcf2.BCF2Codec) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34