Search in sources :

Example 1 with VcfList

use of com.github.lindenb.jvarkit.tools.vcflist.VcfList in project jvarkit by lindenb.

the class VcfEpistatis01 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.number_of_jobs < 1) {
        LOG.error("bad number of jobs");
        return -1;
    }
    try {
        final int variantsCount;
        final List<VariantContext> inMemoryVariants;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        final File tmpIndexFile;
        if (vcfFile.equals(this.outputFile)) {
            LOG.error("input == output");
            return -1;
        }
        VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
        final VCFHeader header = vcfFileReader.getFileHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        pedigree.verifyPersonsHaveUniqueNames();
        final Map<String, Integer> sample2index = header.getSampleNameToOffset();
        final int[] caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
        final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
        if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
            LOG.error("empty ped or no case/ctrl");
            vcfFileReader.close();
            return -1;
        }
        if (this.load_variants_in_memory) {
            LOG.info("loading variants in memory");
            tmpIndexFile = null;
            final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
            inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(// should fix https://github.com/samtools/htsjdk/issues/1026 ?
            V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0).collect(Collectors.toList()));
            variantsCount = inMemoryVariants.size();
            iter2.close();
        } else {
            tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
            tmpIndexFile.deleteOnExit();
            new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
            final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
            variantsCount = tmpList.size();
            tmpList.close();
            inMemoryVariants = null;
        }
        vcfFileReader.close();
        LOG.info("Number of variants: " + variantsCount);
        Result bestResult = null;
        int x = this.start_index_at;
        while (x + 1 < variantsCount) {
            final List<Runner> runners = new Vector<>(this.number_of_jobs);
            while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
                LOG.info("starting " + x + "/" + variantsCount);
                runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
                ++x;
            }
            final ExecutorService execSvc;
            if (this.number_of_jobs == 1) {
                execSvc = null;
            } else {
                execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
                ;
            }
            if (this.number_of_jobs == 1) {
                runners.get(0).call();
            } else {
                execSvc.invokeAll(runners);
            }
            if (execSvc != null) {
                execSvc.shutdown();
                execSvc.awaitTermination(10000L, TimeUnit.DAYS);
                execSvc.shutdownNow();
            }
            runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
                LOG.info("That took " + (D / 1000f) + " seconds.");
            });
            for (final Runner r : runners) {
                final Result rez = r.result;
                if (rez == null)
                    continue;
                if (bestResult == null || bestResult.score < rez.score) {
                    bestResult = rez;
                    if (this.output_score) {
                        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
                        pw.println(bestResult.score + "\t" + bestResult.toString());
                        pw.flush();
                        pw.close();
                    } else {
                        final VariantContextWriter w = openVariantContextWriter(this.outputFile);
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
                        w.writeHeader(header2);
                        w.add(bestResult.ctx1);
                        w.add(bestResult.ctx2);
                        w.close();
                    }
                }
            }
            LOG.info("best: " + bestResult);
        }
        if (tmpIndexFile != null)
            tmpIndexFile.delete();
        return 0;
    } catch (final Exception err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Callable(java.util.concurrent.Callable) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Vector(java.util.Vector) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ExecutorService(java.util.concurrent.ExecutorService) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) File(java.io.File) Executors(java.util.concurrent.Executors) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Vector(java.util.Vector) PrintWriter(java.io.PrintWriter) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList)

Example 2 with VcfList

use of com.github.lindenb.jvarkit.tools.vcflist.VcfList in project jvarkit by lindenb.

the class TestNg01 method testVcfFileOffsets.

@Test(dataProvider = "all_vcfs")
public void testVcfFileOffsets(final String path) throws IOException {
    final File vcfFile = new File(path);
    VcfOffsetsIndexFactory factory = new VcfOffsetsIndexFactory();
    final File indexFile = factory.indexVcfFile(vcfFile);
    Assert.assertTrue(indexFile.exists());
    final VcfList list = VcfList.fromFile(vcfFile);
    final List<Integer> positions = streamVcf(vcfFile).map(CTX -> CTX.getStart()).collect(Collectors.toList());
    Assert.assertEquals(positions.size(), list.size());
    Assert.assertNotNull(list.getHeader());
    for (int i = list.size() - 1; i >= 0; --i) {
        Assert.assertNotNull(list.get(i));
        Assert.assertEquals(list.get(i).getStart(), positions.get(i).intValue());
    }
    Assert.assertEquals((long) list.size(), streamVcf(vcfFile).count());
    list.close();
    Assert.assertTrue(indexFile.delete());
}
Also used : Arrays(java.util.Arrays) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) VcfFilterJdk(com.github.lindenb.jvarkit.tools.vcffilterjs.VcfFilterJdk) Test(org.testng.annotations.Test) VcfBurdenFisherH(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH) Vcf2Xml(com.github.lindenb.jvarkit.tools.vcf2xml.Vcf2Xml) VcfInjectPedigree(com.github.lindenb.jvarkit.tools.burden.VcfInjectPedigree) VcfToTable(com.github.lindenb.jvarkit.tools.misc.VcfToTable) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) VcfMultiToOneAllele(com.github.lindenb.jvarkit.tools.misc.VcfMultiToOneAllele) SAXParser(javax.xml.parsers.SAXParser) VcfNoCallToHomRef(com.github.lindenb.jvarkit.tools.misc.VcfNoCallToHomRef) VcfBurdenFisherV(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherV) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) VcfBurdenFilterGenes(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFilterGenes) Set(java.util.Set) PadEmptyFastq(com.github.lindenb.jvarkit.tools.misc.PadEmptyFastq) VcfMultiToOne(com.github.lindenb.jvarkit.tools.onesamplevcf.VcfMultiToOne) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) Stream(java.util.stream.Stream) VcfBurdenFilterExac(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFilterExac) VcfMoveFiltersToInfo(com.github.lindenb.jvarkit.tools.burden.VcfMoveFiltersToInfo) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Bam2Raster(com.github.lindenb.jvarkit.tools.bam2graphics.Bam2Raster) SortVcfOnInfo(com.github.lindenb.jvarkit.tools.sortvcfonref.SortVcfOnInfo) TrapIndexer(com.github.lindenb.jvarkit.tools.trap.TrapIndexer) IterableAdapter(htsjdk.samtools.util.IterableAdapter) VcfTrap(com.github.lindenb.jvarkit.tools.trap.VcfTrap) ArrayList(java.util.ArrayList) FixVcfMissingGenotypes(com.github.lindenb.jvarkit.tools.misc.FixVcfMissingGenotypes) Assert(org.testng.Assert) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) VcfRebase(com.github.lindenb.jvarkit.tools.vcfrebase.VcfRebase) StreamSupport(java.util.stream.StreamSupport) Properties(java.util.Properties) Files(java.nio.file.Files) VCFBigWig(com.github.lindenb.jvarkit.tools.vcfbigwig.VCFBigWig) VcfGnomad(com.github.lindenb.jvarkit.tools.gnomad.VcfGnomad) IOException(java.io.IOException) File(java.io.File) DefaultHandler(org.xml.sax.helpers.DefaultHandler) VcfSetSequenceDictionary(com.github.lindenb.jvarkit.tools.misc.VcfSetSequenceDictionary) VcfCreateDictionary(com.github.lindenb.jvarkit.tools.misc.VcfCreateDictionary) NgsFilesSummary(com.github.lindenb.jvarkit.tools.ngsfiles.NgsFilesSummary) VcfBurdenRscriptV(com.github.lindenb.jvarkit.tools.burden.VcfBurdenRscriptV) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) BufferedReader(java.io.BufferedReader) GroupByGene(com.github.lindenb.jvarkit.tools.groupbygene.GroupByGene) VCFFamilies(com.github.lindenb.jvarkit.tools.vcftrios.VCFFamilies) ReferenceGenome(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenome) Algorithms(com.github.lindenb.jvarkit.util.Algorithms) Biostar84452(com.github.lindenb.jvarkit.tools.biostar.Biostar84452) VCFTrios(com.github.lindenb.jvarkit.tools.vcftrios.VCFTrios) URL(java.net.URL) LowResBam2Raster(com.github.lindenb.jvarkit.tools.bam2graphics.LowResBam2Raster) VCFBed(com.github.lindenb.jvarkit.tools.vcfbed.VCFBed) Random(java.util.Random) VcfToSql(com.github.lindenb.jvarkit.tools.vcf2sql.VcfToSql) MiniCaller(com.github.lindenb.jvarkit.tools.calling.MiniCaller) GoUtils(com.github.lindenb.jvarkit.tools.misc.GoUtils) FindAVariation(com.github.lindenb.jvarkit.tools.misc.FindAVariation) VcfXmlAmalgamation(com.github.lindenb.jvarkit.tools.vcfamalgation.VcfXmlAmalgamation) ImageIO(javax.imageio.ImageIO) BamToSql(com.github.lindenb.jvarkit.tools.misc.BamToSql) Gff2KnownGene(com.github.lindenb.jvarkit.tools.misc.Gff2KnownGene) VCFFixIndels(com.github.lindenb.jvarkit.tools.vcffixindels.VCFFixIndels) VCFStripAnnotations(com.github.lindenb.jvarkit.tools.vcfstripannot.VCFStripAnnotations) BufferedImage(java.awt.image.BufferedImage) Predicate(java.util.function.Predicate) BeforeClass(org.testng.annotations.BeforeClass) Collectors(java.util.stream.Collectors) IlluminaReadName(com.github.lindenb.jvarkit.tools.misc.IlluminaReadName) List(java.util.List) BackLocate(com.github.lindenb.jvarkit.tools.backlocate.BackLocate) VcfToSvg(com.github.lindenb.jvarkit.tools.misc.VcfToSvg) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) FastaSequenceReader(com.github.lindenb.jvarkit.util.bio.fasta.FastaSequenceReader) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VcfFilterNotInPedigree(com.github.lindenb.jvarkit.tools.burden.VcfFilterNotInPedigree) VcfFilterSequenceOntology(com.github.lindenb.jvarkit.tools.vcffilterso.VcfFilterSequenceOntology) FindAllCoverageAtPosition(com.github.lindenb.jvarkit.tools.misc.FindAllCoverageAtPosition) VcfToRdf(com.github.lindenb.jvarkit.tools.vcf2rdf.VcfToRdf) DataProvider(org.testng.annotations.DataProvider) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAXParserFactory(javax.xml.parsers.SAXParserFactory) VcfLoopOverGenes(com.github.lindenb.jvarkit.tools.burden.VcfLoopOverGenes) VcfToHilbert(com.github.lindenb.jvarkit.tools.hilbert.VcfToHilbert) VcfStats(com.github.lindenb.jvarkit.tools.vcfstats.VcfStats) VcfRemoveUnusedAlt(com.github.lindenb.jvarkit.tools.misc.VcfRemoveUnusedAlt) FileInputStream(java.io.FileInputStream) CaseControlCanvas(com.github.lindenb.jvarkit.tools.burden.CaseControlCanvas) VcfCompareCallers(com.github.lindenb.jvarkit.tools.vcfcmp.VcfCompareCallers) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) SamReader(htsjdk.samtools.SamReader) FastqShuffle(com.github.lindenb.jvarkit.tools.fastq.FastqShuffle) BamTile(com.github.lindenb.jvarkit.tools.misc.BamTile) VcfBurdenMAF(com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF) FastaSequence(com.github.lindenb.jvarkit.util.bio.fasta.FastaSequence) ConcatSam(com.github.lindenb.jvarkit.tools.misc.ConcatSam) Bam2Wig(com.github.lindenb.jvarkit.tools.bam2wig.Bam2Wig) FileReader(java.io.FileReader) File(java.io.File) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList) Test(org.testng.annotations.Test)

Aggregations

Parameter (com.beust.jcommander.Parameter)1 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)1 BackLocate (com.github.lindenb.jvarkit.tools.backlocate.BackLocate)1 Bam2Raster (com.github.lindenb.jvarkit.tools.bam2graphics.Bam2Raster)1 LowResBam2Raster (com.github.lindenb.jvarkit.tools.bam2graphics.LowResBam2Raster)1 Bam2Wig (com.github.lindenb.jvarkit.tools.bam2wig.Bam2Wig)1 Biostar84452 (com.github.lindenb.jvarkit.tools.biostar.Biostar84452)1 CaseControlCanvas (com.github.lindenb.jvarkit.tools.burden.CaseControlCanvas)1 VcfBurdenFilterExac (com.github.lindenb.jvarkit.tools.burden.VcfBurdenFilterExac)1 VcfBurdenFilterGenes (com.github.lindenb.jvarkit.tools.burden.VcfBurdenFilterGenes)1 VcfBurdenFisherH (com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH)1 VcfBurdenFisherV (com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherV)1 VcfBurdenMAF (com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF)1 VcfBurdenRscriptV (com.github.lindenb.jvarkit.tools.burden.VcfBurdenRscriptV)1 VcfFilterNotInPedigree (com.github.lindenb.jvarkit.tools.burden.VcfFilterNotInPedigree)1 VcfInjectPedigree (com.github.lindenb.jvarkit.tools.burden.VcfInjectPedigree)1 VcfLoopOverGenes (com.github.lindenb.jvarkit.tools.burden.VcfLoopOverGenes)1 VcfMoveFiltersToInfo (com.github.lindenb.jvarkit.tools.burden.VcfMoveFiltersToInfo)1 MiniCaller (com.github.lindenb.jvarkit.tools.calling.MiniCaller)1 FastqShuffle (com.github.lindenb.jvarkit.tools.fastq.FastqShuffle)1