Search in sources :

Example 11 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class AbstractBamSplitter method processInput.

@Override
protected int processInput(final SAMFileHeader header, final CloseableIterator<SAMRecord> iter) {
    if (beforeIterator(header) != 0) {
        getLogger().error("initialisation failed.");
        return -1;
    }
    this.writingBamArgs.setReferencePath(super.faidxPath);
    final Set<String> samples = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet());
    final String samplePrefix;
    if (samples.size() == 1) {
        samplePrefix = "." + samples.iterator().next();
    } else {
        samplePrefix = "";
    }
    final Map<T, SAMCount> tag2sw = new HashMap<>();
    try (PrintWriter mOut = this.manifestFile == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.manifestFile)) {
        long n_lost = 0L;
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            final Set<T> keys = createKeys(rec);
            if (keys == null || keys.isEmpty()) {
                n_lost++;
                continue;
            }
            for (final T id : keys) {
                SAMCount sc = tag2sw.get(id);
                if (sc == null) {
                    getLogger().info("Creating output for \"" + id + "\" N=" + (tag2sw.size() + 1));
                    final Path path = this.outputDir.resolve(this.prefix + samplePrefix + String.format(".%06d", (tag2sw.size() + 1)) + this.writingBamArgs.getFileExtension());
                    if (Files.exists(path) && !force_overwrite) {
                        getLogger().error("file exists : " + path + ". Use --force to overwrite");
                        return -1;
                    }
                    final SAMFileHeader h2 = header.clone();
                    h2.addComment(getProgramName() + " : " + id.toString());
                    JVarkitVersion.getInstance().addMetaData(this, h2);
                    sc = new SAMCount(this.writingBamArgs.openSamWriter(path, h2, true));
                    sc.path = path;
                    tag2sw.put(id, sc);
                }
                sc.sfw.addAlignment(rec);
                sc.count++;
            }
        }
        if (n_lost > 0)
            getLogger().warn(StringUtils.niceInt(n_lost) + " read(s) where lost because no group was assigned.");
        mOut.println("#KEY\tPATH\tCOUNT");
        for (final T id : tag2sw.keySet()) {
            final SAMCount sc = tag2sw.get(id);
            mOut.print(id.toString());
            mOut.print("\t");
            mOut.print(sc.path);
            mOut.print("\t");
            mOut.print(sc.count);
            mOut.println();
        }
        mOut.flush();
        return 0;
    } catch (final Throwable err) {
        getLogger().error(err);
        return -1;
    } finally {
        for (SAMCount sw : tag2sw.values()) {
            sw.sfw.close();
        }
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) WritingSamReaderType(com.github.lindenb.jvarkit.util.jcommander.Launcher.WritingSamReaderType) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) IOUtil(htsjdk.samtools.util.IOUtil) HashMap(java.util.HashMap) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) Matcher(java.util.regex.Matcher) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SAMFileWriter(htsjdk.samtools.SAMFileWriter) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) WritingBamArgs(com.github.lindenb.jvarkit.util.jcommander.Launcher.WritingBamArgs) Pattern(java.util.regex.Pattern) Path(java.nio.file.Path) HashMap(java.util.HashMap) SAMRecord(htsjdk.samtools.SAMRecord) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) PrintWriter(java.io.PrintWriter)

Example 12 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class VcfEpistatis01 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.number_of_jobs < 1) {
        LOG.error("bad number of jobs");
        return -1;
    }
    try {
        final int variantsCount;
        final List<VariantContext> inMemoryVariants;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        final File tmpIndexFile;
        if (vcfFile.equals(this.outputFile)) {
            LOG.error("input == output");
            return -1;
        }
        VCFReader vcfFileReader = VCFReaderFactory.makeDefault().open(vcfFile.toPath(), false);
        final VCFHeader header = vcfFileReader.getHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        pedigree.verifyPersonsHaveUniqueNames();
        final Map<String, Integer> sample2index = header.getSampleNameToOffset();
        final int[] caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
        final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
        if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
            LOG.error("empty ped or no case/ctrl");
            vcfFileReader.close();
            return -1;
        }
        if (this.load_variants_in_memory) {
            LOG.info("loading variants in memory");
            tmpIndexFile = null;
            final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
            inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(// should fix https://github.com/samtools/htsjdk/issues/1026 ?
            V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0).collect(Collectors.toList()));
            variantsCount = inMemoryVariants.size();
            iter2.close();
        } else {
            tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
            tmpIndexFile.deleteOnExit();
            new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
            final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
            variantsCount = tmpList.size();
            tmpList.close();
            inMemoryVariants = null;
        }
        vcfFileReader.close();
        LOG.info("Number of variants: " + variantsCount);
        Result bestResult = null;
        int x = this.start_index_at;
        while (x + 1 < variantsCount) {
            final List<Runner> runners = new Vector<>(this.number_of_jobs);
            while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
                LOG.info("starting " + x + "/" + variantsCount);
                runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
                ++x;
            }
            final ExecutorService execSvc;
            if (this.number_of_jobs == 1) {
                execSvc = null;
            } else {
                execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
                ;
            }
            if (this.number_of_jobs == 1) {
                runners.get(0).call();
            } else {
                execSvc.invokeAll(runners);
            }
            if (execSvc != null) {
                execSvc.shutdown();
                execSvc.awaitTermination(10000L, TimeUnit.DAYS);
                execSvc.shutdownNow();
            }
            runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
                LOG.info("That took " + (D / 1000f) + " seconds.");
            });
            for (final Runner r : runners) {
                final Result rez = r.result;
                if (rez == null)
                    continue;
                if (bestResult == null || bestResult.score < rez.score) {
                    bestResult = rez;
                    if (this.output_score) {
                        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
                        pw.println(bestResult.score + "\t" + bestResult.toString());
                        pw.flush();
                        pw.close();
                    } else {
                        final VariantContextWriter w = openVariantContextWriter(this.outputFile);
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
                        w.writeHeader(header2);
                        w.add(bestResult.ctx1);
                        w.add(bestResult.ctx2);
                        w.close();
                    }
                }
            }
            LOG.info("best: " + bestResult);
        }
        if (tmpIndexFile != null)
            tmpIndexFile.delete();
        return 0;
    } catch (final Exception err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Callable(java.util.concurrent.Callable) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Vector(java.util.Vector) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) ExecutorService(java.util.concurrent.ExecutorService) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) VCFReader(htsjdk.variant.vcf.VCFReader) Collectors(java.util.stream.Collectors) File(java.io.File) Executors(java.util.concurrent.Executors) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) VCFReader(htsjdk.variant.vcf.VCFReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Vector(java.util.Vector) PrintWriter(java.io.PrintWriter) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList)

Example 13 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class VcfGeneEpistasis method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.geneBed == null) {
        LOG.error("gene file bed undefined");
        return -1;
    }
    if (this.outputFile == null) {
        LOG.error("output file undefined");
        return -1;
    }
    CloseableIterator<VariantContext> iter = null;
    try {
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        this.vcfFileReader = VCFReaderFactory.makeDefault().open(vcfFile.toPath(), true);
        final VCFHeader header = this.vcfFileReader.getHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty() || !pedigree.hasAffected() || !pedigree.hasUnaffected()) {
            LOG.error("empty ped or no case/ctrl");
            return -1;
        }
        pedigree.verifyPersonsHaveUniqueNames();
        for (final Pedigree.Person p : pedigree.getPersons().stream().filter(P -> P.isAffected() || P.isUnaffected()).filter(P -> header.getSampleNamesInOrder().contains(P.getId())).collect(Collectors.toSet())) {
            this.id2samples.put(p.getId(), p);
        }
        this.vcfTools = new VcfTools(header);
        List<Interval> geneList;
        if (!this.geneBed.exists()) {
            final Map<String, Interval> gene2interval = new HashMap<>(50000);
            LOG.info("building gene file" + this.geneBed);
            iter = this.vcfFileReader.iterator();
            // iter = this.vcfFileReader.query("chr3",1,300_000_000);
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (iter.hasNext()) {
                final VariantContext ctx = progress.watch(iter.next());
                if (!accept(ctx))
                    continue;
                for (final String geneName : getGenes(ctx)) {
                    final Interval old = gene2interval.get(geneName);
                    if (old == null) {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, geneName));
                        LOG.info("adding " + geneName + ". number of genes: " + gene2interval.size());
                    } else if (!old.getContig().equals(ctx.getContig())) {
                        LOG.error("boum :" + geneName + ": on chrom " + ctx.getContig() + " vs " + old);
                        return -1;
                    } else {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), Math.min(ctx.getStart(), old.getStart()), Math.max(ctx.getEnd(), old.getEnd()), false, geneName));
                    }
                }
            }
            iter.close();
            iter = null;
            progress.finish();
            geneList = new ArrayList<>(gene2interval.values());
            PrintWriter pw = new PrintWriter(this.geneBed);
            for (final Interval g : geneList) {
                pw.println(g.getContig() + "\t" + (g.getStart() - 1) + "\t" + (g.getEnd()) + "\t" + g.getName());
            }
            pw.flush();
            pw.close();
            pw = null;
        } else {
            try (BedLineReader r = new BedLineReader(geneBed)) {
                geneList = r.stream().filter(B -> B != null).map(B -> new Interval(B.getContig(), B.getStart(), B.getEnd(), true, B.get(3))).collect(Collectors.toList());
            }
        }
        if (geneList.isEmpty()) {
            LOG.error("gene List is empty");
            return -1;
        }
        final Comparator<VariantContext> ctxSorter = VCFUtils.createTidPosRefComparator(header.getSequenceDictionary());
        final Function<Interval, List<VariantContext>> loadVariants = (R) -> {
            List<VariantContext> L = new ArrayList<>();
            CloseableIterator<VariantContext> r = this.vcfFileReader.query(R.getContig(), R.getStart(), R.getEnd());
            while (r.hasNext()) {
                final VariantContext ctx = r.next();
                if (!accept(ctx))
                    continue;
                if (!getGenes(ctx).contains(R.getName()))
                    continue;
                L.add(ctx);
            }
            r.close();
            return L;
        };
        final SkatExecutor executor = this.skatFactory.build();
        Double bestSkat = null;
        LOG.info("number of genes : " + geneList.size());
        final int list_end_index = (this.user_end_index < 0 ? geneList.size() : Math.min(geneList.size(), this.user_end_index));
        for (int x = this.user_begin_index; x < list_end_index; ++x) {
            final Interval intervalx = geneList.get(x);
            final List<VariantContext> variantsx = loadVariants.apply(intervalx);
            if (variantsx.isEmpty())
                continue;
            for (int y = x; y < geneList.size(); /* pas list_end_index */
            ++y) {
                final Interval intervaly;
                final List<VariantContext> merge;
                if (y == x) {
                    // we-re testing gene 1 only
                    intervaly = intervalx;
                    merge = variantsx;
                } else {
                    intervaly = geneList.get(y);
                    if (intervaly.intersects(intervalx))
                        continue;
                    final List<VariantContext> variantsy = loadVariants.apply(intervaly);
                    if (variantsy.isEmpty())
                        continue;
                    merge = new MergedList<>(variantsx, variantsy);
                }
                LOG.info("testing : [" + x + "]" + intervalx + " [" + y + "]" + intervaly + " N:" + geneList.size() + " best: " + bestSkat);
                final Double skat = eval(executor, merge);
                if (skat == null)
                    continue;
                if (bestSkat == null || skat.compareTo(bestSkat) < 0) {
                    bestSkat = skat;
                    LOG.info("best " + bestSkat + " " + intervalx + " " + intervaly);
                    if (this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz")) {
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfGeneEpistasis.class.getName(), intervalx.getName() + " " + intervaly.getName() + " " + bestSkat));
                        final VariantContextWriter w = VCFUtils.createVariantContextWriter(outputFile);
                        w.writeHeader(header2);
                        merge.stream().sorted(ctxSorter).forEach(V -> w.add(V));
                        w.close();
                    } else {
                        final PrintWriter w = super.openFileOrStdoutAsPrintWriter(outputFile);
                        w.println(String.valueOf(bestSkat) + "\t" + intervalx.getName() + "\t" + intervaly.getName());
                        w.flush();
                        w.close();
                    }
                }
            }
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(this.vcfFileReader);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) AbstractList(java.util.AbstractList) HashMap(java.util.HashMap) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) Interval(htsjdk.samtools.util.Interval) SkatResult(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatResult) StringUtil(htsjdk.samtools.util.StringUtil) SkatExecutor(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatExecutor) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SkatFactory(com.github.lindenb.jvarkit.tools.skat.SkatFactory) Logger(com.github.lindenb.jvarkit.util.log.Logger) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Comparator(java.util.Comparator) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) AbstractList(java.util.AbstractList) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) SkatExecutor(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatExecutor) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 14 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class IbdToVcf method doWork.

@Override
public int doWork(final List<String> args) {
    SortingCollection<Data> sorting = null;
    try {
        final List<Path> inputs = IOUtils.unrollPaths(args);
        if (inputs.isEmpty()) {
            LOG.info("IBD file(s) missing");
            return -1;
        }
        this.dictionary = SequenceDictionaryUtils.extractRequired(this.dictPath);
        this.loadMarkers();
        this.loadPedigree();
        sorting = SortingCollection.newInstance(Data.class, new DataCodec(), (A, B) -> A.compare1(B), writingSortingCollection.getMaxRecordsInRam(), writingSortingCollection.getTmpPaths());
        for (final Path ibdPath : inputs) {
            loadIbd(sorting, ibdPath);
        }
        sorting.doneAdding();
        sorting.setDestructiveIteration(true);
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        VCFFormatHeaderLine ibdHeaderLine = new VCFFormatHeaderLine("I", 1, VCFHeaderLineType.Float, "IBD Value 0");
        metaData.add(ibdHeaderLine);
        ibdHeaderLine = new VCFFormatHeaderLine("J", 1, VCFHeaderLineType.Float, "IBD Value 1");
        metaData.add(ibdHeaderLine);
        ibdHeaderLine = new VCFFormatHeaderLine("K", 1, VCFHeaderLineType.Float, "IBD Value 2");
        metaData.add(ibdHeaderLine);
        final VCFHeader header = new VCFHeader(metaData, this.all_pairs.stream().map(P -> P.toString()).collect(Collectors.toList()));
        header.setSequenceDictionary(this.dictionary);
        JVarkitVersion.getInstance().addMetaData(this, header);
        try (VariantContextWriter vcw = super.openVariantContextWriter(null)) {
            vcw.writeHeader(header);
            final List<Allele> alleles = Collections.singletonList(Allele.REF_N);
            try (CloseableIterator<Data> iter = sorting.iterator()) {
                final EqualRangeIterator<Data> eqiter = new EqualRangeIterator<>(iter, (A, B) -> A.compare2(B));
                while (eqiter.hasNext()) {
                    final List<Data> row = eqiter.next();
                    final Marker marker = this.all_markers.get(row.get(0).marker_id);
                    final List<Genotype> genotypes = new ArrayList<>(row.size());
                    for (int i = 0; i < row.size(); i++) {
                        final Pair pair = this.all_pairs.get(row.get(i).pair_id);
                        final GenotypeBuilder gb = new GenotypeBuilder(pair.getName());
                        gb.attribute("I", row.get(i).ibd[0]);
                        gb.attribute("J", row.get(i).ibd[1]);
                        gb.attribute("K", row.get(i).ibd[2]);
                        genotypes.add(gb.make());
                    }
                    final VariantContextBuilder vcb = new VariantContextBuilder(null, marker.getContig(), marker.getPos(), marker.getPos(), alleles).genotypes(genotypes).id(marker.getName());
                    vcw.add(vcb.make());
                }
                eqiter.close();
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Optional(java.util.Optional) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Path(java.nio.file.Path) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 15 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class IgvReview method start.

@Override
public void start(final Stage stage) throws Exception {
    stage.setTitle(getClass().getSimpleName());
    Predicate<VariantContext> ctxFilter;
    Map<String, String> params = super.getParameters().getNamed();
    if (params.containsKey("--filter")) {
        ctxFilter = JexlVariantPredicate.create(params.get("--filter"));
    } else {
        ctxFilter = V -> true;
    }
    final List<String> args = super.getParameters().getUnnamed();
    final File configFile;
    if (args.isEmpty()) {
        final FileChooser fc = new FileChooser();
        final String lastDirStr = preferences.get(LAST_USED_DIR_KEY, null);
        if (lastDirStr != null && !lastDirStr.isEmpty()) {
            fc.setInitialDirectory(new File(lastDirStr));
        }
        fc.getExtensionFilters().addAll(Collections.singletonList(new FileChooser.ExtensionFilter("Config file", "*.config", "*.cfg", "*.list")));
        configFile = fc.showOpenDialog(stage);
    } else if (args.size() == 1) {
        configFile = new File(args.get(0));
    } else {
        configFile = null;
    }
    if (configFile == null || !configFile.exists()) {
        JfxUtils.dialog().cause("Illegal number of arguments or file doesn't exists.").show(stage);
        Platform.exit();
        return;
    }
    if (configFile.isFile() && configFile.getParentFile() != null) {
        this.preferences.put(LAST_USED_DIR_KEY, configFile.getParentFile().getPath());
    }
    final List<String> configLines = Files.readAllLines(configFile.toPath());
    final Predicate<String> ignoreComment = (S) -> !S.startsWith("#");
    final Predicate<String> predVcf = S -> S.endsWith(".vcf") || S.endsWith(".vcf.gz");
    if (configLines.stream().filter(ignoreComment).filter(predVcf).count() != 1) {
        JfxUtils.dialog().cause("Found more than one vcf file in " + configFile).show(stage);
        Platform.exit();
        return;
    }
    final File vcfFile = configLines.stream().filter(ignoreComment).filter(predVcf).map(S -> new File(S)).findFirst().get();
    LOG.info("Opening vcf file and loading in memory");
    VCFReader vfr = null;
    CloseableIterator<VariantContext> iter = null;
    final Set<String> sampleNames;
    try {
        this.variants.clear();
        vfr = VCFReaderFactory.makeDefault().open(vcfFile.toPath(), false);
        this.vcfHeader = vfr.getHeader();
        sampleNames = new HashSet<>(this.vcfHeader.getSampleNamesInOrder());
        if (sampleNames.isEmpty()) {
            JfxUtils.dialog().cause("No Genotypes in " + vcfFile).show(stage);
            Platform.exit();
            return;
        }
        iter = vfr.iterator();
        this.variants.addAll(iter.stream().filter(ctxFilter).filter(CTX -> CTX.isVariant()).collect(Collectors.toList()));
    } catch (final Exception err) {
        JfxUtils.dialog().cause(err).show(stage);
        Platform.exit();
        return;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(vfr);
    }
    if (this.variants.isEmpty()) {
        JfxUtils.dialog().cause("No Variants").show(stage);
        Platform.exit();
        return;
    }
    final SAMSequenceDictionary dict = this.vcfHeader.getSequenceDictionary();
    if (dict == null || dict.isEmpty()) {
        JfxUtils.dialog().cause(JvarkitException.VcfDictionaryMissing.getMessage(vcfFile.getPath())).show(stage);
        Platform.exit();
        return;
    }
    final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    configLines.stream().filter(ignoreComment).filter(S -> S.endsWith(FileExtensions.BAM)).map(S -> Paths.get(S)).forEach(F -> {
        final SamReader samIn = srf.open(F);
        final SAMFileHeader header = samIn.getFileHeader();
        CloserUtil.close(samIn);
        String sample = null;
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            String s = rg.getSample();
            if (s == null)
                continue;
            if (sample == null) {
                sample = s;
            } else if (!sample.equals(s)) {
                JfxUtils.dialog().cause("Two samples in " + F).show(stage);
                Platform.exit();
                return;
            }
        }
        if (sample == null) {
            JfxUtils.dialog().cause("No sample in " + F + ". Ignoring").show(stage);
            return;
        }
        if (!sampleNames.contains(sample)) {
            JfxUtils.dialog().cause("Not in VCF header " + sample + " / " + F + ". Ignoring").show(stage);
            return;
        }
        this.sample2bamFile.put(sample, F);
    });
    if (this.sample2bamFile.isEmpty()) {
        JfxUtils.dialog().cause("No valid bam file in " + configFile).show(stage);
        return;
    }
    sampleNames.retainAll(this.sample2bamFile.keySet());
    if (sampleNames.isEmpty()) {
        JfxUtils.dialog().cause("No Sample associated to bam").show(stage);
        return;
    }
    ObservableList<VariantAndGenotype> genotypes = FXCollections.observableArrayList(this.variants.stream().flatMap(CTX -> CTX.getGenotypes().stream().filter(G -> sampleNames.contains(G.getSampleName())).map(G -> new VariantAndGenotype(CTX, G))).collect(Collectors.toList()));
    if (genotypes.isEmpty()) {
        JfxUtils.dialog().cause("No Genotype to show").show(stage);
        return;
    }
    Menu menu = new Menu("File");
    MenuItem menuItem = new MenuItem("Save as...");
    menuItem.setOnAction(AE -> {
        saveVariantsAs(stage);
    });
    menu.getItems().add(menuItem);
    menuItem = new MenuItem("Save");
    menuItem.setOnAction(AE -> {
        if (this.saveAsFile != null) {
            saveVariants(stage, this.saveAsFile);
        } else {
            saveVariantsAs(stage);
        }
    });
    menu.getItems().add(menuItem);
    menu.getItems().add(new SeparatorMenuItem());
    menuItem = new MenuItem("Quit");
    menuItem.setOnAction(AE -> {
        Platform.exit();
    });
    menu.getItems().add(menuItem);
    MenuBar bar = new MenuBar(menu);
    this.genotypeTable = new TableView<>(genotypes);
    this.genotypeTable.getColumns().add(makeColumn("CHROM", G -> G.ctx.getContig()));
    this.genotypeTable.getColumns().add(makeColumn("POS", G -> G.ctx.getStart()));
    this.genotypeTable.getColumns().add(makeColumn("ID", G -> G.ctx.getID()));
    this.genotypeTable.getColumns().add(makeColumn("REF", G -> G.ctx.getReference().getDisplayString()));
    this.genotypeTable.getColumns().add(makeColumn("ALT", G -> G.ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
    this.genotypeTable.getColumns().add(makeColumn("Sample", G -> G.g.getSampleName()));
    this.genotypeTable.getColumns().add(makeColumn("Type", G -> G.g.getType().name()));
    this.genotypeTable.getColumns().add(makeColumn("Alleles", G -> G.g.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
    TableColumn<VariantAndGenotype, String> reviewCol = new TableColumn<>("Review");
    reviewCol.setCellValueFactory(C -> C.getValue().getReviewProperty());
    reviewCol.setCellFactory(TextFieldTableCell.forTableColumn());
    reviewCol.setOnEditCommit((E) -> {
        int y = E.getTablePosition().getRow();
        this.genotypeTable.getItems().get(y).setReview(E.getNewValue());
    });
    reviewCol.setEditable(true);
    this.genotypeTable.getColumns().add(reviewCol);
    this.genotypeTable.getSelectionModel().cellSelectionEnabledProperty().set(true);
    this.genotypeTable.setEditable(true);
    final ContextMenu cm = new ContextMenu();
    MenuItem mi1 = new MenuItem("Menu 1");
    cm.getItems().add(mi1);
    MenuItem mi2 = new MenuItem("Menu 2");
    cm.getItems().add(mi2);
    this.genotypeTable.setOnMousePressed(event -> {
        if (event.isPrimaryButtonDown() && (event.getClickCount() == 3 || event.isShiftDown())) {
            moveIgvTo(stage, genotypeTable.getSelectionModel().getSelectedItem());
        } else if (event.isSecondaryButtonDown()) {
            cm.show(genotypeTable, event.getScreenX(), event.getScreenY());
        }
    });
    final BorderPane pane2 = new BorderPane(this.genotypeTable);
    pane2.setPadding(new Insets(10, 10, 10, 10));
    VBox vbox1 = new VBox(bar, pane2);
    final Scene scene = new Scene(vbox1, 500, 300);
    stage.setScene(scene);
    stage.show();
}
Also used : JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) VCFHeader(htsjdk.variant.vcf.VCFHeader) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VBox(javafx.scene.layout.VBox) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Application(javafx.application.Application) Vector(java.util.Vector) ContextMenu(javafx.scene.control.ContextMenu) IgvConstants(com.github.lindenb.jvarkit.util.igv.IgvConstants) Map(java.util.Map) TableView(javafx.scene.control.TableView) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) MenuItem(javafx.scene.control.MenuItem) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Platform(javafx.application.Platform) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) FileExtensions(htsjdk.samtools.util.FileExtensions) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) ObservableList(javafx.collections.ObservableList) BorderPane(javafx.scene.layout.BorderPane) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Scene(javafx.scene.Scene) Socket(java.net.Socket) SimpleStringProperty(javafx.beans.property.SimpleStringProperty) FXCollections(javafx.collections.FXCollections) HashMap(java.util.HashMap) BackingStoreException(java.util.prefs.BackingStoreException) TextFieldTableCell(javafx.scene.control.cell.TextFieldTableCell) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) TableColumn(javafx.scene.control.TableColumn) HashSet(java.util.HashSet) Insets(javafx.geometry.Insets) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) MenuBar(javafx.scene.control.MenuBar) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) File(java.io.File) Preferences(java.util.prefs.Preferences) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) Stage(javafx.stage.Stage) Paths(java.nio.file.Paths) Closeable(java.io.Closeable) JfxUtils(com.github.lindenb.jvarkit.jfx.components.JfxUtils) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Window(javafx.stage.Window) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) BorderPane(javafx.scene.layout.BorderPane) Insets(javafx.geometry.Insets) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) MenuBar(javafx.scene.control.MenuBar) ContextMenu(javafx.scene.control.ContextMenu) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VCFReader(htsjdk.variant.vcf.VCFReader) FileChooser(javafx.stage.FileChooser) ContextMenu(javafx.scene.control.ContextMenu) Menu(javafx.scene.control.Menu) SamReaderFactory(htsjdk.samtools.SamReaderFactory) MenuItem(javafx.scene.control.MenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Scene(javafx.scene.Scene) TableColumn(javafx.scene.control.TableColumn) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BackingStoreException(java.util.prefs.BackingStoreException) IOException(java.io.IOException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) VBox(javafx.scene.layout.VBox)

Aggregations

CloseableIterator (htsjdk.samtools.util.CloseableIterator)103 List (java.util.List)86 Logger (com.github.lindenb.jvarkit.util.log.Logger)85 Parameter (com.beust.jcommander.Parameter)82 Program (com.github.lindenb.jvarkit.util.jcommander.Program)78 ArrayList (java.util.ArrayList)73 Collectors (java.util.stream.Collectors)71 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)69 Path (java.nio.file.Path)69 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)66 CloserUtil (htsjdk.samtools.util.CloserUtil)64 Set (java.util.Set)64 VCFHeader (htsjdk.variant.vcf.VCFHeader)59 VariantContext (htsjdk.variant.variantcontext.VariantContext)54 IOException (java.io.IOException)53 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)51 SAMRecord (htsjdk.samtools.SAMRecord)51 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)51 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)50 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)49