Search in sources :

Example 1 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class VcfGeneEpistasis method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.geneBed == null) {
        LOG.error("gene file bed undefined");
        return -1;
    }
    if (this.outputFile == null) {
        LOG.error("output file undefined");
        return -1;
    }
    CloseableIterator<VariantContext> iter = null;
    try {
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        this.vcfFileReader = VCFReaderFactory.makeDefault().open(vcfFile.toPath(), true);
        final VCFHeader header = this.vcfFileReader.getHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty() || !pedigree.hasAffected() || !pedigree.hasUnaffected()) {
            LOG.error("empty ped or no case/ctrl");
            return -1;
        }
        pedigree.verifyPersonsHaveUniqueNames();
        for (final Pedigree.Person p : pedigree.getPersons().stream().filter(P -> P.isAffected() || P.isUnaffected()).filter(P -> header.getSampleNamesInOrder().contains(P.getId())).collect(Collectors.toSet())) {
            this.id2samples.put(p.getId(), p);
        }
        this.vcfTools = new VcfTools(header);
        List<Interval> geneList;
        if (!this.geneBed.exists()) {
            final Map<String, Interval> gene2interval = new HashMap<>(50000);
            LOG.info("building gene file" + this.geneBed);
            iter = this.vcfFileReader.iterator();
            // iter = this.vcfFileReader.query("chr3",1,300_000_000);
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (iter.hasNext()) {
                final VariantContext ctx = progress.watch(iter.next());
                if (!accept(ctx))
                    continue;
                for (final String geneName : getGenes(ctx)) {
                    final Interval old = gene2interval.get(geneName);
                    if (old == null) {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, geneName));
                        LOG.info("adding " + geneName + ". number of genes: " + gene2interval.size());
                    } else if (!old.getContig().equals(ctx.getContig())) {
                        LOG.error("boum :" + geneName + ": on chrom " + ctx.getContig() + " vs " + old);
                        return -1;
                    } else {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), Math.min(ctx.getStart(), old.getStart()), Math.max(ctx.getEnd(), old.getEnd()), false, geneName));
                    }
                }
            }
            iter.close();
            iter = null;
            progress.finish();
            geneList = new ArrayList<>(gene2interval.values());
            PrintWriter pw = new PrintWriter(this.geneBed);
            for (final Interval g : geneList) {
                pw.println(g.getContig() + "\t" + (g.getStart() - 1) + "\t" + (g.getEnd()) + "\t" + g.getName());
            }
            pw.flush();
            pw.close();
            pw = null;
        } else {
            try (BedLineReader r = new BedLineReader(geneBed)) {
                geneList = r.stream().filter(B -> B != null).map(B -> new Interval(B.getContig(), B.getStart(), B.getEnd(), true, B.get(3))).collect(Collectors.toList());
            }
        }
        if (geneList.isEmpty()) {
            LOG.error("gene List is empty");
            return -1;
        }
        final Comparator<VariantContext> ctxSorter = VCFUtils.createTidPosRefComparator(header.getSequenceDictionary());
        final Function<Interval, List<VariantContext>> loadVariants = (R) -> {
            List<VariantContext> L = new ArrayList<>();
            CloseableIterator<VariantContext> r = this.vcfFileReader.query(R.getContig(), R.getStart(), R.getEnd());
            while (r.hasNext()) {
                final VariantContext ctx = r.next();
                if (!accept(ctx))
                    continue;
                if (!getGenes(ctx).contains(R.getName()))
                    continue;
                L.add(ctx);
            }
            r.close();
            return L;
        };
        final SkatExecutor executor = this.skatFactory.build();
        Double bestSkat = null;
        LOG.info("number of genes : " + geneList.size());
        final int list_end_index = (this.user_end_index < 0 ? geneList.size() : Math.min(geneList.size(), this.user_end_index));
        for (int x = this.user_begin_index; x < list_end_index; ++x) {
            final Interval intervalx = geneList.get(x);
            final List<VariantContext> variantsx = loadVariants.apply(intervalx);
            if (variantsx.isEmpty())
                continue;
            for (int y = x; y < geneList.size(); /* pas list_end_index */
            ++y) {
                final Interval intervaly;
                final List<VariantContext> merge;
                if (y == x) {
                    // we-re testing gene 1 only
                    intervaly = intervalx;
                    merge = variantsx;
                } else {
                    intervaly = geneList.get(y);
                    if (intervaly.intersects(intervalx))
                        continue;
                    final List<VariantContext> variantsy = loadVariants.apply(intervaly);
                    if (variantsy.isEmpty())
                        continue;
                    merge = new MergedList<>(variantsx, variantsy);
                }
                LOG.info("testing : [" + x + "]" + intervalx + " [" + y + "]" + intervaly + " N:" + geneList.size() + " best: " + bestSkat);
                final Double skat = eval(executor, merge);
                if (skat == null)
                    continue;
                if (bestSkat == null || skat.compareTo(bestSkat) < 0) {
                    bestSkat = skat;
                    LOG.info("best " + bestSkat + " " + intervalx + " " + intervaly);
                    if (this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz")) {
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfGeneEpistasis.class.getName(), intervalx.getName() + " " + intervaly.getName() + " " + bestSkat));
                        final VariantContextWriter w = VCFUtils.createVariantContextWriter(outputFile);
                        w.writeHeader(header2);
                        merge.stream().sorted(ctxSorter).forEach(V -> w.add(V));
                        w.close();
                    } else {
                        final PrintWriter w = super.openFileOrStdoutAsPrintWriter(outputFile);
                        w.println(String.valueOf(bestSkat) + "\t" + intervalx.getName() + "\t" + intervaly.getName());
                        w.flush();
                        w.close();
                    }
                }
            }
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(this.vcfFileReader);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) AbstractList(java.util.AbstractList) HashMap(java.util.HashMap) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) Interval(htsjdk.samtools.util.Interval) SkatResult(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatResult) StringUtil(htsjdk.samtools.util.StringUtil) SkatExecutor(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatExecutor) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SkatFactory(com.github.lindenb.jvarkit.tools.skat.SkatFactory) Logger(com.github.lindenb.jvarkit.util.log.Logger) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Comparator(java.util.Comparator) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) AbstractList(java.util.AbstractList) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) SkatExecutor(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatExecutor) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 2 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class VcfFisherCombinatorics method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        final Pedigree pedigree = new PedigreeParser().parse(this.pedigreeFile);
        final IntervalTreeMap<List<GeneInfo>> geneInfoMap = new IntervalTreeMap<>();
        final Predicate<Genotype> isGtCnv = G -> G.isHet() || G.isHomVar();
        try (VCFIterator vcfIter = super.openVCFIterator(super.oneFileOrNull(args))) {
            final VCFHeader header = vcfIter.getHeader();
            final ContigNameConverter ctgConverter = ContigNameConverter.fromOneDictionary(SequenceDictionaryUtils.extractRequired(header));
            if (!header.hasGenotypingData()) {
                LOG.error("No Genotype data in " + header);
                return -1;
            }
            final Map<String, Integer> sample2index = header.getSampleNameToOffset();
            final List<Integer> casesIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isAffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
            LOG.info("cases N=" + casesIdx.size());
            if (casesIdx.isEmpty()) {
                LOG.error("No affected/cases sample in the input VCF");
                return -1;
            }
            final List<Integer> controlsIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isUnaffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
            LOG.info("controls N=" + controlsIdx.size());
            if (controlsIdx.isEmpty()) {
                LOG.error("No unaffected/controls sample in the input VCF");
                return -1;
            }
            final Predicate<VariantContext> acceptCtx = V -> {
                if (discard_bnd && V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
                    return false;
                if (discard_filtered && V.isFiltered())
                    return false;
                if (max_af < 1.0) {
                    final Iterator<Integer> it = Stream.concat(controlsIdx.stream(), casesIdx.stream()).iterator();
                    int ac = 0;
                    int an = 0;
                    while (it.hasNext()) {
                        switch(V.getGenotype(it.next()).getType()) {
                            case HOM_VAR:
                                ac += 2;
                                an += 2;
                                break;
                            case HOM_REF:
                                ac += 0;
                                an += 2;
                                break;
                            case HET:
                                ac += 1;
                                an += 2;
                                break;
                            default:
                                break;
                        }
                    }
                    if (an == 0)
                        return false;
                    if (ac / (double) an > max_af)
                        return false;
                }
                return true;
            };
            try (BedLineReader br = new BedLineReader(this.bedPath)) {
                while (br.hasNext()) {
                    final BedLine bed = br.next();
                    final String ctg = ctgConverter.apply(bed.getContig());
                    if (StringUtil.isBlank(ctg))
                        continue;
                    final GeneInfo geneInfo = new GeneInfo();
                    geneInfo.casesflags = new BitSet(casesIdx.size());
                    geneInfo.controlsflags = new BitSet(controlsIdx.size());
                    geneInfo.contig = ctg;
                    geneInfo.name = bed.getOrDefault(3, "undefined");
                    geneInfo.parts.add(new SimpleInterval(bed).renameContig(ctg));
                    final Interval key = new Interval(geneInfo);
                    List<GeneInfo> L = geneInfoMap.get(key);
                    if (L == null) {
                        L = new ArrayList<>();
                        geneInfoMap.put(key, L);
                    }
                    L.add(geneInfo);
                }
            }
            if (geneInfoMap.isEmpty()) {
                LOG.error("no gene found in " + this.bedPath);
                return -1;
            }
            LOG.info("reading variants...");
            while (vcfIter.hasNext()) {
                final VariantContext ctx = vcfIter.next();
                if (!acceptCtx.test(ctx))
                    continue;
                for (List<GeneInfo> gil : geneInfoMap.getOverlapping(ctx)) {
                    for (GeneInfo gi : gil) {
                        for (int y = 0; y < casesIdx.size(); ++y) {
                            final Genotype gt = ctx.getGenotype(casesIdx.get(y));
                            if (!isGtCnv.test(gt))
                                continue;
                            gi.casesflags.set(y);
                        }
                        for (int y = 0; y < controlsIdx.size(); ++y) {
                            final Genotype gt = ctx.getGenotype(controlsIdx.get(y));
                            if (!isGtCnv.test(gt))
                                continue;
                            gi.controlsflags.set(y);
                        }
                    }
                }
            }
            /* remove Genes without variant , count sample per gene*/
            for (List<GeneInfo> gil : geneInfoMap.values()) {
                gil.removeIf(GI -> GI.casesflags.nextSetBit(0) == -1 && GI.controlsflags.nextSetBit(0) == -1);
            }
            // end remove
            /* load bed file */
            final List<GeneInfo> geneList = geneInfoMap.values().stream().filter(L -> !L.isEmpty()).flatMap(L -> L.stream()).sorted().collect(Collectors.toList());
            if (geneList.isEmpty()) {
                LOG.error("no gene found in " + this.bedPath);
                return -1;
            }
            LOG.info("N Genes=" + geneList.size());
            Solution best = null;
            for (int i = 2; i < Math.min(this.max_num_genes, geneList.size()); i++) {
                LOG.info("sarting loop from " + i);
                best = recursion(geneList, casesIdx, controlsIdx, new ArrayList<>(), i, best);
            }
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                pw.println(best);
                pw.flush();
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) ArrayList(java.util.ArrayList) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) Interval(htsjdk.samtools.util.Interval) StringUtil(htsjdk.samtools.util.StringUtil) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Iterator(java.util.Iterator) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) VariantContext(htsjdk.variant.variantcontext.VariantContext) BitSet(java.util.BitSet) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) VCFIterator(htsjdk.variant.vcf.VCFIterator) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter) BitSet(java.util.BitSet) Genotype(htsjdk.variant.variantcontext.Genotype) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Example 3 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class FindGVCFsBlocks method doWork.

@SuppressWarnings("resource")
@Override
public int doWork(final List<String> args) {
    Path tmpBedFile0 = null;
    Path tmpBedFile1 = null;
    try {
        if (this.bedPath != null) {
            IOUtil.assertFileIsReadable(this.bedPath);
        }
        final List<Path> inputs = IOUtils.unrollPaths(args);
        if (inputs.isEmpty()) {
            LOG.error("input missing");
            return -1;
        }
        if (this.outputFile != null) {
            final String fname = this.outputFile.getFileName().toString();
            if (!fname.endsWith(FileExtensions.INTERVAL_LIST) && !fname.endsWith(FileExtensions.COMPRESSED_INTERVAL_LIST)) {
                LOG.error("Output should end with " + FileExtensions.INTERVAL_LIST + " or " + FileExtensions.COMPRESSED_INTERVAL_LIST);
                return -1;
            }
        }
        if (this.tmpDir == null && this.outputFile != null) {
            this.tmpDir = this.outputFile.getParent();
        }
        if (this.tmpDir == null) {
            this.tmpDir = IOUtils.getDefaultTempDir();
        }
        IOUtil.assertDirectoryIsWritable(this.tmpDir);
        tmpBedFile0 = Files.createTempFile(this.tmpDir, "tmp.", ".bed");
        tmpBedFile1 = Files.createTempFile(this.tmpDir, "tmp.", ".bed");
        SAMSequenceDictionary dict = null;
        final long initMilliSec = System.currentTimeMillis();
        for (int i = 0; i < inputs.size(); i++) {
            final long startMilliSec = System.currentTimeMillis();
            LOG.info(inputs.get(i) + " " + (i + 1) + "/" + inputs.size());
            try (GVCFOrIntervalIterator r0 = openInput(inputs.get(i))) {
                if (dict != null) {
                    SequenceUtil.assertSequenceDictionariesEqual(dict, r0.getSAMSequenceDictionary());
                } else {
                    dict = r0.getSAMSequenceDictionary();
                }
                long count_variants = 0L;
                try (IntervalListWriter pw = new IntervalListWriter(tmpBedFile0, dict)) {
                    /* first VCF , just convert to bed */
                    if (i == 0) {
                        while (r0.hasNext()) {
                            final Locatable loc = r0.next();
                            pw.add(loc);
                            count_variants++;
                        }
                    } else /* merge previous bed with current VCF using INFO/END */
                    {
                        Locatable start0 = null;
                        Locatable start1 = null;
                        try (IntervalListIterator r1 = new IntervalListIterator(tmpBedFile1)) {
                            PeekableIterator<Locatable> peek0 = new PeekableIterator<>(r0);
                            PeekableIterator<Locatable> peek1 = new PeekableIterator<>(r1);
                            while (peek0.hasNext() && peek1.hasNext()) {
                                final Locatable loc0 = peek0.peek();
                                final Locatable loc1 = peek1.peek();
                                if (!loc0.contigsMatch(loc1)) {
                                    throw new IllegalStateException("unexpected: not the same contigs " + loc0 + " " + loc1);
                                }
                                if (start0 == null)
                                    start0 = loc0;
                                if (start1 == null)
                                    start1 = loc1;
                                final int end0 = loc0.getEnd();
                                final int end1 = loc1.getEnd();
                                if (end0 < end1) {
                                    peek0.next();
                                    continue;
                                } else if (end0 > end1) {
                                    peek1.next();
                                    continue;
                                } else {
                                    /* end0==end1 */
                                    pw.add(new SimpleInterval(loc0.getContig(), (Math.min(start0.getStart(), start1.getStart())), loc0.getEnd()));
                                    count_variants++;
                                    // consumme
                                    peek0.next();
                                    // consumme
                                    peek1.next();
                                    start0 = null;
                                    start1 = null;
                                }
                            }
                            if (lenient_discordant_end) {
                                while (peek0.hasNext()) {
                                    final Locatable loc = peek0.next();
                                    LOG.warn("extra interval 0 : " + loc);
                                }
                                while (peek1.hasNext()) {
                                    final Locatable loc = peek1.next();
                                    LOG.warn("extra interval 1 : " + loc);
                                }
                            } else {
                                if (peek0.hasNext())
                                    throw new IllegalStateException("peek0 has Next ? " + peek0.next() + " use --lenient ?");
                                if (peek1.hasNext())
                                    throw new IllegalStateException("peek1 has Next ? " + peek1.next() + " use --lenient ?");
                            }
                            peek0.close();
                            peek1.close();
                        }
                    }
                    final long millisecPerVcf = (System.currentTimeMillis() - initMilliSec) / (i + 1L);
                    LOG.info("N=" + count_variants + ". That took: " + StringUtils.niceDuration(System.currentTimeMillis() - startMilliSec) + " Remains: " + StringUtils.niceDuration((inputs.size() - (i + 1)) * millisecPerVcf));
                }
                // end writer
                Files.deleteIfExists(tmpBedFile1);
                Files.move(tmpBedFile0, tmpBedFile1);
            }
        }
        final IntervalTreeMap<Boolean> intervalTreeMap;
        if (this.bedPath != null) {
            try (BedLineReader blr = new BedLineReader(this.bedPath)) {
                intervalTreeMap = blr.toIntervalTreeMap(BED -> Boolean.TRUE);
            }
        } else {
            intervalTreeMap = null;
        }
        try (IntervalListWriter w = new IntervalListWriter(this.outputFile, dict)) {
            try (IntervalListIterator r1 = new IntervalListIterator(tmpBedFile1)) {
                final PeekableIterator<Locatable> peek1 = new PeekableIterator<>(r1);
                while (peek1.hasNext()) {
                    Locatable loc = peek1.next();
                    if (intervalTreeMap != null && !intervalTreeMap.containsOverlapping(loc)) {
                        continue;
                    }
                    while (this.min_block_size > 0 && peek1.hasNext()) {
                        final Locatable loc2 = peek1.peek();
                        if (!loc2.contigsMatch(loc))
                            break;
                        if (CoordMath.getLength(loc.getStart(), loc2.getEnd()) > this.min_block_size)
                            break;
                        loc = new SimpleInterval(loc.getContig(), loc.getStart(), loc2.getEnd());
                        // consumme loc2
                        peek1.next();
                    }
                    w.add(loc);
                }
                peek1.close();
            }
            Files.deleteIfExists(tmpBedFile1);
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        if (tmpBedFile0 != null)
            try {
                Files.deleteIfExists(tmpBedFile0);
            } catch (Throwable err) {
            }
        if (tmpBedFile1 != null)
            try {
                Files.deleteIfExists(tmpBedFile1);
            } catch (Throwable err) {
            }
    }
}
Also used : Path(java.nio.file.Path) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) OutputStreamWriter(java.io.OutputStreamWriter) Path(java.nio.file.Path) PeekableIterator(htsjdk.samtools.util.PeekableIterator) VCFConstants(htsjdk.variant.vcf.VCFConstants) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Files(java.nio.file.Files) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) BufferedWriter(java.io.BufferedWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IntervalListCodec(htsjdk.tribble.IntervalList.IntervalListCodec) VCFReader(htsjdk.variant.vcf.VCFReader) AbstractCloseableIterator(com.github.lindenb.jvarkit.iterator.AbstractCloseableIterator) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) BufferedLineReader(htsjdk.samtools.util.BufferedLineReader) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) Closeable(java.io.Closeable) CoordMath(htsjdk.samtools.util.CoordMath) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PeekableIterator(htsjdk.samtools.util.PeekableIterator) Locatable(htsjdk.samtools.util.Locatable)

Example 4 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class Biostar77828 method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        String input = oneFileOrNull(args);
        try (BedLineReader in = new BedLineReader(openBufferedReader(input), input)) {
            while (in.hasNext()) {
                final BedLine bedLine = in.next();
                if (bedLine == null)
                    continue;
                if (bedLine.getColumnCount() < 3)
                    throw new IOException("bad BED input " + bedLine);
                final Interval seg = new Interval(bedLine.getContig(), bedLine.getStart(), bedLine.getEnd());
                if (seg.length() == 0)
                    continue;
                this.effective_genome_size += seg.length();
                this.all_segments.add(seg);
            }
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                Solution best = null;
                for (long generation = 0; generation < this.N_ITERATIONS; ++generation) {
                    if (generation % 100000 == 0)
                        LOG.info("generation:" + generation + "/" + this.N_ITERATIONS + " " + (int) ((generation / (double) this.N_ITERATIONS) * 100.0) + "% " + String.valueOf(best));
                    Solution sol = createSolution();
                    sol.generation = generation;
                    if (best == null || sol.compareTo(best) < 0) {
                        best = sol;
                    /*
		    			if(LOG.isDebugEnabled())
		    				{
		    				LOG.info("%%generation:"+generation);
		    				best.print(stderr());
		    				}*/
                    }
                }
                if (best != null) {
                    best.print(pw);
                }
                pw.flush();
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) IOException(java.io.IOException) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Interval(htsjdk.samtools.util.Interval) PrintWriter(java.io.PrintWriter)

Example 5 with BedLineReader

use of com.github.lindenb.jvarkit.bed.BedLineReader in project jvarkit by lindenb.

the class Biostar178713 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.outputFile == null || !outputFile.getFileName().toString().endsWith(".zip")) {
        LOG.error("output file option  must be declared and must en with .zip");
        return -1;
    }
    final Set<String> inputs = IOUtils.unrollFiles(args);
    final List<BedLine> bedLines = new ArrayList<>();
    OutputStream fos = null;
    ZipOutputStream zout = null;
    try {
        if (inputs.isEmpty()) {
            try (BedLineReader r = new BedLineReader(stdin(), "stdin")) {
                bedLines.addAll(r.stream().collect(Collectors.toList()));
            }
        } else
            for (final String input : inputs) {
                try (BedLineReader r = new BedLineReader(IOUtils.openURIForBufferedReading(input), input)) {
                    bedLines.addAll(r.stream().collect(Collectors.toList()));
                }
            }
        LOG.info("sorting " + bedLines.size());
        Collections.sort(bedLines, new Comparator<BedLine>() {

            @Override
            public int compare(BedLine o1, BedLine o2) {
                int i = o1.getContig().compareTo(o2.getContig());
                if (i != 0)
                    return i;
                i = o1.getStart() - o2.getStart();
                if (i != 0)
                    return i;
                i = o1.getEnd() - o2.getEnd();
                return i;
            }
        });
        if (bedLines.isEmpty()) {
            LOG.error("no bed line found");
            return -1;
        }
        LOG.info("creating zip " + this.outputFile);
        fos = Files.newOutputStream(this.outputFile);
        zout = new ZipOutputStream(fos);
        int chunk = 0;
        while (!bedLines.isEmpty()) {
            ++chunk;
            final ZipEntry entry = new ZipEntry("bed" + String.format("%03d", chunk) + ".bed");
            LOG.info("creating " + entry.getName());
            zout.putNextEntry(entry);
            PrintWriter pw = new PrintWriter(zout);
            BedLine prev = bedLines.get(0);
            bedLines.remove(0);
            pw.println(prev.join());
            int n_in_entry = 1;
            int i = 0;
            while (i < bedLines.size()) {
                final BedLine curr = bedLines.get(i);
                final double distance = curr.getStart() - prev.getEnd();
                if (!prev.getContig().equals(curr.getContig()) || distance > this.distancebed) {
                    pw.println(curr.join());
                    prev = curr;
                    bedLines.remove(i);
                    ++n_in_entry;
                } else {
                    ++i;
                }
            }
            pw.flush();
            zout.closeEntry();
            LOG.info("closing " + entry.getName() + " N=" + n_in_entry);
        }
        zout.finish();
        zout.close();
        return 0;
    } catch (Throwable e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(fos);
    }
}
Also used : OutputStream(java.io.OutputStream) ZipOutputStream(java.util.zip.ZipOutputStream) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) ZipOutputStream(java.util.zip.ZipOutputStream) PrintWriter(java.io.PrintWriter)

Aggregations

BedLineReader (com.github.lindenb.jvarkit.bed.BedLineReader)15 PrintWriter (java.io.PrintWriter)11 ArrayList (java.util.ArrayList)11 List (java.util.List)11 Parameter (com.beust.jcommander.Parameter)10 BedLine (com.github.lindenb.jvarkit.util.bio.bed.BedLine)10 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)10 Program (com.github.lindenb.jvarkit.util.jcommander.Program)10 Logger (com.github.lindenb.jvarkit.util.log.Logger)10 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)9 Path (java.nio.file.Path)9 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)8 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)7 Set (java.util.Set)7 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)6 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)6 ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile)6 CloseableIterator (htsjdk.samtools.util.CloseableIterator)6 CloserUtil (htsjdk.samtools.util.CloserUtil)6 Locatable (htsjdk.samtools.util.Locatable)6