Search in sources :

Example 21 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfFisherCombinatorics method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        final Pedigree pedigree = new PedigreeParser().parse(this.pedigreeFile);
        final IntervalTreeMap<List<GeneInfo>> geneInfoMap = new IntervalTreeMap<>();
        final Predicate<Genotype> isGtCnv = G -> G.isHet() || G.isHomVar();
        try (VCFIterator vcfIter = super.openVCFIterator(super.oneFileOrNull(args))) {
            final VCFHeader header = vcfIter.getHeader();
            final ContigNameConverter ctgConverter = ContigNameConverter.fromOneDictionary(SequenceDictionaryUtils.extractRequired(header));
            if (!header.hasGenotypingData()) {
                LOG.error("No Genotype data in " + header);
                return -1;
            }
            final Map<String, Integer> sample2index = header.getSampleNameToOffset();
            final List<Integer> casesIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isAffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
            LOG.info("cases N=" + casesIdx.size());
            if (casesIdx.isEmpty()) {
                LOG.error("No affected/cases sample in the input VCF");
                return -1;
            }
            final List<Integer> controlsIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isUnaffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
            LOG.info("controls N=" + controlsIdx.size());
            if (controlsIdx.isEmpty()) {
                LOG.error("No unaffected/controls sample in the input VCF");
                return -1;
            }
            final Predicate<VariantContext> acceptCtx = V -> {
                if (discard_bnd && V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
                    return false;
                if (discard_filtered && V.isFiltered())
                    return false;
                if (max_af < 1.0) {
                    final Iterator<Integer> it = Stream.concat(controlsIdx.stream(), casesIdx.stream()).iterator();
                    int ac = 0;
                    int an = 0;
                    while (it.hasNext()) {
                        switch(V.getGenotype(it.next()).getType()) {
                            case HOM_VAR:
                                ac += 2;
                                an += 2;
                                break;
                            case HOM_REF:
                                ac += 0;
                                an += 2;
                                break;
                            case HET:
                                ac += 1;
                                an += 2;
                                break;
                            default:
                                break;
                        }
                    }
                    if (an == 0)
                        return false;
                    if (ac / (double) an > max_af)
                        return false;
                }
                return true;
            };
            try (BedLineReader br = new BedLineReader(this.bedPath)) {
                while (br.hasNext()) {
                    final BedLine bed = br.next();
                    final String ctg = ctgConverter.apply(bed.getContig());
                    if (StringUtil.isBlank(ctg))
                        continue;
                    final GeneInfo geneInfo = new GeneInfo();
                    geneInfo.casesflags = new BitSet(casesIdx.size());
                    geneInfo.controlsflags = new BitSet(controlsIdx.size());
                    geneInfo.contig = ctg;
                    geneInfo.name = bed.getOrDefault(3, "undefined");
                    geneInfo.parts.add(new SimpleInterval(bed).renameContig(ctg));
                    final Interval key = new Interval(geneInfo);
                    List<GeneInfo> L = geneInfoMap.get(key);
                    if (L == null) {
                        L = new ArrayList<>();
                        geneInfoMap.put(key, L);
                    }
                    L.add(geneInfo);
                }
            }
            if (geneInfoMap.isEmpty()) {
                LOG.error("no gene found in " + this.bedPath);
                return -1;
            }
            LOG.info("reading variants...");
            while (vcfIter.hasNext()) {
                final VariantContext ctx = vcfIter.next();
                if (!acceptCtx.test(ctx))
                    continue;
                for (List<GeneInfo> gil : geneInfoMap.getOverlapping(ctx)) {
                    for (GeneInfo gi : gil) {
                        for (int y = 0; y < casesIdx.size(); ++y) {
                            final Genotype gt = ctx.getGenotype(casesIdx.get(y));
                            if (!isGtCnv.test(gt))
                                continue;
                            gi.casesflags.set(y);
                        }
                        for (int y = 0; y < controlsIdx.size(); ++y) {
                            final Genotype gt = ctx.getGenotype(controlsIdx.get(y));
                            if (!isGtCnv.test(gt))
                                continue;
                            gi.controlsflags.set(y);
                        }
                    }
                }
            }
            /* remove Genes without variant , count sample per gene*/
            for (List<GeneInfo> gil : geneInfoMap.values()) {
                gil.removeIf(GI -> GI.casesflags.nextSetBit(0) == -1 && GI.controlsflags.nextSetBit(0) == -1);
            }
            // end remove
            /* load bed file */
            final List<GeneInfo> geneList = geneInfoMap.values().stream().filter(L -> !L.isEmpty()).flatMap(L -> L.stream()).sorted().collect(Collectors.toList());
            if (geneList.isEmpty()) {
                LOG.error("no gene found in " + this.bedPath);
                return -1;
            }
            LOG.info("N Genes=" + geneList.size());
            Solution best = null;
            for (int i = 2; i < Math.min(this.max_num_genes, geneList.size()); i++) {
                LOG.info("sarting loop from " + i);
                best = recursion(geneList, casesIdx, controlsIdx, new ArrayList<>(), i, best);
            }
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                pw.println(best);
                pw.flush();
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) ArrayList(java.util.ArrayList) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) Interval(htsjdk.samtools.util.Interval) StringUtil(htsjdk.samtools.util.StringUtil) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Iterator(java.util.Iterator) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) VariantContext(htsjdk.variant.variantcontext.VariantContext) BitSet(java.util.BitSet) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) VCFIterator(htsjdk.variant.vcf.VCFIterator) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter) BitSet(java.util.BitSet) Genotype(htsjdk.variant.variantcontext.Genotype) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Example 22 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class CompareBamAndBuild method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    SortingCollection<Match> database = null;
    if (this.chainFile == null) {
        LOG.error("Chain file is not defined Option");
        return -1;
    }
    if (args.size() != 2) {
        LOG.error("Illegal number of arguments. Expected two indexed BAMS.");
        return -1;
    }
    try {
        LOG.info("load chain file");
        this.liftOver = new LiftOver(this.chainFile);
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrdererInSortingCollection(), this.maxRecordsInRam, this.tmpDir.toPath());
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < 2; currentSamFileIndex++) {
            final File samFile = new File(args.get(currentSamFileIndex));
            LOG.info("read " + samFile);
            this.bamFiles[currentSamFileIndex] = samFile;
            SamReader samFileReader = CompareBamAndBuild.this.openSamReader(samFile.getPath());
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            this.sequenceDictionaries[currentSamFileIndex] = dict;
            if (dict.isEmpty()) {
                samFileReader.close();
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            final SimpleInterval interval;
            if (!StringUtils.isBlank(this.REGION)) {
                final Function<String, Optional<SimpleInterval>> intervalParser = IntervalParserFactory.newInstance().dictionary(dict).make();
                interval = intervalParser.apply(this.REGION).orElseThrow(IntervalParserFactory.exception(this.REGION));
            } else {
                interval = null;
            }
            final SAMRecordIterator iter;
            if (interval == null) {
                iter = samFileReader.iterator();
            } else {
                iter = samFileReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                if (rec.isSecondaryOrSupplementary())
                    continue;
                final Match m = new Match();
                m.flag = rec.getFlags();
                m.readName = rec.getReadName();
                m.firstBamFile = currentSamFileIndex == 0;
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            iter.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        out.print("#READ-Name\tCOMPARE");
        for (final File f : this.bamFiles) {
            out.print("\t" + f);
        }
        out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBamAndBuild.Match>>(2);
        while (matches.size() < 2) {
            matches.add(new TreeSet<CompareBamAndBuild.Match>(match_comparator));
        }
        CloseableIterator<Match> iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            final Match nextMatch;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            } else {
                nextMatch = null;
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.indexInPair())) {
                if (currReadName != null) {
                    out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        out.print("/");
                        out.print(curr_num_in_pair);
                    }
                    out.print("\t");
                    if (same(matches.get(0), matches.get(1))) {
                        out.print("EQ");
                    } else {
                        out.print("NE");
                    }
                    for (int x = 0; x < 2; ++x) {
                        out.print("\t");
                        print(out, matches.get(x));
                    }
                    out.println();
                }
                if (nextMatch == null)
                    break;
                for (final Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.indexInPair();
            matches.get(nextMatch.firstBamFile ? 0 : 1).add(nextMatch);
        }
        iter.close();
        out.flush();
        out.close();
        database.cleanup();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        this.liftOver = null;
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) Optional(java.util.Optional) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 23 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class FindGVCFsBlocks method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        if (this.bedPath != null) {
            IOUtil.assertFileIsReadable(this.bedPath);
        }
        final List<Path> inputs = IOUtils.unrollPaths(args);
        if (inputs.isEmpty()) {
            LOG.error("input missing");
            return -1;
        }
        if (merge_blocks_distance < 1) {
            LOG.error("bad merge size : " + merge_blocks_distance);
            return -1;
        }
        final Predicate<Locatable> inCapture;
        if (this.bedPath != null) {
            final IntervalTreeMap<Boolean> intervalTreeMap;
            try (BedLineReader blr = new BedLineReader(this.bedPath)) {
                intervalTreeMap = blr.toIntervalTreeMap(BED -> Boolean.TRUE);
            }
            inCapture = (L) -> intervalTreeMap.containsOverlapping(L);
        } else {
            inCapture = (L) -> true;
        }
        if (this.outputFile != null) {
            final String fname = this.outputFile.getFileName().toString();
            if (!fname.endsWith(FileExtensions.INTERVAL_LIST) && !fname.endsWith(FileExtensions.COMPRESSED_INTERVAL_LIST)) {
                LOG.error("Output should end with " + FileExtensions.INTERVAL_LIST + " or " + FileExtensions.COMPRESSED_INTERVAL_LIST);
                return -1;
            }
        }
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(inputs.get(0));
        if (!StringUtils.isBlank(the_contig) && dict.getSequence(the_contig) == null)
            throw new JvarkitException.ContigNotFoundInDictionary(the_contig, dict);
        try (IntervalListWriter w = new IntervalListWriter(this.outputFile, dict)) {
            /**
             * loop over chromosomes
             */
            for (final SAMSequenceRecord ssr : dict.getSequences()) {
                ContigBlocks mainBlock = null;
                if (!StringUtils.isBlank(the_contig) && !ssr.getContig().equals(the_contig))
                    continue;
                final long initMilliSec = System.currentTimeMillis();
                for (int i = 0; i < inputs.size(); i++) {
                    final long startMilliSec = System.currentTimeMillis();
                    LOG.info(inputs.get(i) + " " + (i + 1) + "/" + inputs.size());
                    mainBlock = scanVcfFile(mainBlock, dict, ssr, inputs.get(i));
                    final long count_variants = mainBlock.end_positions.size();
                    final long millisecPerVcf = (System.currentTimeMillis() - initMilliSec) / (i + 1L);
                    LOG.info("N=" + count_variants + ". That took: " + StringUtils.niceDuration(System.currentTimeMillis() - startMilliSec) + " Remains: " + StringUtils.niceDuration((inputs.size() - (i + 1)) * millisecPerVcf));
                }
                /* nothing was seen */
                if (mainBlock.minPos == null)
                    continue;
                if (mainBlock.end_positions.isEmpty()) {
                    final Locatable loc = new SimpleInterval(ssr.getContig(), mainBlock.minPos, mainBlock.maxPos);
                    if (inCapture.test(loc)) {
                        w.add(loc);
                    }
                    continue;
                }
                final List<Locatable> intervals = new ArrayList<>(mainBlock.end_positions.size() + 1);
                final int pos1 = mainBlock.end_positions.stream().mapToInt(P -> P.intValue()).min().getAsInt();
                if (mainBlock.minPos < pos1) {
                    final Locatable loc = new SimpleInterval(ssr.getContig(), mainBlock.minPos, pos1 - 1);
                    if (inCapture.test(loc)) {
                        intervals.add(loc);
                    }
                }
                Integer prev = null;
                for (Integer p : mainBlock.end_positions) {
                    if (prev != null) {
                        final Locatable loc = new SimpleInterval(ssr.getContig(), prev + 1, p);
                        if (inCapture.test(loc)) {
                            intervals.add(loc);
                        }
                    }
                    prev = p;
                }
                final int pos2 = mainBlock.end_positions.stream().mapToInt(P -> P.intValue()).max().getAsInt();
                if (mainBlock.maxPos > pos2) {
                    final Locatable loc = new SimpleInterval(ssr.getContig(), pos2 + 1, mainBlock.maxPos);
                    if (inCapture.test(loc)) {
                        intervals.add(loc);
                    }
                }
                Collections.sort(intervals, (A, B) -> Integer.compare(A.getStart(), B.getStart()));
                while (!intervals.isEmpty()) {
                    Locatable loc = intervals.remove(0);
                    while (this.min_block_size > 0 && !intervals.isEmpty()) {
                        final Locatable loc2 = intervals.get(0);
                        if (!loc2.withinDistanceOf(loc, merge_blocks_distance))
                            break;
                        if (CoordMath.getLength(loc.getStart(), loc2.getEnd()) > this.min_block_size)
                            break;
                        // consumme loc2
                        intervals.remove(0);
                        loc = new SimpleInterval(loc.getContig(), loc.getStart(), loc2.getEnd());
                    }
                    w.add(loc);
                }
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Path(java.nio.file.Path) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TreeSet(java.util.TreeSet) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) ArrayList(java.util.ArrayList) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) OutputStreamWriter(java.io.OutputStreamWriter) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) BufferedWriter(java.io.BufferedWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) Closeable(java.io.Closeable) CoordMath(htsjdk.samtools.util.CoordMath) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable)

Example 24 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfBurdenSlidingWindow method runBurden.

@Override
protected void runBurden(final PrintWriter pw, final VCFReader vcfReader, final VariantContextWriter vcw) throws IOException {
    if (this.window_size <= 0)
        throw new IllegalArgumentException("bad window size: " + this.window_size);
    if (this.window_shift <= 0)
        throw new IllegalArgumentException("bad window shift:" + this.window_shift);
    final SAMSequenceDictionary vcfDict = SequenceDictionaryUtils.extractRequired(vcfReader.getHeader());
    pw.print("#chrom");
    pw.print("\t");
    pw.print("start0");
    pw.print("\t");
    pw.print("end");
    pw.print("\t");
    pw.print("name");
    pw.print("\t");
    pw.print("length");
    pw.print("\t");
    pw.print("p-value");
    pw.print("\t");
    pw.print("affected_alt");
    pw.print("\t");
    pw.print("affected_hom");
    pw.print("\t");
    pw.print("unaffected_alt");
    pw.print("\t");
    pw.print("unaffected_hom");
    pw.print("\t");
    pw.print("variants.count");
    pw.println();
    final ProgressFactory.Watcher<Locatable> progress = ProgressFactory.newInstance().logger(LOG).dictionary(vcfDict).build();
    CloseableIterator<VariantContext> iter;
    if (!StringUtils.isBlank(this.limitContig)) {
        final SAMSequenceRecord ssr = vcfDict.getSequence(this.limitContig);
        if (ssr == null)
            throw new JvarkitException.ContigNotFoundInDictionary(limitContig, vcfDict);
        iter = vcfReader.query(ssr);
    } else {
        iter = vcfReader.iterator();
    }
    final SlidingWindowIterator<VariantContext> iter2 = new SlidingWindowIterator<>(iter.stream().filter(CTX -> accept(CTX)).iterator(), this.window_size, this.window_shift);
    while (iter2.hasNext()) {
        final Map.Entry<? extends Locatable, List<VariantContext>> entry = iter2.next();
        final Locatable W = progress.apply(entry.getKey());
        final FisherResult fisher = runFisher(entry.getValue());
        if (fisher.p_value > this.fisherTreshold)
            continue;
        pw.print(W.getContig());
        pw.print("\t");
        pw.print(W.getStart() - 1);
        pw.print("\t");
        pw.print(W.getEnd());
        pw.print("\t");
        pw.print(new SimpleInterval(W).toString());
        pw.print("\t");
        pw.print(W.getLengthOnReference());
        pw.print("\t");
        pw.print(fisher.p_value);
        pw.print("\t");
        pw.print(fisher.affected_alt);
        pw.print("\t");
        pw.print(fisher.affected_hom);
        pw.print("\t");
        pw.print(fisher.unaffected_alt);
        pw.print("\t");
        pw.print(fisher.unaffected_hom);
        pw.print("\t");
        pw.print(entry.getValue().size());
        pw.println();
        if (vcw != null) {
            for (final VariantContext ctx : entry.getValue()) {
                vcw.add(new VariantContextBuilder(ctx).attribute(BURDEN_KEY, VCFUtils.escapeInfoField(W.toString())).make());
            }
        }
    }
    iter.close();
    progress.close();
}
Also used : ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SlidingWindowIterator(com.github.lindenb.jvarkit.iterator.SlidingWindowIterator) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Map(java.util.Map) Locatable(htsjdk.samtools.util.Locatable)

Example 25 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfToBed method scan.

private int scan(String uriOrNull, final VCFIterator iter, final PrintWriter pw) {
    try {
        final SAMSequenceDictionary dictIn = iter.getHeader().getSequenceDictionary();
        final IntervalExtender extender = IntervalExtender.of(dictIn, this.extendsStr);
        if (extender.isShriking()) {
            throw new IllegalArgumentException("shrinking interval are not supported " + extender);
        }
        final ContigNameConverter ctgNameConverter;
        final Function<String, SAMSequenceRecord> funGetSSR;
        if (this.samSequenceDictionary != null) {
            ctgNameConverter = ContigNameConverter.fromOneDictionary(this.samSequenceDictionary);
            funGetSSR = C -> samSequenceDictionary.getSequence(C);
        } else if (dictIn != null) {
            ctgNameConverter = ContigNameConverter.fromOneDictionary(dictIn);
            funGetSSR = C -> dictIn.getSequence(C);
        } else {
            ctgNameConverter = null;
            funGetSSR = null;
        }
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            int start = ctx.getStart();
            int end = ctx.getEnd();
            if (!this.ignoreCi && (ctx.hasAttribute("CIPOS") || ctx.hasAttribute("CIEND"))) {
                if (ctx.hasAttribute("CIPOS")) {
                    int x = 0;
                    try {
                        x = ctx.getAttributeAsIntList("CIPOS", 0).get(0);
                    } catch (final Throwable err) {
                        LOG.error(err);
                        x = 0;
                    }
                    start += x;
                }
                if (ctx.hasAttribute("CIEND")) {
                    int x = 0;
                    try {
                        x = ctx.getAttributeAsIntList("CIEND", 0).get(1);
                    } catch (final Throwable err) {
                        LOG.error(err);
                        x = 0;
                    }
                    end += x;
                }
                // it happens for SV=BND
                if (start > end) {
                    final int tmp = start;
                    start = end;
                    end = tmp;
                }
            }
            if (extender.isChanging()) {
                final Locatable extended = extender.apply(new SimpleInterval(ctx.getContig(), start, end));
                start = extended.getStart();
                end = extended.getEnd();
            }
            final String newtContig;
            if (ctgNameConverter != null) {
                newtContig = ctgNameConverter.apply(ctx.getContig());
                if (StringUtil.isBlank(newtContig)) {
                    this.contigsNotFound.add(ctx.getContig());
                    continue;
                }
            } else {
                newtContig = ctx.getContig();
            }
            if (funGetSSR != null) {
                final SAMSequenceRecord ssr = funGetSSR.apply(newtContig);
                if (ssr != null) {
                    end = Math.min(ssr.getSequenceLength(), end);
                }
            }
            start = Math.max(1, start);
            final SimpleInterval interval = new SimpleInterval(newtContig, start, end);
            if (interval.getStart() > interval.getEnd()) {
                LOG.info("negative interval " + interval + " for " + ctx + " in " + uriOrNull);
                continue;
            }
            if (this.minLength != -1 && interval.getLengthOnReference() < this.minLength)
                continue;
            if (this.maxLength != -1 && interval.getLengthOnReference() > this.maxLength)
                continue;
            switch(this.outputFormat) {
                case bed:
                    {
                        pw.print(interval.getContig());
                        pw.print("\t");
                        pw.print(interval.getStart() - 1);
                        pw.print("\t");
                        pw.print(interval.getEnd());
                        pw.print("\t");
                        pw.print(ctx.hasID() ? ctx.getID() : ctx.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(",")));
                        pw.print("\t");
                        pw.print(ctx.hasLog10PError() ? Math.min(1000, (int) ctx.getPhredScaledQual()) : ".");
                        /*
						pw.print("\t");
						pw.print("+");
						pw.print("\t");
						pw.print(interval.getStart()-1);
						pw.print("\t");
						pw.print(interval.getEnd());
						pw.print("\t");
						pw.print("0,0,0");
						*/
                        pw.println();
                        break;
                    }
                case interval:
                    {
                        pw.print(interval.getContig());
                        pw.print(":");
                        pw.print(interval.getStart());
                        pw.print("-");
                        pw.print(interval.getEnd());
                        pw.println();
                        break;
                    }
                default:
                    {
                        throw new IllegalStateException("" + this.outputFormat);
                    }
            }
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) Function(java.util.function.Function) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TreeSet(java.util.TreeSet) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) NoCloseInputStream(com.github.lindenb.jvarkit.io.NoCloseInputStream) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) VCFIteratorBuilder(htsjdk.variant.vcf.VCFIteratorBuilder) Collectors(java.util.stream.Collectors) List(java.util.List) Paths(java.nio.file.Paths) FileExtensions(htsjdk.samtools.util.FileExtensions) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) InputStream(java.io.InputStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)71 ArrayList (java.util.ArrayList)49 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)47 List (java.util.List)47 Locatable (htsjdk.samtools.util.Locatable)46 Path (java.nio.file.Path)46 Parameter (com.beust.jcommander.Parameter)43 Program (com.github.lindenb.jvarkit.util.jcommander.Program)43 Logger (com.github.lindenb.jvarkit.util.log.Logger)43 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)39 Collectors (java.util.stream.Collectors)38 Set (java.util.Set)37 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)36 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)35 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)35 SAMFileHeader (htsjdk.samtools.SAMFileHeader)34 CloserUtil (htsjdk.samtools.util.CloserUtil)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)33 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)32 SamReader (htsjdk.samtools.SamReader)32