Search in sources :

Example 46 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class KnownDeletion method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.extendDistance <= 0) {
        LOG.error("bad extend factor " + this.extendDistance);
        return -1;
    }
    final Function<SAMRecord, Integer> mateEnd = REC -> SAMUtils.getMateCigar(REC) != null ? SAMUtils.getMateAlignmentEnd(REC) : REC.getMateAlignmentStart();
    PrintWriter pw = null;
    SAMFileWriter sfw = null;
    try {
        pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        final SamReaderFactory srf = SamReaderFactory.makeDefault().referenceSequence(this.faidx).validationStringency(ValidationStringency.LENIENT);
        final List<String> filenames = IOUtils.unrollStrings2018(args);
        if (this.bamOut != null && !filenames.isEmpty()) {
            SAMSequenceDictionary theDict = null;
            final Set<String> samples = new TreeSet<>();
            for (final String filename : filenames) {
                final SamInputResource sri = SamInputResource.of(filename);
                try (final SamReader samReader = srf.open(sri)) {
                    final SAMFileHeader header = samReader.getFileHeader();
                    final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                    if (theDict == null) {
                        theDict = dict;
                    } else if (!SequenceUtil.areSequenceDictionariesEqual(theDict, dict)) {
                        throw new JvarkitException.DictionariesAreNotTheSame(theDict, dict);
                    }
                    final String sampleName = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(filename);
                    samples.add(sampleName);
                }
            }
            final SAMFileHeader header = new SAMFileHeader(theDict);
            header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
            JVarkitVersion.getInstance().addMetaData(this, header);
            for (final String sn : samples) {
                final SAMReadGroupRecord srg = new SAMReadGroupRecord(sn);
                srg.setSample(sn);
                header.addReadGroup(srg);
            }
            final SAMFileWriterFactory swf = new SAMFileWriterFactory();
            sfw = swf.makeSAMOrBAMWriter(header, true, this.bamOut);
        }
        for (final String filename : IOUtils.unrollStrings2018(args)) {
            final SamInputResource sri = SamInputResource.of(filename);
            try (final SamReader samReader = srf.open(sri)) {
                final SAMFileHeader header = samReader.getFileHeader();
                final SAMSequenceDictionary dict = header.getSequenceDictionary();
                final String sampleName = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(filename);
                final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().dictionary(dict).make();
                final SimpleInterval interval = parser.apply(this.regionStr).orElseThrow(IntervalParserFactory.exception(this.regionStr));
                final SAMSequenceRecord ssr = Objects.requireNonNull(dict.getSequence(interval.getContig()));
                final int tid = ssr.getSequenceIndex();
                final QueryInterval qi1 = new QueryInterval(tid, Math.max(0, interval.getStart() - this.extendDistance), Math.min(interval.getStart() + this.extendDistance, ssr.getSequenceLength()));
                final QueryInterval qi2 = new QueryInterval(tid, Math.max(0, interval.getEnd() - this.extendDistance), Math.min(interval.getEnd() + this.extendDistance, ssr.getSequenceLength()));
                if (CoordMath.overlaps(qi1.start, qi1.end, qi2.start, qi2.end)) {
                    LOG.error("after extending the boundaries with " + this.extendDistance + ". They both overlap...");
                    return -1;
                }
                long count_disc = 0L;
                long count_split = 0L;
                long count_del = 0L;
                final QueryInterval[] intervals = QueryInterval.optimizeIntervals(new QueryInterval[] { qi1, qi2 });
                try (final CloseableIterator<SAMRecord> iter = samReader.query(intervals, false)) {
                    while (iter.hasNext()) {
                        final SAMRecord rec = iter.next();
                        if (rec.getReadUnmappedFlag())
                            continue;
                        if (this.samRecordFilter.filterOut(rec))
                            continue;
                        if (rec.getStart() <= qi1.end && rec.getEnd() >= qi2.start) {
                            count_del++;
                            if (sfw != null) {
                                rec.setAttribute("RG", sampleName);
                                sfw.addAlignment(rec);
                            }
                            continue;
                        }
                        if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && rec.getMateReferenceIndex().equals(tid) && ((CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi1.start, qi1.end) && CoordMath.overlaps(rec.getMateAlignmentStart(), mateEnd.apply(rec), qi2.start, qi2.end)) || (CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi2.start, qi2.end) && CoordMath.overlaps(rec.getMateAlignmentStart(), mateEnd.apply(rec), qi1.start, qi1.end)))) {
                            count_disc++;
                            if (sfw != null) {
                                rec.setAttribute("RG", sampleName);
                                sfw.addAlignment(rec);
                            }
                            continue;
                        }
                        for (final SAMRecord rec2 : SAMUtils.getOtherCanonicalAlignments(rec)) {
                            if (rec2.getReferenceIndex().equals(tid) && ((CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi1.start, qi1.end) && CoordMath.overlaps(rec2.getStart(), rec2.getEnd(), qi2.start, qi2.end)) || (CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi2.start, qi2.end) && CoordMath.overlaps(rec2.getStart(), rec2.getEnd(), qi1.start, qi1.end)))) {
                                count_split++;
                                if (sfw != null) {
                                    rec.setAttribute("RG", sampleName);
                                    sfw.addAlignment(rec);
                                }
                                break;
                            }
                        }
                    }
                }
                pw.println(filename + "\t" + sampleName + "\t" + this.regionStr + "\t" + count_disc + "\t" + count_split + "\t" + count_del + "\t" + (count_del + count_disc + count_split));
            }
        }
        pw.flush();
        if (sfw != null)
            sfw.close();
        sfw = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) Function(java.util.function.Function) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) Objects(java.util.Objects) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) QueryInterval(htsjdk.samtools.QueryInterval) CoordMath(htsjdk.samtools.util.CoordMath) SamRecordFilterFactory(com.github.lindenb.jvarkit.util.bio.samfilter.SamRecordFilterFactory) Optional(java.util.Optional) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamInputResource(htsjdk.samtools.SamInputResource) SamReader(htsjdk.samtools.SamReader) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Optional(java.util.Optional) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 47 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class SamShortInvertion method dump.

private void dump(final SAMSequenceDictionary dict, final IntervalTreeMap<List<Arc>> database, final VariantContextWriter vcw, final Set<String> samples, final Integer before) {
    if (this.debug)
        LOG.debug("dump");
    final Allele REF = Allele.create("N", true);
    final Allele SPLIT = Allele.create("<INV>", false);
    final Comparator<Locatable> cmp = new ContigDictComparator(dict).createLocatableComparator();
    final List<SimpleInterval> intervals = database.keySet().stream().filter(R -> (before == null ? true : R.getEnd() > before.intValue())).map(R -> new SimpleInterval(R)).sorted(cmp).collect(Collectors.toList());
    for (final SimpleInterval interval0 : intervals) {
        final List<Arc> arcs = database.getOverlapping(interval0).stream().flatMap(L -> L.stream()).filter(A -> !A.consummed).filter(A -> testOverlapping(interval0, A)).collect(Collectors.toList());
        if (arcs.isEmpty())
            continue;
        if (!keep_flag)
            arcs.forEach(A -> A.consummed = true);
        int maxdp = 0;
        final VariantContextBuilder vcb = new VariantContextBuilder();
        final Set<Allele> alleles = new HashSet<>();
        alleles.add(REF);
        final List<Genotype> genotypes = new ArrayList<>(samples.size());
        vcb.chr(dict.getSequence(arcs.get(0).tid).getSequenceName());
        final int chromStart = arcs.stream().mapToInt(A -> A.chromStart).min().getAsInt();
        vcb.start(chromStart);
        final int chromEnd = arcs.stream().mapToInt(A -> A.chromEnd).max().getAsInt();
        vcb.stop(chromEnd);
        vcb.attribute(VCFConstants.END_KEY, chromEnd);
        vcb.attribute("SVLEN", CoordMath.getLength(chromStart, chromEnd));
        int depth = 0;
        final Set<String> gotSamples = new TreeSet<>();
        for (final String sample : samples) {
            final List<Arc> sampleArcs = arcs.stream().filter(A -> A.sample.equals(sample)).collect(Collectors.toList());
            if (sampleArcs.isEmpty()) {
                genotypes.add(GenotypeBuilder.createMissing(sample, 2));
            } else {
                final GenotypeBuilder gb = new GenotypeBuilder(sample);
                alleles.add(SPLIT);
                gb.alleles(Arrays.asList(REF, SPLIT));
                final int countCat1 = (int) sampleArcs.stream().filter(A -> A.type == SUPPORTING_LEFT).count();
                final int countCat2 = (int) sampleArcs.stream().filter(A -> A.type == SUPPORTING_RIGHT).count();
                gb.DP(countCat1 + countCat2);
                gb.attribute("N5", countCat1);
                gb.attribute("N3", countCat2);
                maxdp = Math.max(maxdp, countCat1 + countCat2);
                depth += countCat1 + countCat2;
                genotypes.add(gb.make());
                gotSamples.add(sample);
            }
        }
        if (depth <= this.min_supporting_reads)
            continue;
        vcb.genotypes(genotypes);
        vcb.alleles(alleles);
        vcb.attribute(VCFConstants.DEPTH_KEY, depth);
        vcb.attribute("NSAMPLES", gotSamples.size());
        vcb.attribute("SAMPLES", new ArrayList<>(gotSamples));
        vcb.attribute(VCFConstants.SVTYPE, "INV");
        vcb.attribute("DPMAX", maxdp);
        vcw.add(vcb.make());
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) FilteringSamIterator(htsjdk.samtools.filter.FilteringSamIterator) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMUtils(htsjdk.samtools.SAMUtils) SecondaryOrSupplementaryFilter(htsjdk.samtools.filter.SecondaryOrSupplementaryFilter) Parameter(com.beust.jcommander.Parameter) AggregateFilter(htsjdk.samtools.filter.AggregateFilter) HashMap(java.util.HashMap) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) SAMTag(htsjdk.samtools.SAMTag) Interval(htsjdk.samtools.util.Interval) ToIntBiFunction(java.util.function.ToIntBiFunction) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFConstants(htsjdk.variant.vcf.VCFConstants) Locatable(htsjdk.samtools.util.Locatable) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) Consumer(java.util.function.Consumer) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) QueryInterval(htsjdk.samtools.QueryInterval) FailsVendorReadQualityFilter(htsjdk.samtools.filter.FailsVendorReadQualityFilter) MappingQualityFilter(htsjdk.samtools.filter.MappingQualityFilter) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Comparator(java.util.Comparator) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable) HashSet(java.util.HashSet)

Example 48 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VCFBed method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator vcfin, final VariantContextWriter out) {
    final VCFHeader header = vcfin.getHeader();
    final VCFHeader h2 = new VCFHeader(header);
    final VCFInfoHeaderLine infoHeader = new VCFInfoHeaderLine(this.infoName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "metadata added from " + this.inputBedFile + " . Format was " + this.formatPattern.replaceAll("[\"\'\\\\]", " "));
    final VCFInfoHeaderLine infoCountBasicOverlap = new VCFInfoHeaderLine(this.infoName + "_N", 1, VCFHeaderLineType.Integer, "Number of raw overlap within distance " + this.within_distance + "bp.");
    final VCFInfoHeaderLine infoCountFinerOverlap = new VCFInfoHeaderLine(this.infoName + "_C", 1, VCFHeaderLineType.Integer, "Number of finer overlap within distance " + inputBedFile + " and overlap bed:" + this.min_overlap_bed_fraction + " and overlap vcf: " + this.min_overlap_vcf_fraction);
    if (infoCountBasicOverlap != null)
        h2.addMetaDataLine(infoCountBasicOverlap);
    if (infoCountFinerOverlap != null)
        h2.addMetaDataLine(infoCountFinerOverlap);
    h2.addMetaDataLine(infoHeader);
    JVarkitVersion.getInstance().addMetaData(this, h2);
    out.writeHeader(h2);
    while (vcfin.hasNext()) {
        final VariantContext ctx = vcfin.next();
        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
        vcb.rmAttribute(infoCountBasicOverlap.getID());
        vcb.rmAttribute(infoCountFinerOverlap.getID());
        vcb.rmAttribute(infoHeader.getID());
        if (this.ignoreFILTERed && ctx.isFiltered()) {
            out.add(vcb.make());
            continue;
        }
        final String normalizedContig = this.contigNameConverter.apply(ctx.getContig());
        if (StringUtil.isBlank(normalizedContig)) {
            out.add(ctx);
            continue;
        }
        final Optional<? extends Locatable> bndOpt = this.disable_bnd_eval ? Optional.empty() : Breakend.parseInterval(ctx);
        final SimpleInterval theInterval;
        if (bndOpt.isPresent()) {
            theInterval = new SimpleInterval(normalizedContig, bndOpt.get().getStart(), bndOpt.get().getEnd());
        } else {
            theInterval = new SimpleInterval(normalizedContig, ctx.getStart(), ctx.getEnd());
        }
        final SimpleInterval extendedInterval;
        if (this.within_distance <= 0) {
            extendedInterval = theInterval;
        } else {
            extendedInterval = new SimpleInterval(theInterval.getContig(), Math.max(1, theInterval.getStart() + this.within_distance), theInterval.getEnd() + this.within_distance);
        }
        int count_basic_overlap = 0;
        int count_finer_overlap = 0;
        final Set<String> annotations = new LinkedHashSet<>();
        if (this.intervalTreeMap != null) {
            for (final Set<BedLine> bedLines : this.intervalTreeMap.getOverlapping(extendedInterval)) {
                for (final BedLine bedLine : bedLines) {
                    count_basic_overlap++;
                    if (!testFinerIntersection(theInterval, bedLine))
                        continue;
                    count_finer_overlap++;
                    final String newannot = this.bedJexlToString.apply(new BedJEXLContext(bedLine, ctx));
                    if (!StringUtil.isBlank(newannot)) {
                        annotations.add(VCFUtils.escapeInfoField(newannot));
                    }
                }
            }
        } else {
            try (CloseableIterator<BedLine> iter = this.bedReader.iterator(theInterval.getContig(), Math.max(0, theInterval.getStart() - 1), theInterval.getEnd() + 1)) {
                while (iter.hasNext()) {
                    final BedLine bedLine = iter.next();
                    if (!theInterval.contigsMatch(bedLine))
                        continue;
                    if (!extendedInterval.withinDistanceOf(bedLine, this.within_distance))
                        continue;
                    count_basic_overlap++;
                    if (!testFinerIntersection(theInterval, bedLine))
                        continue;
                    count_finer_overlap++;
                    final String newannot = this.bedJexlToString.apply(new BedJEXLContext(bedLine, ctx));
                    if (!StringUtil.isBlank(newannot))
                        annotations.add(VCFUtils.escapeInfoField(newannot));
                }
            } catch (final IOException ioe) {
                LOG.error(ioe);
                throw new RuntimeIOException(ioe);
            }
        }
        if (!annotations.isEmpty()) {
            vcb.attribute(infoHeader.getID(), annotations.toArray());
        }
        vcb.attribute(infoCountBasicOverlap.getID(), count_basic_overlap);
        vcb.attribute(infoCountFinerOverlap.getID(), count_finer_overlap);
        out.add(vcb.make());
    }
    out.close();
    return 0;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VariantContext(htsjdk.variant.variantcontext.VariantContext) JexlToString(com.github.lindenb.jvarkit.jexl.JexlToString) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 49 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfGnomad method findOverlapping.

/**
 * find matching variant in tabix file, use a buffer to avoid multiple random accesses
 */
private List<VariantContext> findOverlapping(final VariantContext userVariantCtx) {
    final String normContig = this.ctgNameConverter.apply(userVariantCtx.getContig());
    final Locatable loc;
    if (StringUtil.isBlank(normContig))
        return Collections.emptyList();
    if (normContig.equals(userVariantCtx.getContig())) {
        loc = userVariantCtx;
    } else {
        loc = new SimpleInterval(normContig, userVariantCtx.getStart(), userVariantCtx.getEnd());
    }
    final List<VariantContext> list = new ArrayList<>();
    try (CloseableIterator<VariantContext> iter = this.gnomadReader.query(loc)) {
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            list.add(ctx);
        }
    }
    return list;
}
Also used : ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable)

Example 50 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfGnomadExomeVsGenome method isVariantIn.

boolean isVariantIn(final VariantContext ctx, final BufferedVCFReader reader) {
    final String ctg = this.ctgNameConverter.apply(ctx.getContig());
    if (StringUtils.isBlank(ctg))
        return false;
    try (CloseableIterator<VariantContext> iterE = reader.query(new SimpleInterval(ctg, ctx.getStart(), ctx.getEnd()))) {
        while (iterE.hasNext()) {
            final VariantContext ctx2 = iterE.next();
            if (ctx.getStart() != ctx2.getStart())
                continue;
            if (!ctx.getReference().equals(ctx2.getReference()))
                continue;
            final Set<Allele> alt2 = ctx2.getAlternateAlleles().stream().filter(A -> AcidNucleics.isATGC(A)).collect(Collectors.toSet());
            if (ctx.getAlternateAlleles().stream().anyMatch(A -> alt2.contains(A)))
                return true;
        }
    }
    return false;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) UnaryOperator(java.util.function.UnaryOperator) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContext(htsjdk.variant.variantcontext.VariantContext) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Aggregations

SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)71 ArrayList (java.util.ArrayList)49 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)47 List (java.util.List)47 Locatable (htsjdk.samtools.util.Locatable)46 Path (java.nio.file.Path)46 Parameter (com.beust.jcommander.Parameter)43 Program (com.github.lindenb.jvarkit.util.jcommander.Program)43 Logger (com.github.lindenb.jvarkit.util.log.Logger)43 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)39 Collectors (java.util.stream.Collectors)38 Set (java.util.Set)37 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)36 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)35 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)35 SAMFileHeader (htsjdk.samtools.SAMFileHeader)34 CloserUtil (htsjdk.samtools.util.CloserUtil)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)33 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)32 SamReader (htsjdk.samtools.SamReader)32