Search in sources :

Example 41 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfIn method scanUsingTabix.

private int scanUsingTabix(final VariantContextWriter vcw, final String databaseFile, final VcfIterator in2) {
    VCFFileReader tabix = null;
    try {
        tabix = new VCFFileReader(new File(databaseFile), true);
        final VCFHeader header1 = new VCFHeader(in2.getHeader());
        this.addMetaData(header1);
        vcw.writeHeader(header1);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1.getSequenceDictionary()).logger(LOG);
        while (in2.hasNext() && !vcw.checkError()) {
            final VariantContext userCtx = progress.watch(in2.next());
            final CloseableIterator<VariantContext> iter = tabix.query(userCtx.getContig(), Math.max(1, userCtx.getStart() - 1), userCtx.getEnd() + 1);
            boolean keep = false;
            while (iter.hasNext()) {
                final VariantContext dbctx = iter.next();
                if (!sameContext(userCtx, dbctx))
                    continue;
                if (!allUserAltFoundInDatabase(userCtx, dbctx))
                    continue;
                keep = true;
                break;
            }
            iter.close();
            addVariant(vcw, userCtx, keep);
            if (vcw.checkError())
                break;
        }
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(tabix);
        CloserUtil.close(in2);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Example 42 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfTreePack method scan.

void scan(final VcfIterator iter) {
    final VCFHeader header = iter.getHeader();
    super.bindings.put("header", header);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
    while (iter.hasNext()) {
        final VariantContext ctx = progress.watch(iter.next());
        bindings.put("variant", ctx);
        super.nodeFactoryChain.watch(super.rootNode, ctx);
    }
    progress.finish();
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 43 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfLiftOver method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VariantContextWriter failed = null;
    GenomicSequence genomicSequence = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
            if (!(V instanceof VCFInfoHeaderLine))
                return true;
            final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
            if (removeInfo.contains(vih.getID()))
                return false;
            return true;
        }).collect(Collectors.toSet());
        if (this.failedFile != null) {
            final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
            header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
            failed = super.openVariantContextWriter(failedFile);
            failed.writeHeader(header2);
        }
        final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
        header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
        out.writeHeader(header3);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext()) {
            VariantContext ctx = progress.watch(in.next());
            if (!this.removeInfo.isEmpty()) {
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
                ctx = vcb.make();
            }
            if (ctx.isIndel() && this.ignoreIndels) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
                continue;
            }
            if (adaptivematch) {
                double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
                double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
                this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
            }
            final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
            false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
            if (lifted == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
            } else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
            } else {
                boolean alleleAreValidatedVsRef = true;
                // part of the code was copied from picard/liftovervcf
                final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
                final List<Allele> alleles = new ArrayList<Allele>();
                for (final Allele oldAllele : ctx.getAlleles()) {
                    final Allele fixedAllele;
                    if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
                        alleles.add(oldAllele);
                        continue;
                    } else if (lifted.isPositiveStrand()) {
                        fixedAllele = oldAllele;
                        alleles.add(oldAllele);
                    } else {
                        fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
                        alleles.add(fixedAllele);
                        reverseComplementAlleleMap.put(oldAllele, fixedAllele);
                    }
                    if (this.checkAlleleSequence) {
                        if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
                            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
                        }
                        final String alleleStr = fixedAllele.getBaseString();
                        int x = 0;
                        while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
                            final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
                            if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
                                alleleAreValidatedVsRef = false;
                                break;
                            }
                            ++x;
                        }
                        if (x != alleleStr.length()) {
                            alleleAreValidatedVsRef = false;
                            break;
                        }
                    }
                }
                if (!alleleAreValidatedVsRef) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
                    continue;
                }
                if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
                    continue;
                }
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
                vcb.id(ctx.getID());
                vcb.attributes(ctx.getAttributes());
                vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
                vcb.filters(ctx.getFilters());
                vcb.log10PError(ctx.getLog10PError());
                final GenotypesContext genotypeContext = ctx.getGenotypes();
                final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
                for (final Genotype genotype : genotypeContext) {
                    final List<Allele> fixedAlleles = new ArrayList<Allele>();
                    for (final Allele allele : genotype.getAlleles()) {
                        final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
                        fixedAlleles.add(fixedAllele);
                    }
                    fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
                }
                vcb.genotypes(fixedGenotypes);
                out.add(vcb.make());
            }
        }
        if (failed != null) {
            failed.close();
            failed = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(failed);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LiftOver(htsjdk.samtools.liftover.LiftOver) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) List(java.util.List) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) Map(java.util.Map) Interval(htsjdk.samtools.util.Interval)

Example 44 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class BamClipToInsertion method doWork.

@Override
public int doWork(final List<String> args) {
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        if (header1.getSortOrder() != SortOrder.coordinate) {
            LOG.error("Input is not sorted on coordinate.");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
        header2.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        String curContig = null;
        LinkedList<SamAndCigar> buffer = new LinkedList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
                // ignore unmapped reads
                if (rec.getReadUnmappedFlag()) {
                    sfw.addAlignment(rec);
                    continue;
                }
            }
            if (rec == null || (curContig != null && !curContig.equals(rec.getReferenceName()))) {
                for (final SamAndCigar r : buffer) sfw.addAlignment(r.getSAMRecord());
                buffer.clear();
                // we're done
                if (rec == null)
                    break;
                curContig = rec.getReferenceName();
            }
            final SamAndCigar sac = new SamAndCigar(rec);
            if (!sac.containsIorS) {
                sfw.addAlignment(rec);
                continue;
            }
            buffer.add(sac);
            int i = 0;
            while (i < buffer.size()) {
                final SamAndCigar ri = buffer.get(i);
                if (ri.getSAMRecord().getUnclippedEnd() < rec.getUnclippedStart()) {
                    for (int j = 0; j < buffer.size(); ++j) {
                        if (i == j)
                            continue;
                        ri.merge(buffer.get(j));
                    }
                    sfw.addAlignment(ri.build());
                    buffer.remove(i);
                } else {
                    ++i;
                }
            }
        }
        progress.finish();
        LOG.info("done");
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) LinkedList(java.util.LinkedList)

Example 45 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class BamTile method doWork.

@Override
public int doWork(final List<String> args) {
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        if (header1.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            LOG.error("File header not sorted on coordinate");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment("BamTile:" + getVersion() + ":" + getProgramCommandLine());
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        final LinkedList<SAMRecord> buffer = new LinkedList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filterOut.filterOut(rec))
                    continue;
                if (!buffer.isEmpty()) {
                    final SAMRecord last = buffer.getLast();
                    if (this.no_overlap) {
                        if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getEnd() >= rec.getStart()) {
                            continue;
                        }
                    } else {
                        if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getAlignmentStart() <= rec.getAlignmentStart() && last.getAlignmentEnd() >= rec.getAlignmentEnd()) {
                            continue;
                        }
                    }
                }
            }
            if (rec == null || (!buffer.isEmpty() && buffer.getLast().getReferenceIndex() != rec.getReferenceIndex())) {
                while (!buffer.isEmpty()) {
                    sfw.addAlignment(buffer.removeFirst());
                }
                if (rec == null)
                    break;
            }
            buffer.add(rec);
            if (!this.no_overlap && buffer.size() > 2) {
                final int index = buffer.size();
                final SAMRecord prev = buffer.get(index - 3);
                final SAMRecord curr = buffer.get(index - 2);
                final SAMRecord next = buffer.get(index - 1);
                if (prev.getAlignmentEnd() >= next.getAlignmentStart() || curr.getAlignmentEnd() <= prev.getAlignmentEnd()) {
                    buffer.remove(index - 2);
                } else if (curr.getAlignmentStart() == prev.getAlignmentStart() && curr.getAlignmentEnd() > prev.getAlignmentEnd()) {
                    buffer.remove(index - 3);
                }
            }
            while (buffer.size() > 3) {
                sfw.addAlignment(buffer.removeFirst());
            }
        }
        progress.finish();
        sfw.close();
        sfw = null;
        iter.close();
        iter = null;
        sfr.close();
        sfr = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) LinkedList(java.util.LinkedList)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27