Search in sources :

Example 91 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class SamToPsl method scan.

private void scan(final SamReader in) {
    final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
    if (dict == null)
        throw new RuntimeException("Sequence dictionary missing...");
    final SAMRecordIterator iter = in.iterator();
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    while (iter.hasNext() && !this.out.checkError()) {
        final SAMRecord rec = progress.watch(iter.next());
        if (rec.getReadUnmappedFlag())
            continue;
        for (final PslAlign a : makePslAlign(rec, dict)) {
            out.println(toString(a, rec));
        }
    }
    progress.finish();
    iter.close();
}
Also used : PslAlign(com.github.lindenb.jvarkit.util.ucsc.PslAlign) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecord(htsjdk.samtools.SAMRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 92 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfRenameSamples method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final VCFHeader header1 = in.getHeader();
    final Set<String> samples1 = new LinkedHashSet<String>(header1.getSampleNamesInOrder());
    final List<String> newHeader = new ArrayList<String>(samples1);
    for (int i = 0; i < newHeader.size(); ++i) {
        final String destName = this.oldNameToNewName.get(newHeader.get(i));
        if (destName == null)
            continue;
        newHeader.set(i, destName);
    }
    if (newHeader.size() != new HashSet<String>(newHeader).size()) {
        throw new RuntimeException("Error in input : there are some diplicates in the resulting new VCF header: " + newHeader);
    }
    for (final String srcName : this.oldNameToNewName.keySet()) {
        if (!samples1.contains(srcName)) {
            if (missing_user_name_is_error) {
                LOG.error("Source Sample " + srcName + " missing in " + samples1 + ". Use option -E to ignore");
                return -1;
            } else {
                LOG.warning("Missing src-sample:" + srcName);
            }
        }
    }
    final VCFHeader header2 = new VCFHeader(header1.getMetaDataInInputOrder(), newHeader);
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
    out.writeHeader(header2);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
    while (in.hasNext()) {
        final VariantContext ctx = progress.watch(in.next());
        final VariantContextBuilder b = new VariantContextBuilder(ctx);
        final List<Genotype> genotypes = new ArrayList<Genotype>();
        for (final String oldName : samples1) {
            Genotype g = ctx.getGenotype(oldName);
            final String destName = this.oldNameToNewName.get(oldName);
            if (destName != null) {
                final GenotypeBuilder gb = new GenotypeBuilder(g);
                gb.name(destName);
                g = gb.make();
            }
            genotypes.add(g);
        }
        b.genotypes(genotypes);
        out.add(b.make());
    }
    progress.finish();
    return 0;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 93 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class FindNewSpliceSites method scan.

private void scan(SamReader in) {
    SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
    if (dict == null)
        throw new RuntimeException("Sequence dictionary missing");
    SAMRecordIterator iter = in.iterator();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        if (rec.getReadUnmappedFlag())
            continue;
        if (rec.isSecondaryOrSupplementary())
            continue;
        progress.watch(rec);
        if (isWeird(rec, dict)) {
            this.weird.addAlignment(rec);
            continue;
        }
        for (CigarElement ce : rec.getCigar().getCigarElements()) {
            if (ce.getOperator().equals(CigarOperator.N)) {
                scanRead(rec, dict);
                break;
            }
        }
    }
    iter.close();
    progress.finish();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecord(htsjdk.samtools.SAMRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement)

Example 94 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class SamFixCigar method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        sfw = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final List<CigarElement> newCigar = new ArrayList<CigarElement>();
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.getCigarElements().isEmpty() || bases == null) {
                sfw.addAlignment(rec);
                continue;
            }
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            newCigar.clear();
            int refPos1 = rec.getAlignmentStart();
            int readPos0 = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.M)) {
                    for (int i = 0; i < ce.getLength(); ++i) {
                        char c1 = Character.toUpperCase((char) bases[readPos0]);
                        char c2 = Character.toUpperCase(refPos1 - 1 < genomicSequence.length() ? genomicSequence.charAt(refPos1 - 1) : '*');
                        if (c2 == 'N' || c1 == c2) {
                            newCigar.add(new CigarElement(1, CigarOperator.EQ));
                        } else {
                            newCigar.add(new CigarElement(1, CigarOperator.X));
                        }
                        refPos1++;
                        readPos0++;
                    }
                } else {
                    newCigar.add(ce);
                    if (op.consumesReadBases())
                        readPos0 += ce.getLength();
                    if (op.consumesReferenceBases())
                        refPos1 += ce.getLength();
                }
            }
            int i = 0;
            while (i < newCigar.size()) {
                final CigarOperator op1 = newCigar.get(i).getOperator();
                final int length1 = newCigar.get(i).getLength();
                if (i + 1 < newCigar.size() && newCigar.get(i + 1).getOperator() == op1) {
                    final CigarOperator op2 = newCigar.get(i + 1).getOperator();
                    int length2 = newCigar.get(i + 1).getLength();
                    newCigar.set(i, new CigarElement(length1 + length2, op2));
                    newCigar.remove(i + 1);
                } else {
                    ++i;
                }
            }
            cigar = new Cigar(newCigar);
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            rec.setCigar(cigar);
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 95 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfCompareCallers method doWork.

@SuppressWarnings("resource")
@Override
public int doWork(final List<String> args) {
    if (this.archiveFile == null) {
        LOG.error("undefined output");
        return -1;
    }
    if (!this.archivePrefix.isEmpty() && !archivePrefix.endsWith(".")) {
        this.archivePrefix = this.archivePrefix + ".";
    }
    if (!this.archiveFile.getName().endsWith(".zip")) {
        IOUtil.assertDirectoryIsWritable(this.archiveFile);
    }
    PeekVCF vcfIterator1 = null;
    PeekVCF vcfIterator2 = null;
    ArchiveFactory archiveFactory = null;
    IntervalTreeMap<Boolean> capture = null;
    PrintWriter makefileWriter = null;
    try {
        if (args.size() == 1) {
            LOG.info("Reading from VCF1=stdin and VCF2=" + args.get(0));
            vcfIterator1 = new PeekVCF(VCFUtils.createVcfIteratorFromInputStream(stdin()), "<STDIN>");
            vcfIterator2 = new PeekVCF(VCFUtils.createVcfIterator(args.get(0)), args.get(0));
        } else if (args.size() == 2) {
            LOG.info("Reading from VCF1=" + args.get(0) + " and VCF2=" + args.get(1));
            vcfIterator1 = new PeekVCF(VCFUtils.createVcfIterator(args.get(0)), args.get(0));
            vcfIterator2 = new PeekVCF(VCFUtils.createVcfIterator(args.get(1)), args.get(1));
        } else {
            LOG.error("illegal number of arguments");
            return -1;
        }
        if (this.captureFile != null) {
            LOG.info("Reading " + this.captureFile);
            capture = super.readBedFileAsBooleanIntervalTreeMap(this.captureFile);
        }
        this.global_dictionary = vcfIterator1.dict;
        if (!SequenceUtil.areSequenceDictionariesEqual(vcfIterator1.dict, vcfIterator2.dict)) {
            throw new JvarkitException.DictionariesAreNotTheSame(vcfIterator1.dict, vcfIterator2.dict);
        }
        for (int side = 0; side < 2; ++side) {
            final List<String> jexlExprStrings = (side == 0 ? this.jexlExprStrings1 : this.jexlExprStrings2);
            final PeekVCF peek = (side == 0 ? vcfIterator1 : vcfIterator2);
            // initialize JEXL map
            if (!jexlExprStrings.isEmpty())
                continue;
            final ArrayList<String> dummyNames = new ArrayList<String>(jexlExprStrings.size());
            for (int expCount = 1; expCount < jexlExprStrings.size(); ++expCount) {
                dummyNames.add(String.format("vce%d", expCount));
            }
            peek.jexlVCMatchExps.addAll(VariantContextUtils.initializeMatchExps(dummyNames, jexlExprStrings));
        }
        /* samples */
        final Set<String> samples0 = new HashSet<>(vcfIterator1.header.getSampleNamesInOrder());
        final Set<String> samples1 = new HashSet<>(vcfIterator2.header.getSampleNamesInOrder());
        final Set<String> commonSamples = new TreeSet<>(samples0);
        commonSamples.retainAll(samples1);
        if (commonSamples.size() != samples0.size() || commonSamples.size() != samples1.size()) {
            LOG.warn("Warning: Not the same samples set. Using intersection of both lists.");
        }
        if (commonSamples.isEmpty()) {
            LOG.error("No common samples");
            return -1;
        }
        final Map<String, SampleInfo> sample2info = new HashMap<>(commonSamples.size());
        for (final String sampleName : commonSamples) {
            sample2info.put(sampleName, new SampleInfo(sampleName));
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.global_dictionary);
        for (; ; ) {
            VariantContext ctx0 = vcfIterator1.peek();
            VariantContext ctx1 = vcfIterator2.peek();
            final VariantContext smallest;
            if (ctx0 == null && ctx1 == null) {
                smallest = null;
                break;
            } else if (ctx0 == null && ctx1 != null) {
                smallest = ctx1;
            } else if (ctx0 != null && ctx1 == null) {
                smallest = ctx0;
            } else {
                final int diff = this.compareChromPosRef.compare(ctx0, ctx1);
                if (diff < 0) {
                    smallest = ctx0;
                    ctx1 = null;
                } else if (diff > 0) {
                    smallest = ctx1;
                    ctx0 = null;
                } else {
                    smallest = ctx0;
                }
            }
            progress.watch(smallest);
            vcfIterator1.reset(smallest);
            vcfIterator2.reset(smallest);
            if (capture != null) {
                final Interval interval = new Interval(smallest.getContig(), smallest.getStart(), smallest.getEnd());
                if (!capture.containsOverlapping(interval))
                    continue;
            }
            for (final String sampleName : sample2info.keySet()) {
                sample2info.get(sampleName).visit(ctx0, ctx1);
            }
        }
        progress.finish();
        vcfIterator1.close();
        vcfIterator2.close();
        archiveFactory = ArchiveFactory.open(this.archiveFile);
        makefileWriter = archiveFactory.openWriter(this.archivePrefix + "Makefile");
        makefileWriter.println(".PHONY:all all2");
        makefileWriter.println("ALL_TARGETS=");
        makefileWriter.println("all:all2");
        for (final SampleInfo sampleInfo : sample2info.values()) {
            for (final SampleCategory sampleCat : sampleInfo.sampleCat.values()) {
                final String basename = this.archivePrefix + sampleInfo.sampleName + "." + sampleCat.variantCatName.replace(' ', '_');
                final String tsv = basename + ".tsv";
                PrintWriter dataW = archiveFactory.openWriter(tsv);
                for (final String sk : new TreeSet<String>(sampleCat.counter.keySet())) {
                    dataW.print(escapeUnderscore(sk));
                    dataW.print("\t");
                    dataW.print(sampleCat.counter.count(sk));
                    dataW.println();
                }
                dataW.flush();
                dataW.close();
                final String png = "$(addsuffix .png," + basename + ")";
                makefileWriter.println("ALL_TARGETS+=" + png);
                makefileWriter.println(png + ":" + tsv);
                makefileWriter.println("\techo '" + "set ylabel \"Number of Genotypes  " + escapeUnderscore(sampleInfo.sampleName) + "\";" + "set yrange [0:];" + "set xlabel \"Category " + escapeUnderscore(vcf1Name) + ": " + vcfIterator1.count + ", " + escapeUnderscore(vcf2Name) + ": " + vcfIterator2.count + " variants  \";" + "set xtic rotate by 90 right;" + "set size  ratio 0.618;" + "set title \"" + escapeUnderscore(vcf1Name) + " vs " + escapeUnderscore(vcf2Name) + " : Genotypes " + escapeUnderscore(sampleInfo.sampleName) + " / Variants: " + escapeUnderscore(sampleCat.variantCatName) + " \";" + "set key  off;" + "set datafile separator \"\t\";" + "set auto x;" + "set style histogram;" + "set style data histogram;" + "set style fill solid border -1;" + "set terminal png truecolor  size " + (500 + sampleCat.counter.getCountCategories() * 50) + ", 1500;" + "set output \"$@\";" + "plot \"$<\" using 2:xtic(1) ti \"\";' | " + "gnuplot");
            }
        }
        makefileWriter.println("all2:${ALL_TARGETS}");
        makefileWriter.close();
        makefileWriter = null;
        archiveFactory.close();
        archiveFactory = null;
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(makefileWriter);
        CloserUtil.close(archiveFactory);
        CloserUtil.close(vcfIterator1);
        CloserUtil.close(vcfIterator1);
    }
}
Also used : ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) TreeSet(java.util.TreeSet) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27