
Example 71 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class PcrSliceReads, method run.

@SuppressWarnings("resource")
private int run(SamReader reader) {
    ReadClipper readClipper = new ReadClipper();
    SAMFileHeader header1 = reader.getFileHeader();
    SAMFileHeader header2 = header1.clone();
    header2.addComment(getProgramName() + " " + getVersion() + ": Processed with " + getProgramCommandLine());
    header2.setSortOrder(SortOrder.unsorted);
    for (SAMReadGroupRecord srg : header1.getReadGroups()) {
        for (Interval i : this.bedIntervals.keySet()) {
            // create new read group for this interval
            SAMReadGroupRecord rg = new SAMReadGroupRecord(srg.getId() + "_" + this.bedIntervals.get(i).getName(), srg);
            header2.addReadGroup(rg);
        }
    }
    SAMFileWriter sw = null;
    SAMRecordIterator iter = null;
    try {
        sw = writingBamArgs.openSAMFileWriter(outputFile, header2, false);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = reader.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            if (rec.getReadUnmappedFlag()) {
                sw.addAlignment(rec);
                continue;
            }
            if (!rec.getReadPairedFlag()) {
                // @doc if the read is not a paired-end read, then the mapping quality of the read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getMateUnmappedFlag()) {
                // @doc if the MATE is not mapped, then the mapping quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (!rec.getProperPairFlag()) {
                // @doc if the properly-paired flag is not set, then the mapping quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getMateReferenceIndex() != rec.getReferenceIndex()) {
                // @doc if the read and the mate are not mapped on the same chromosome, then the mapping quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getReadNegativeStrandFlag() == rec.getMateNegativeStrandFlag()) {
                // @doc if the read and the mate are mapped on the same strand, then the mapping quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            int chromStart;
            int chromEnd;
            if (rec.getAlignmentStart() < rec.getMateAlignmentStart()) {
                if (rec.getReadNegativeStrandFlag()) {
                    // @doc if the read(POS) < mate(POS) and the read is mapped on the negative strand, then the mapping quality of the current read is set to zero
                    rec.setMappingQuality(0);
                    sw.addAlignment(rec);
                    continue;
                } else {
                    chromStart = rec.getAlignmentStart();
                    chromEnd = rec.getMateAlignmentStart();
                }
            } else {
                if (!rec.getReadNegativeStrandFlag()) {
                    // @doc if the read(POS) > mate(POS) and the read is mapped on the positive strand, then the mapping quality of the current read is set to zero
                    rec.setMappingQuality(0);
                    sw.addAlignment(rec);
                    continue;
                } else {
                    chromStart = rec.getMateAlignmentStart();
                    // don't use getAlignmentEnd, to be consistent with mate data
                    chromEnd = rec.getAlignmentStart();
                }
            }
            List<Interval> fragments = findIntervals(rec.getContig(), chromStart, chromEnd);
            if (fragments.isEmpty()) {
                // @doc if no BED fragment overlapping the read is found, then the mapping quality of the read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            Interval best = null;
            if (fragments.size() == 1) {
                best = fragments.get(0);
            } else
                switch(this.ambiguityStrategy) {
                    case random:
                        {
                            best = fragments.get(this.random.nextInt(fragments.size()));
                            break;
                        }
                    case zero:
                        {
                            rec.setMappingQuality(0);
                            sw.addAlignment(rec);
                            continue;
                        }
                    case closer:
                        {
                            int best_distance = Integer.MAX_VALUE;
                            for (Interval frag : fragments) {
                                int distance = Math.abs(chromStart - frag.getStart()) + Math.abs(frag.getEnd() - chromEnd);
                                if (best == null || distance < best_distance) {
                                    best = frag;
                                    best_distance = distance;
                                }
                            }
                            break;
                        }
                    default:
                        throw new IllegalStateException();
                }
            if (clip_reads) {
                rec = readClipper.clip(rec, best);
                if (rec.getMappingQuality() == 0) {
                    sw.addAlignment(rec);
                    continue;
                }
            }
            SAMReadGroupRecord rg = rec.getReadGroup();
            if (rg == null) {
                throw new IOException("Read " + rec + " is missing a Read-Group ID . See http://broadinstitute.github.io/picard/ http://broadinstitute.github.io/picard/command-line-overview.html#AddOrReplaceReadGroups");
            }
            rec.setAttribute("RG", rg.getId() + "_" + best.getName());
            sw.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)
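
For reference, here is a minimal standalone sketch (not part of jvarkit) of the SAMReadGroupRecord pattern used above: deriving an interval-specific read group from an existing one and re-tagging a record with its ID. The class name, read-group ID and interval suffix are illustrative assumptions.

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;

public class ReadGroupPerIntervalSketch {
    public static void main(final String[] args) {
        final SAMFileHeader header = new SAMFileHeader();
        // original read group, as it might appear in the input BAM header
        final SAMReadGroupRecord src = new SAMReadGroupRecord("RG1");
        src.setSample("sample1");
        header.addReadGroup(src);
        // derived read group: copies src's attributes, ID suffixed with an interval name
        final SAMReadGroupRecord derived = new SAMReadGroupRecord(src.getId() + "_ampliconA", src);
        header.addReadGroup(derived);
        // re-tag a record so that it points to the derived read group
        final SAMRecord rec = new SAMRecord(header);
        rec.setAttribute("RG", derived.getId());
        System.out.println(header.getReadGroups());
    }
}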

Example 72 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class VcfToBam, method run.

private void run(VcfIterator vcfIterator) throws IOException {
    long id_generator = 0L;
    SAMFileWriter samFileWriter = null;
    final VCFHeader header = vcfIterator.getHeader();
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null)
        throw new IOException("Sequence Dictionary missing in VCF");
    if (!SequenceUtil.areSequenceDictionariesEqual(dict, indexedFastaSequenceFile.getSequenceDictionary())) {
        LOG.warning("REF/VCF: not the same Sequence dictonary " + dict + " " + indexedFastaSequenceFile.getSequenceDictionary());
    }
    final SAMFileHeader samHeader = new SAMFileHeader();
    samHeader.setSequenceDictionary(dict);
    for (final String sample : new TreeSet<>(header.getSampleNamesInOrder())) {
        final SAMReadGroupRecord rg = new SAMReadGroupRecord(sample);
        rg.setSample(sample);
        rg.setLibrary(sample);
        rg.setDescription(sample);
        rg.setLibrary("illumina");
        samHeader.addReadGroup(rg);
    }
    samHeader.addComment("Generated with " + getProgramCommandLine());
    samHeader.setSortOrder(SortOrder.unsorted);
    samFileWriter = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(this.outputFile, samHeader, true);
    /* looping over sequences */
    for (final SAMSequenceRecord ssr : dict.getSequences()) {
        final LinkedList<VariantContext> variantBuffer = new LinkedList<>();
        GenomicSequence genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
        int x = 1;
        while (x + this.fragmentSize <= ssr.getSequenceLength()) {
            // pop on left
            while (!variantBuffer.isEmpty() && (variantBuffer.getFirst().getStart() + this.fragmentSize * 2) < x) {
                variantBuffer.removeFirst();
            }
            // refill buffer
            while (vcfIterator.hasNext()) {
                // buffer is already big enough
                if (!variantBuffer.isEmpty() && variantBuffer.getLast().getStart() > x + 2 * fragmentSize) {
                    break;
                }
                final VariantContext ctx = vcfIterator.peek();
                if (ctx == null)
                    break;
                if (ctx.isIndel() || !ctx.isVariant()) {
                    LOG.warning("Cannot handle " + ctx.getReference() + "/" + ctx.getAlternateAlleles());
                    // consume the variant
                    vcfIterator.next();
                    continue;
                }
                final SAMSequenceRecord variantContig = dict.getSequence(ctx.getContig());
                if (variantContig == null)
                    throw new IOException("Unknown contig " + ctx.getContig());
                if (variantContig.getSequenceIndex() < ssr.getSequenceIndex()) {
                    // just consume the variant, see:
                    // https://github.com/lindenb/jvarkit/issues/86#issuecomment-326986654
                    vcfIterator.next();
                    continue;
                } else if (variantContig.getSequenceIndex() > ssr.getSequenceIndex()) {
                    break;
                } else {
                    variantBuffer.add(vcfIterator.next());
                }
            }
            if (variantBuffer.isEmpty()) {
                LOG.info("no variant ?");
                // no variant on this chromosome
                break;
            }
            if (x < variantBuffer.getFirst().getStart() - 2 * fragmentSize) {
                x = variantBuffer.getFirst().getStart() - 2 * fragmentSize;
            }
            for (int depth = 0; depth < 1; ++depth) {
                for (String sample : header.getSampleNamesInOrder()) {
                    // loop over genomic strand
                    for (int g_strand = 0; g_strand < 2; ++g_strand) {
                        SAMRecord[] records = { new SAMRecord(samHeader), new SAMRecord(samHeader) };
                        String readName = String.format("%010d", ++id_generator);
                        for (int R = 0; R < 2; ++R) {
                            records[R].setReferenceName(ssr.getSequenceName());
                            records[R].setReadPairedFlag(true);
                            records[R].setReadName(readName);
                            records[R].setMappingQuality(60);
                            records[R].setProperPairFlag(true);
                            records[R].setMateReferenceName(ssr.getSequenceName());
                            records[R].setMateUnmappedFlag(false);
                            records[R].setReadUnmappedFlag(false);
                            records[R].setAttribute("RG", sample);
                            records[R].setReadNegativeStrandFlag(R == 1);
                            StringBuilder sequence = new StringBuilder(this.readSize);
                            int readLen = 0;
                            int refPos1 = (R == 0 ? x : x + this.fragmentSize - this.readSize);
                            records[R].setAlignmentStart(refPos1);
                            List<CigarElement> cigarElements = new ArrayList<>(this.readSize);
                            int NM = 0;
                            while (readLen < this.readSize) {
                                String base = null;
                                VariantContext overlap = null;
                                for (VariantContext vc : variantBuffer) {
                                    int d = vc.getStart() - (refPos1 + readLen);
                                    if (d == 0) {
                                        overlap = vc;
                                        break;
                                    }
                                    if (d > 0)
                                        break;
                                }
                                if (overlap != null) {
                                    Genotype genotype = overlap.getGenotype(sample);
                                    if (genotype.isCalled() && !genotype.isMixed()) {
                                        List<Allele> alleles = genotype.getAlleles();
                                        if (alleles.size() != 2)
                                            throw new RuntimeException("Not a diploid organism.");
                                        Allele allele = null;
                                        if (genotype.isPhased()) {
                                            allele = alleles.get(g_strand);
                                        } else {
                                            allele = alleles.get(Math.random() < 0.5 ? 0 : 1);
                                        }
                                        if (allele.isSymbolic())
                                            throw new RuntimeException("Cannot handle symbolic alleles");
                                        if (allele.isReference()) {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.EQ));
                                        } else if (allele.getBaseString().length() < overlap.getReference().getBaseString().length()) {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.D));
                                            NM++;
                                        } else if (allele.getBaseString().length() > overlap.getReference().getBaseString().length()) {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.I));
                                            NM++;
                                        } else { // same size
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.X));
                                            NM++;
                                        }
                                        base = allele.getBaseString().toLowerCase();
                                    }
                                }
                                if (base == null) {
                                    base = String.valueOf(genomicSequence.charAt(refPos1 - 1 + readLen));
                                    cigarElements.add(new CigarElement(1, CigarOperator.EQ));
                                }
                                sequence.append(base);
                                readLen += base.length();
                            }
                            while (sequence.length() > this.readSize) sequence.deleteCharAt(sequence.length() - 1);
                            records[R].setReadString(sequence.toString());
                            StringBuilder qual = new StringBuilder(sequence.length());
                            for (int i = 0; i < sequence.length(); ++i) qual.append("I");
                            records[R].setBaseQualityString(qual.toString());
                            // cigar
                            int c = 0;
                            while (c + 1 < cigarElements.size()) {
                                CigarElement c0 = cigarElements.get(c);
                                CigarElement c1 = cigarElements.get(c + 1);
                                if (c0.getOperator().equals(c1.getOperator())) {
                                    cigarElements.set(c, new CigarElement(c0.getLength() + c1.getLength(), c0.getOperator()));
                                    cigarElements.remove(c + 1);
                                } else {
                                    ++c;
                                }
                            }
                            records[R].setCigar(new Cigar(cigarElements));
                            records[R].setAttribute("NM", NM);
                        }
                        if (Math.random() < 0.5) {
                            records[0].setFirstOfPairFlag(true);
                            records[0].setSecondOfPairFlag(false);
                            records[1].setFirstOfPairFlag(false);
                            records[1].setSecondOfPairFlag(true);
                        } else {
                            records[0].setFirstOfPairFlag(false);
                            records[0].setSecondOfPairFlag(true);
                            records[1].setFirstOfPairFlag(true);
                            records[1].setSecondOfPairFlag(false);
                        }
                        records[0].setMateAlignmentStart(records[1].getAlignmentStart());
                        records[1].setMateAlignmentStart(records[0].getAlignmentStart());
                        records[0].setInferredInsertSize(records[1].getAlignmentStart() - records[0].getAlignmentStart());
                        records[1].setInferredInsertSize(records[0].getAlignmentStart() - records[1].getAlignmentStart());
                        samFileWriter.addAlignment(records[0]);
                        samFileWriter.addAlignment(records[1]);
                    }
                }
            }
            ++x;
        }
    }
    samFileWriter.close();
}
Also used : SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) CigarElement(htsjdk.samtools.CigarElement) LinkedList(java.util.LinkedList) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)
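
For reference, here is a minimal standalone sketch (not part of jvarkit) of the header construction used above: one SAMReadGroupRecord per VCF sample, with the sample name reused as the read-group ID. The sample names and library value below are illustrative assumptions.

import java.util.Arrays;
import java.util.List;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;

public class ReadGroupPerSampleSketch {
    public static void main(final String[] args) {
        final List<String> samples = Arrays.asList("S1", "S2"); // stand-ins for VCF sample names
        final SAMFileHeader samHeader = new SAMFileHeader();
        samHeader.setSortOrder(SAMFileHeader.SortOrder.unsorted);
        for (final String sample : samples) {
            final SAMReadGroupRecord rg = new SAMReadGroupRecord(sample); // ID == sample name
            rg.setSample(sample);
            rg.setLibrary("illumina"); // same hard-coded library as in the example above
            samHeader.addReadGroup(rg);
        }
        for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
            System.out.println(rg.getId() + "\t" + rg.getSample());
        }
    }
}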

Example 73 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class FixVcfMissingGenotypes, method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final Set<String> bamFiles = IOUtils.unrollFiles(bamList);
    final Map<String, List<SamReader>> sample2bam = new HashMap<>(bamFiles.size());
    try {
        final VCFHeader header = in.getHeader();
        for (final String bamFile : bamFiles) {
            LOG.info("Reading header for " + bamFile);
            final SamReader reader = super.openSamReader(bamFile);
            if (!reader.hasIndex()) {
                LOG.error("No BAM index available for " + bamFile);
                return -1;
            }
            final SAMFileHeader samHeader = reader.getFileHeader();
            for (final SAMReadGroupRecord g : samHeader.getReadGroups()) {
                if (g.getSample() == null)
                    continue;
                final String sample = g.getSample();
                if (StringUtil.isBlank(sample))
                    continue;
                List<SamReader> readers = sample2bam.get(sample);
                if (readers == null) {
                    readers = new ArrayList<>();
                    sample2bam.put(sample, readers);
                }
                readers.add(reader);
            }
        }
        final VCFHeader h2 = new VCFHeader(header);
        if (h2.getFormatHeaderLine(VCFConstants.DEPTH_KEY) == null) {
            h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
        }
        if (h2.getFormatHeaderLine(VCFConstants.GENOTYPE_KEY) == null) {
            h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
        }
        if (h2.getFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY) == null) {
            h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
        }
        h2.addMetaDataLine(new VCFFormatHeaderLine(this.fixedTag, 1, VCFHeaderLineType.Integer, "Genotype was set as homozygous (min depth =" + this.minDepth + ")"));
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        this.recalculator.setHeader(h2);
        out.writeHeader(h2);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            boolean somethingWasChanged = false;
            final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
            for (int i = 0; i < ctx.getNSamples(); ++i) {
                Genotype genotype = ctx.getGenotype(i);
                final String sample = genotype.getSampleName();
                if (!genotype.isCalled() || (!genotype.hasDP() && this.fixDP)) {
                    List<SamReader> samReaders = sample2bam.get(sample);
                    if (samReaders == null)
                        samReaders = Collections.emptyList();
                    final int depth = fetchDP(ctx, sample, samReaders);
                    if (genotype.isCalled() && !genotype.hasDP()) {
                        genotype = new GenotypeBuilder(genotype).DP(depth).make();
                        somethingWasChanged = true;
                    } else { // genotype was not called
                        if (depth >= this.minDepth) {
                            final List<Allele> homozygous = new ArrayList<>(2);
                            homozygous.add(ctx.getReference());
                            homozygous.add(ctx.getReference());
                            final GenotypeBuilder gb = new GenotypeBuilder(genotype);
                            gb.alleles(homozygous);
                            gb.attribute(this.fixedTag, 1);
                            gb.DP(depth);
                            if (!StringUtil.isBlank(this.fixedGenotypesAreFiltered))
                                gb.filter(this.fixedGenotypesAreFiltered);
                            somethingWasChanged = true;
                            genotype = gb.make();
                        } else if (!genotype.hasDP()) { // cannot fix the genotype, but we can update DP
                            genotype = new GenotypeBuilder(genotype).DP(depth).make();
                            somethingWasChanged = true;
                        }
                    }
                }
                genotypes.add(genotype);
            }
            if (somethingWasChanged) {
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.genotypes(genotypes);
                out.add(this.recalculator.apply(vcb.make()));
            } else {
                out.add(ctx);
            }
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        sample2bam.values().stream().flatMap(L -> L.stream()).forEach(R -> CloserUtil.close(R));
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Cigar(htsjdk.samtools.Cigar) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)
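
For reference, here is a minimal standalone sketch (not part of jvarkit) of the sample-to-BAM mapping built at the top of this method: each reader is registered under the SM sample name found in its read groups. The class name is an illustrative assumption; BAM paths are taken from the command line.

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public class SampleToBamSketch {
    public static void main(final String[] args) {
        final Map<String, List<SamReader>> sample2bam = new HashMap<>();
        for (final String path : args) { // BAM file paths
            final SamReader reader = SamReaderFactory.makeDefault().open(new File(path));
            for (final SAMReadGroupRecord g : reader.getFileHeader().getReadGroups()) {
                final String sample = g.getSample(); // SM attribute, may be null
                if (sample == null || sample.trim().isEmpty()) continue;
                sample2bam.computeIfAbsent(sample, k -> new ArrayList<>()).add(reader);
            }
        }
        sample2bam.forEach((sample, readers) ->
            System.out.println(sample + " -> " + readers.size() + " reader(s)"));
    }
}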

Example 74 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class GcPercentAndDepth, method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.windowSize <= 0) {
        LOG.error("Bad window size.");
        return -1;
    }
    if (this.windowStep <= 0) {
        LOG.error("Bad window step.");
        return -1;
    }
    if (this.refFile == null) {
        LOG.error("Undefined REF File");
        return -1;
    }
    if (args.isEmpty()) {
        LOG.error("Illegal Number of arguments.");
        return -1;
    }
    ReferenceGenome indexedFastaSequenceFile = null;
    List<SamReader> readers = new ArrayList<SamReader>();
    PrintWriter out = null;
    try {
        LOG.info("Loading " + this.refFile);
        indexedFastaSequenceFile = new ReferenceGenomeFactory().openFastaFile(this.refFile);
        this.samSequenceDictionary = indexedFastaSequenceFile.getDictionary();
        if (this.samSequenceDictionary == null) {
            LOG.error("Cannot get sequence dictionary for " + this.refFile);
            return -1;
        }
        out = super.openFileOrStdoutAsPrintWriter(outPutFile);
        Set<String> all_samples = new TreeSet<String>();
        /* create input, collect sample names */
        for (int optind = 0; optind < args.size(); ++optind) {
            LOG.info("Opening " + args.get(optind));
            final SamReader samFileReaderScan = super.openSamReader(args.get(optind));
            readers.add(samFileReaderScan);
            final SAMFileHeader header = samFileReaderScan.getFileHeader();
            if (!SequenceUtil.areSequenceDictionariesEqual(this.samSequenceDictionary, header.getSequenceDictionary())) {
                LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(this.samSequenceDictionary, header.getSequenceDictionary()));
                return -1;
            }
            for (final SAMReadGroupRecord g : header.getReadGroups()) {
                final String sample = this.partition.apply(g);
                if (StringUtil.isBlank(sample)) {
                    LOG.warning("Read group " + g.getId() + " has no sample in merged dictionary");
                    continue;
                }
                all_samples.add(sample);
            }
        }
        LOG.info("N " + this.partition.name() + "=" + all_samples.size());
        /* print header */
        out.print("#");
        if (!this.hide_genomic_index) {
            out.print("id");
            out.print("\t");
        }
        out.print("chrom");
        out.print("\t");
        out.print("start");
        out.print("\t");
        out.print("end");
        out.print("\t");
        out.print("GCPercent");
        for (final String sample : all_samples) {
            out.print("\t");
            out.print(sample);
        }
        out.println();
        final List<RegionCaptured> regionsCaptured = new ArrayList<RegionCaptured>();
        if (bedFile != null) {
            LOG.info("Reading BED:" + bedFile);
            final BedLineCodec bedLineCodec = new BedLineCodec();
            BufferedReader r = IOUtils.openFileForBufferedReading(bedFile);
            r.lines().filter(L -> !L.startsWith("#")).filter(L -> !StringUtil.isBlank(L)).map(L -> bedLineCodec.decode(L)).filter(B -> B != null).forEach(B -> {
                final SAMSequenceRecord ssr = this.samSequenceDictionary.getSequence(B.getContig());
                if (ssr == null) {
                    LOG.warning("Cannot resolve " + B.getContig());
                    return;
                }
                final RegionCaptured roi = new RegionCaptured(ssr, B.getStart() - 1, B.getEnd());
                regionsCaptured.add(roi);
            });
            CloserUtil.close(r);
            LOG.info("end Reading BED:" + bedFile);
            Collections.sort(regionsCaptured);
        } else {
            LOG.info("No capture, peeking everything");
            for (final SAMSequenceRecord ssr : this.samSequenceDictionary.getSequences()) {
                final RegionCaptured roi = new RegionCaptured(ssr, 0, ssr.getSequenceLength());
                regionsCaptured.add(roi);
            }
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.samSequenceDictionary).logger(LOG);
        ReferenceContig genomicSequence = null;
        for (final RegionCaptured roi : regionsCaptured) {
            if (genomicSequence == null || !genomicSequence.hasName(roi.getContig())) {
                genomicSequence = indexedFastaSequenceFile.getContig(roi.getContig());
                if (genomicSequence == null) {
                    LOG.error(JvarkitException.ContigNotFoundInDictionary.getMessage(roi.getContig(), this.samSequenceDictionary));
                    return -1;
                }
            }
            Map<String, int[]> sample2depth = new HashMap<String, int[]>();
            Map<String, Double> sample2meanDepth = new HashMap<String, Double>();
            for (final String sample : all_samples) {
                int[] depth = new int[roi.length()];
                Arrays.fill(depth, 0);
                sample2depth.put(sample, depth);
            }
            List<CloseableIterator<SAMRecord>> iterators = new ArrayList<CloseableIterator<SAMRecord>>();
            for (final SamReader r : readers) {
                iterators.add(r.query(roi.getContig(), roi.getStart(), roi.getEnd(), false));
            }
            final MergingIterator<SAMRecord> merginIter = new MergingIterator<>(new SAMRecordCoordinateComparator(), iterators);
            while (merginIter.hasNext()) {
                final SAMRecord rec = merginIter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filter.filterOut(rec))
                    continue;
                final String sample = this.partition.getPartion(rec, null);
                if (sample == null)
                    continue;
                final int[] depth = sample2depth.get(sample);
                if (depth == null)
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null)
                    continue;
                int refpos1 = rec.getAlignmentStart();
                for (final CigarElement ce : cigar.getCigarElements()) {
                    final CigarOperator op = ce.getOperator();
                    if (!op.consumesReferenceBases())
                        continue;
                    if (op.consumesReadBases()) {
                        for (int i = 0; i < ce.getLength(); ++i) {
                            if (refpos1 + i < roi.getStart())
                                continue;
                            if (refpos1 + i > roi.getEnd())
                                break;
                            depth[refpos1 + i - roi.getStart()]++;
                        }
                    }
                    refpos1 += ce.getLength();
                }
            }
            merginIter.close();
            for (final RegionCaptured.SlidingWindow win : roi) {
                double total = 0f;
                int countN = 0;
                for (int pos1 = win.getStart(); pos1 <= win.getEnd(); ++pos1) {
                    switch(genomicSequence.charAt(pos1 - 1)) {
                        case 'c':
                        case 'C':
                        case 'g':
                        case 'G':
                        case 's':
                        case 'S':
                            {
                                total++;
                                break;
                            }
                        case 'n':
                        case 'N':
                            countN++;
                            break;
                        default:
                            break;
                    }
                }
                if (skip_if_contains_N && countN > 0)
                    continue;
                double GCPercent = total / (double) win.length();
                int max_depth_for_win = 0;
                sample2meanDepth.clear();
                for (final String sample : all_samples) {
                    int[] depth = sample2depth.get(sample);
                    double sum = 0;
                    for (int pos = win.getStart(); pos < win.getEnd() && (pos - roi.getStart()) < depth.length; ++pos) {
                        sum += depth[pos - roi.getStart()];
                    }
                    double mean = (sum / (double) depth.length);
                    max_depth_for_win = Math.max(max_depth_for_win, (int) mean);
                    sample2meanDepth.put(sample, mean);
                }
                if (max_depth_for_win < this.min_depth)
                    continue;
                if (!this.hide_genomic_index) {
                    out.print(win.getGenomicIndex());
                    out.print("\t");
                }
                out.print(win.getContig());
                out.print("\t");
                out.print(win.getStart() - 1);
                out.print("\t");
                out.print(win.getEnd());
                out.print("\t");
                out.printf("%.2f", GCPercent);
                for (String sample : all_samples) {
                    out.print("\t");
                    out.printf("%.2f", (double) sample2meanDepth.get(sample));
                }
                out.println();
            }
        }
        progress.finish();
        out.flush();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        for (SamReader r : readers) CloserUtil.close(r);
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(out);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) MergingIterator(htsjdk.samtools.util.MergingIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) AbstractIterator(htsjdk.samtools.util.AbstractIterator) Locatable(htsjdk.samtools.util.Locatable) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMRecordCoordinateComparator(htsjdk.samtools.SAMRecordCoordinateComparator) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ReferenceGenome(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenome) Collections(java.util.Collections)
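
For reference, here is a minimal standalone sketch (not part of jvarkit, and without its SAMRecordPartition helper) of the sample-collection step above: distinct SM sample names are gathered from the read groups of each input BAM. Class name and output format are illustrative assumptions.

import java.io.File;
import java.util.Set;
import java.util.TreeSet;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloserUtil;

public class CollectSamplesSketch {
    public static void main(final String[] args) {
        final Set<String> allSamples = new TreeSet<>();
        for (final String path : args) { // BAM file paths
            final SamReader reader = SamReaderFactory.makeDefault().open(new File(path));
            for (final SAMReadGroupRecord g : reader.getFileHeader().getReadGroups()) {
                final String sample = g.getSample();
                if (sample != null && !sample.trim().isEmpty()) {
                    allSamples.add(sample);
                }
            }
            CloserUtil.close(reader);
        }
        System.out.println("Samples: " + allSamples);
    }
}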

Example 75 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class VCFCombineTwoSnvs, method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, File saveAs) {
    BufferedReader bufferedReader = null;
    htsjdk.variant.variantcontext.writer.VariantContextWriter w = null;
    SortingCollection<CombinedMutation> mutations = null;
    CloseableIterator<Variant> varIter = null;
    CloseableIterator<CombinedMutation> mutIter = null;
    Map<String, SamReader> sample2samReader = new HashMap<>();
    try {
        bufferedReader = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(bufferedReader);
        /* get VCF header */
        final VCFHeader header = cah.header;
        final Set<String> sampleNamesInOrder = new HashSet<>(header.getSampleNamesInOrder());
        LOG.info("opening REF:" + referenceFile);
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFile);
        final SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null)
            throw new IOException("dictionary missing");
        if (this.bamIn != null) {
            /**
             * unroll and open bam file
             */
            for (final File bamFile : IOUtils.unrollFileCollection(Collections.singletonList(this.bamIn))) {
                LOG.info("opening BAM :" + this.bamIn);
                final SamReader samReader = SamReaderFactory.makeDefault().referenceSequence(this.referenceFile).validationStringency(ValidationStringency.LENIENT).open(this.bamIn);
                if (!samReader.hasIndex()) {
                    throw new IOException("Sam file is NOT indexed: " + bamFile);
                }
                final SAMFileHeader samHeader = samReader.getFileHeader();
                if (samHeader.getSequenceDictionary() == null || !SequenceUtil.areSequenceDictionariesEqual(dict, samReader.getFileHeader().getSequenceDictionary())) {
                    throw new IOException(bamFile + " and REF don't have the same Sequence Dictionary.");
                }
                /* get sample name */
                String sampleName = null;
                for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
                    if (rg.getSample() == null)
                        continue;
                    if (sampleName != null && !sampleName.equals(rg.getSample())) {
                        samReader.close();
                        throw new IOException(bamFile + " Contains two samples " + sampleName + " " + rg.getSample());
                    }
                    sampleName = rg.getSample();
                }
                if (sampleName == null) {
                    samReader.close();
                    LOG.warn("no sample in " + bamFile);
                    continue;
                }
                if (!sampleNamesInOrder.contains(sampleName)) {
                    samReader.close();
                    LOG.warn("no sample " + sampleName + " in vcf");
                    continue;
                }
                sample2samReader.put(sampleName, samReader);
            }
        }
        loadKnownGenesFromUri();
        this.variants = SortingCollection.newInstance(Variant.class, new VariantCodec(), new VariantComparator(dict), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        this.variants.setDestructiveIteration(true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        String vcfLine = null;
        while ((vcfLine = bufferedReader.readLine()) != null) {
            final VariantContext ctx = progress.watch(cah.codec.decode(vcfLine));
            /* discard non-SNV variants */
            if (!ctx.isVariant() || ctx.isIndel()) {
                continue;
            }
            /* find the overlapping genes : extend the interval of the variant to include the stop codon */
            final Collection<KnownGene> genes = new ArrayList<>();
            for (List<KnownGene> lkg : this.knownGenes.getOverlapping(new Interval(ctx.getContig(), Math.max(1, ctx.getStart() - 3), ctx.getEnd() + 3))) {
                genes.addAll(lkg);
            }
            final List<Allele> alternateAlleles = ctx.getAlternateAlleles();
            /* loop over overlapping genes */
            for (final KnownGene kg : genes) {
                /* loop over available alleles */
                for (int allele_idx = 0; allele_idx < alternateAlleles.size(); ++allele_idx) {
                    final Allele alt = alternateAlleles.get(allele_idx);
                    challenge(ctx, alt, kg, vcfLine);
                }
            }
        }
        progress.finish();
        this.variants.doneAdding();
        mutations = SortingCollection.newInstance(CombinedMutation.class, new MutationCodec(), new MutationComparator(dict), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        mutations.setDestructiveIteration(true);
        final VCFFilterHeaderLine vcfFilterHeaderLine = new VCFFilterHeaderLine("TwoHaplotypes", "(number of reads carrying both mutation) < (reads carrying variant 1 + reads carrying variant 2) ");
        varIter = this.variants.iterator();
        progress = new SAMSequenceDictionaryProgress(header);
        final ArrayList<Variant> buffer = new ArrayList<>();
        for (; ; ) {
            Variant variant = null;
            if (varIter.hasNext()) {
                variant = varIter.next();
                progress.watch(variant.contig, variant.genomicPosition1);
            }
            if (variant == null || !(!buffer.isEmpty() && buffer.get(0).contig.equals(variant.contig) && buffer.get(0).transcriptName.equals(variant.transcriptName))) {
                if (!buffer.isEmpty()) {
                    for (int i = 0; i < buffer.size(); ++i) {
                        final Variant v1 = buffer.get(i);
                        for (int j = i + 1; j < buffer.size(); ++j) {
                            final Variant v2 = buffer.get(j);
                            if (v1.codonStart() != v2.codonStart())
                                continue;
                            if (v1.positionInCodon() == v2.positionInCodon())
                                continue;
                            if (!v1.wildCodon.equals(v2.wildCodon)) {
                                throw new IllegalStateException();
                            }
                            final StringBuilder combinedCodon = new StringBuilder(v1.wildCodon);
                            combinedCodon.setCharAt(v1.positionInCodon(), v1.mutCodon.charAt(v1.positionInCodon()));
                            combinedCodon.setCharAt(v2.positionInCodon(), v2.mutCodon.charAt(v2.positionInCodon()));
                            final String pwild = new ProteinCharSequence(v1.wildCodon).getString();
                            final String p1 = new ProteinCharSequence(v1.mutCodon).getString();
                            final String p2 = new ProteinCharSequence(v2.mutCodon).getString();
                            final String pCombined = new ProteinCharSequence(combinedCodon).getString();
                            final String combinedSO;
                            final String combinedType;
                            /* both AA are synonymous, while combined is not */
                            if (!pCombined.equals(pwild) && p1.equals(pwild) && p2.equals(pwild)) {
                                combinedType = "combined_is_nonsynonymous";
                                if (pCombined.equals("*")) {
                                    /* http://www.sequenceontology.org/browser/current_svn/term/SO:0001587 */
                                    combinedSO = "stop_gained";
                                } else if (pwild.equals("*")) {
                                    /* http://www.sequenceontology.org/browser/current_svn/term/SO:0002012 */
                                    combinedSO = "stop_lost";
                                } else {
                                    /* http://www.sequenceontology.org/miso/current_svn/term/SO:0001992 */
                                    combinedSO = "nonsynonymous_variant";
                                }
                            } else if (!pCombined.equals(p1) && !pCombined.equals(p2) && !pCombined.equals(pwild)) {
                                combinedType = "combined_is_new";
                                if (pCombined.equals("*")) {
                                    /* http://www.sequenceontology.org/browser/current_svn/term/SO:0001587 */
                                    combinedSO = "stop_gained";
                                } else {
                                    /* http://www.sequenceontology.org/miso/current_svn/term/SO:0001992 */
                                    combinedSO = "nonsynonymous_variant";
                                }
                            } else {
                                combinedType = null;
                                combinedSO = null;
                            }
                            /**
                             * ok, there is something interesting here ,
                             * create two new Mutations carrying the
                             * two variants
                             */
                            if (combinedSO != null) {
                                /**
                                 * grantham score is max found combined vs (p1/p2/wild)
                                 */
                                int grantham_score = GranthamScore.score(pCombined.charAt(0), pwild.charAt(0));
                                grantham_score = Math.max(grantham_score, GranthamScore.score(pCombined.charAt(0), p1.charAt(0)));
                                grantham_score = Math.max(grantham_score, GranthamScore.score(pCombined.charAt(0), p2.charAt(0)));
                                /**
                                 * info that will be displayed in the vcf
                                 */
                                final Map<String, Object> info1 = v1.getInfo(v2);
                                final Map<String, Object> info2 = v2.getInfo(v1);
                                // filter for this combined variant: by default it fails the filter
                                String filter = vcfFilterHeaderLine.getID();
                                final Map<String, Object> combinedMap = new LinkedHashMap<>();
                                combinedMap.put("CombinedCodon", combinedCodon);
                                combinedMap.put("CombinedAA", pCombined);
                                combinedMap.put("CombinedSO", combinedSO);
                                combinedMap.put("CombinedType", combinedType);
                                combinedMap.put("GranthamScore", grantham_score);
                                info1.putAll(combinedMap);
                                info2.putAll(combinedMap);
                                final Map<String, CoverageInfo> sample2coverageInfo = new HashMap<>(sample2samReader.size());
                                final int chromStart = Math.min(v1.genomicPosition1, v2.genomicPosition1);
                                final int chromEnd = Math.max(v1.genomicPosition1, v2.genomicPosition1);
                                /* get phasing info for each sample*/
                                for (final String sampleName : sample2samReader.keySet()) {
                                    final SamReader samReader = sample2samReader.get(sampleName);
                                    final CoverageInfo covInfo = new CoverageInfo();
                                    sample2coverageInfo.put(sampleName, covInfo);
                                    SAMRecordIterator iter = null;
                                    try {
                                        iter = samReader.query(v1.contig, chromStart, chromEnd, false);
                                        while (iter.hasNext()) {
                                            final SAMRecord rec = iter.next();
                                            if (rec.getReadUnmappedFlag())
                                                continue;
                                            if (rec.isSecondaryOrSupplementary())
                                                continue;
                                            if (rec.getDuplicateReadFlag())
                                                continue;
                                            if (rec.getReadFailsVendorQualityCheckFlag())
                                                continue;
                                            // get depth for variant 1
                                            if (rec.getAlignmentStart() <= v1.genomicPosition1 && v1.genomicPosition1 <= rec.getAlignmentEnd()) {
                                                covInfo.depth1++;
                                            }
                                            // get depth for variant 2
                                            if (rec.getAlignmentStart() <= v2.genomicPosition1 && v2.genomicPosition1 <= rec.getAlignmentEnd()) {
                                                covInfo.depth2++;
                                            }
                                            if (rec.getAlignmentEnd() < chromEnd)
                                                continue;
                                            if (rec.getAlignmentStart() > chromStart)
                                                continue;
                                            final Cigar cigar = rec.getCigar();
                                            if (cigar == null)
                                                continue;
                                            final byte[] bases = rec.getReadBases();
                                            if (bases == null)
                                                continue;
                                            int refpos1 = rec.getAlignmentStart();
                                            int readpos = 0;
                                            boolean found_variant1_on_this_read = false;
                                            boolean found_variant2_on_this_read = false;
                                            /**
                                             * loop over cigar
                                             */
                                            for (final CigarElement ce : cigar.getCigarElements()) {
                                                final CigarOperator op = ce.getOperator();
                                                switch(op) {
                                                    case P:
                                                        continue;
                                                    case S:
                                                    case I:
                                                        readpos += ce.getLength();
                                                        break;
                                                    case D:
                                                    case N:
                                                        refpos1 += ce.getLength();
                                                        break;
                                                    case H:
                                                        continue;
                                                    case EQ:
                                                    case M:
                                                    case X:
                                                        for (int x = 0; x < ce.getLength(); ++x) {
                                                            if (refpos1 == v1.genomicPosition1 && same(bases[readpos], v1.altAllele)) {
                                                                found_variant1_on_this_read = true;
                                                            } else if (refpos1 == v2.genomicPosition1 && same(bases[readpos], v2.altAllele)) {
                                                                found_variant2_on_this_read = true;
                                                            }
                                                            refpos1++;
                                                            readpos++;
                                                        }
                                                        break;
                                                    default:
                                                        throw new IllegalStateException(op.name());
                                                }
                                                /* skip remaining bases after last variant */
                                                if (refpos1 > chromEnd)
                                                    break;
                                            }
                                            /* sum-up what we found */
                                            if (found_variant1_on_this_read && found_variant2_on_this_read) {
                                                covInfo.count_reads_having_both_variants++;
                                            } else if (!found_variant1_on_this_read && !found_variant2_on_this_read) {
                                                covInfo.count_reads_having_no_variants++;
                                            } else if (found_variant1_on_this_read) {
                                                covInfo.count_reads_having_variant1++;
                                            } else if (found_variant2_on_this_read) {
                                                covInfo.count_reads_having_variant2++;
                                            }
                                        }
                                    /* end of loop over reads */
                                    } finally {
                                        iter.close();
                                        iter = null;
                                    }
                                    info1.put("N_READS_BOTH_VARIANTS_" + sampleName, covInfo.count_reads_having_both_variants);
                                    info2.put("N_READS_BOTH_VARIANTS_" + sampleName, covInfo.count_reads_having_both_variants);
                                    info1.put("N_READS_NO_VARIANTS_" + sampleName, covInfo.count_reads_having_no_variants);
                                    info2.put("N_READS_NO_VARIANTS_" + sampleName, covInfo.count_reads_having_no_variants);
                                    info1.put("N_READS_TOTAL_" + sampleName, covInfo.count_reads_having_both_variants + covInfo.count_reads_having_no_variants + covInfo.count_reads_having_variant1 + covInfo.count_reads_having_variant2);
                                    info2.put("N_READS_TOTAL_" + sampleName, covInfo.count_reads_having_both_variants + covInfo.count_reads_having_no_variants + covInfo.count_reads_having_variant1 + covInfo.count_reads_having_variant2);
                                     // counts reported from variant 1's point of view
                                    info1.put("N_READS_ONLY_1_" + sampleName, covInfo.count_reads_having_variant1);
                                    info1.put("N_READS_ONLY_2_" + sampleName, covInfo.count_reads_having_variant2);
                                    info1.put("DEPTH_1_" + sampleName, covInfo.depth1);
                                     // same counts, swapped, from variant 2's point of view
                                    info2.put("N_READS_ONLY_1_" + sampleName, covInfo.count_reads_having_variant2);
                                    info2.put("N_READS_ONLY_2_" + sampleName, covInfo.count_reads_having_variant1);
                                    info2.put("DEPTH_2_" + sampleName, covInfo.depth2);
                                     /* if twice the number of reads carrying both variants exceeds the number
                                      * of reads carrying only one of them, reset the filter
                                      */
                                    if (2 * covInfo.count_reads_having_both_variants > (covInfo.count_reads_having_variant1 + covInfo.count_reads_having_variant2)) {
                                        /* reset filter */
                                        filter = VCFConstants.UNFILTERED;
                                        info1.put("FILTER_1_" + sampleName, ".");
                                        info2.put("FILTER_2_" + sampleName, ".");
                                    } else {
                                        info1.put("FILTER_1_" + sampleName, vcfFilterHeaderLine.getID());
                                        info2.put("FILTER_2_" + sampleName, vcfFilterHeaderLine.getID());
                                    }
                                }
                                /* end of loop over bams */
                                final CombinedMutation m1 = new CombinedMutation();
                                m1.contig = v1.contig;
                                m1.genomicPosition1 = v1.genomicPosition1;
                                m1.id = v1.id;
                                m1.refAllele = v1.refAllele;
                                m1.altAllele = v1.altAllele;
                                m1.vcfLine = v1.vcfLine;
                                m1.info = mapToString(info1);
                                m1.filter = filter;
                                m1.grantham_score = grantham_score;
                                m1.sorting_id = ID_GENERATOR++;
                                mutations.add(m1);
                                final CombinedMutation m2 = new CombinedMutation();
                                m2.contig = v2.contig;
                                m2.genomicPosition1 = v2.genomicPosition1;
                                m2.id = v2.id;
                                m2.refAllele = v2.refAllele;
                                m2.altAllele = v2.altAllele;
                                m2.vcfLine = v2.vcfLine;
                                m2.info = mapToString(info2);
                                m2.filter = filter;
                                m2.grantham_score = grantham_score;
                                m2.sorting_id = ID_GENERATOR++;
                                mutations.add(m2);
                            }
                        }
                    }
                }
                buffer.clear();
                if (variant == null)
                    break;
            }
            buffer.add(variant);
        }
        progress.finish();
        mutations.doneAdding();
        varIter.close();
        varIter = null;
        variants.cleanup();
        variants = null;
        final ArrayList<CombinedMutation> mBuffer = new ArrayList<>();
        final VCFHeader header2 = new VCFHeader(header);
        header2.addMetaDataLine(new VCFHeaderLine(getProgramName() + "AboutQUAL", "QUAL is filled with Grantham Score  http://www.ncbi.nlm.nih.gov/pubmed/4843792"));
        final StringBuilder infoDesc = new StringBuilder("Variant affected by two distinct mutations. Format is defined in the INFO column. ");
        final VCFInfoHeaderLine infoHeaderLine = new VCFInfoHeaderLine("CodonVariant", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, infoDesc.toString());
        super.addMetaData(header2);
        header2.addMetaDataLine(infoHeaderLine);
        if (!sample2samReader.isEmpty()) {
            header2.addMetaDataLine(vcfFilterHeaderLine);
        }
        w = super.openVariantContextWriter(saveAs);
        w.writeHeader(header2);
        progress = new SAMSequenceDictionaryProgress(header);
        mutIter = mutations.iterator();
        for (; ; ) {
            CombinedMutation mutation = null;
            if (mutIter.hasNext()) {
                mutation = mutIter.next();
                progress.watch(mutation.contig, mutation.genomicPosition1);
            }
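            // flush the buffer when the stream is exhausted or when the incoming mutation starts a new contig/position/REF site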
            if (mutation == null || !(!mBuffer.isEmpty() && mBuffer.get(0).contig.equals(mutation.contig) && mBuffer.get(0).genomicPosition1 == mutation.genomicPosition1 && mBuffer.get(0).refAllele.equals(mutation.refAllele))) {
                if (!mBuffer.isEmpty()) {
                    // default grantham score used in QUAL
                    int grantham_score = -1;
                    // default filter fails
                    String filter = vcfFilterHeaderLine.getID();
                    final CombinedMutation first = mBuffer.get(0);
                    final Set<String> info = new HashSet<>();
                    final VariantContext ctx = cah.codec.decode(first.vcfLine);
                    final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                    vcb.chr(first.contig);
                    vcb.start(first.genomicPosition1);
                    vcb.stop(first.genomicPosition1 + first.refAllele.length() - 1);
                    if (!first.id.equals(VCFConstants.EMPTY_ID_FIELD))
                        vcb.id(first.id);
                    for (final CombinedMutation m : mBuffer) {
                        info.add(m.info);
                        grantham_score = Math.max(grantham_score, m.grantham_score);
                        if (VCFConstants.UNFILTERED.equals(m.filter)) {
                            // at least one SNP is ok on this line
                            filter = null;
                        }
                    }
                    vcb.unfiltered();
                    if (filter != null && !sample2samReader.isEmpty()) {
                        vcb.filter(filter);
                    } else {
                        vcb.passFilters();
                    }
                    vcb.attribute(infoHeaderLine.getID(), new ArrayList<String>(info));
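                    // QUAL is phred-scaled (-10 * log10PError), so log10PError = score / -10 makes the written QUAL equal the Grantham score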
                    if (grantham_score > 0) {
                        vcb.log10PError(grantham_score / -10.0);
                    } else {
                        vcb.log10PError(VariantContext.NO_LOG10_PERROR);
                    }
                    w.add(vcb.make());
                }
                mBuffer.clear();
                if (mutation == null)
                    break;
            }
            mBuffer.add(mutation);
        }
        progress.finish();
        mutIter.close();
        mutations.cleanup();
        mutations = null;
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(mutIter);
        CloserUtil.close(varIter);
        if (this.variants != null)
            this.variants.cleanup();
        if (mutations != null)
            mutations.cleanup();
        this.variants = null;
        for (SamReader r : sample2samReader.values()) CloserUtil.close(r);
        CloserUtil.close(w);
        CloserUtil.close(bufferedReader);
    }
}
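To exercise the CIGAR walk above in isolation, here is a minimal, self-contained sketch. It is not part of jvarkit: the class name AlleleOnReadSketch and the helper readCarriesAllele are illustrative, and it assumes a single-base ALT allele, which is what the tool compares. It reproduces the same refpos1/readpos bookkeeping with htsjdk and reports whether a read carries a given ALT base at a 1-based reference position.

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;

public class AlleleOnReadSketch {

    /** true if 'rec' aligns the base 'altBase' (case-insensitive) over the 1-based reference position 'refPos1'. */
    static boolean readCarriesAllele(final SAMRecord rec, final int refPos1, final char altBase) {
        final Cigar cigar = rec.getCigar();
        final byte[] bases = rec.getReadBases();
        if (cigar == null || bases == null || bases.length == 0) return false;
        int refpos1 = rec.getAlignmentStart(); // 1-based reference coordinate of the next aligned base
        int readpos = 0;                       // 0-based index into the read bases
        for (final CigarElement ce : cigar.getCigarElements()) {
            final CigarOperator op = ce.getOperator();
            if (op == CigarOperator.M || op == CigarOperator.EQ || op == CigarOperator.X) {
                // aligned block: read and reference advance together
                for (int x = 0; x < ce.getLength(); ++x) {
                    if (refpos1 == refPos1) {
                        return Character.toUpperCase((char) bases[readpos]) == Character.toUpperCase(altBase);
                    }
                    refpos1++;
                    readpos++;
                }
            } else if (op.consumesReadBases()) {
                readpos += ce.getLength();      // S and I consume read bases only
            } else if (op.consumesReferenceBases()) {
                refpos1 += ce.getLength();      // D and N consume reference bases only
            }
            // H and P consume neither read nor reference
            if (refpos1 > refPos1) break;       // walked past the position of interest
        }
        return false;
    }

    public static void main(final String[] args) {
        // toy record: 5M1D5M anchored at reference position 100 covers 100-104 and 106-110
        final SAMRecord rec = new SAMRecord(new SAMFileHeader());
        rec.setReadName("toy");
        rec.setAlignmentStart(100);
        rec.setCigarString("5M1D5M");
        rec.setReadString("ACGTAACGTA");
        System.out.println(readCarriesAllele(rec, 103, 'T')); // true : 4th base of the first aligned block
        System.out.println(readCarriesAllele(rec, 106, 'A')); // true : first base after the deletion
        System.out.println(readCarriesAllele(rec, 105, 'A')); // false: position falls inside the deletion
    }
}

The toy record is built with plain htsjdk setters, so the sketch runs with only the htsjdk jar on the classpath; the deletion in the CIGAR shows why the loop needs two separate cursors, since it shifts the reference coordinate without consuming any read bases.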
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) LinkedHashMap(java.util.LinkedHashMap) HashSet(java.util.HashSet) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) SAMRecord(htsjdk.samtools.SAMRecord) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) File(java.io.File) Interval(htsjdk.samtools.util.Interval) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)81 SAMFileHeader (htsjdk.samtools.SAMFileHeader)48 SAMRecord (htsjdk.samtools.SAMRecord)33 Test (org.testng.annotations.Test)31 SamReader (htsjdk.samtools.SamReader)29 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)26 File (java.io.File)23 ArrayList (java.util.ArrayList)22 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)20 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)20 HashMap (java.util.HashMap)18 CigarElement (htsjdk.samtools.CigarElement)17 Cigar (htsjdk.samtools.Cigar)16 HashSet (java.util.HashSet)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 CigarOperator (htsjdk.samtools.CigarOperator)14 IOException (java.io.IOException)14 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)13 List (java.util.List)12