Search in sources :

Example 56 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class PcrSliceReads method run.

@SuppressWarnings("resource")
private int run(SamReader reader) {
    ReadClipper readClipper = new ReadClipper();
    SAMFileHeader header1 = reader.getFileHeader();
    SAMFileHeader header2 = header1.clone();
    header2.addComment(getProgramName() + " " + getVersion() + ": Processed with " + getProgramCommandLine());
    header2.setSortOrder(SortOrder.unsorted);
    for (SAMReadGroupRecord srg : header1.getReadGroups()) {
        for (Interval i : this.bedIntervals.keySet()) {
            // create new read group for this interval
            SAMReadGroupRecord rg = new SAMReadGroupRecord(srg.getId() + "_" + this.bedIntervals.get(i).getName(), srg);
            header2.addReadGroup(rg);
        }
    }
    SAMFileWriter sw = null;
    SAMRecordIterator iter = null;
    try {
        sw = writingBamArgs.openSAMFileWriter(outputFile, header2, false);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = reader.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            if (rec.getReadUnmappedFlag()) {
                sw.addAlignment(rec);
                continue;
            }
            if (!rec.getReadPairedFlag()) {
                // @doc if the read is not a paired-end read ,  then the quality of the read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getMateUnmappedFlag()) {
                // @doc if the MATE is not mapped ,  then the quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (!rec.getProperPairFlag()) {
                // @doc if the properly-paired flag is not set,  then the quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getMateReferenceIndex() != rec.getReferenceIndex()) {
                // @doc if the read and the mate are not mapped on the same chromosome,  then the quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getReadNegativeStrandFlag() == rec.getMateNegativeStrandFlag()) {
                // @doc if the read and the mate are mapped on the same strand,  then the quality of the current read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            int chromStart;
            int chromEnd;
            if (rec.getAlignmentStart() < rec.getMateAlignmentStart()) {
                if (rec.getReadNegativeStrandFlag()) {
                    // @doc if the read(POS) < mate(POS) and read is mapped on the negative strand, then the quality of the current read is set to zero
                    rec.setMappingQuality(0);
                    sw.addAlignment(rec);
                    continue;
                } else {
                    chromStart = rec.getAlignmentStart();
                    chromEnd = rec.getMateAlignmentStart();
                }
            } else {
                if (!rec.getReadNegativeStrandFlag()) {
                    // @doc if the read(POS) > mate(POS) and read is mapped on the positive strand, then the quality of the current read is set to zero
                    rec.setMappingQuality(0);
                    sw.addAlignment(rec);
                    continue;
                } else {
                    chromStart = rec.getMateAlignmentStart();
                    // don't use getAlignmentEnd, to be consistent with mate data
                    chromEnd = rec.getAlignmentStart();
                }
            }
            List<Interval> fragments = findIntervals(rec.getContig(), chromStart, chromEnd);
            if (fragments.isEmpty()) {
                // @doc if no BED fragment is found overlapping the read, then the quality of the read is set to zero
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            Interval best = null;
            if (fragments.size() == 1) {
                best = fragments.get(0);
            } else
                switch(this.ambiguityStrategy) {
                    case random:
                        {
                            best = fragments.get(this.random.nextInt(fragments.size()));
                            break;
                        }
                    case zero:
                        {
                            rec.setMappingQuality(0);
                            sw.addAlignment(rec);
                            continue;
                        }
                    case closer:
                        {
                            int best_distance = Integer.MAX_VALUE;
                            for (Interval frag : fragments) {
                                int distance = Math.abs(chromStart - frag.getStart()) + Math.abs(frag.getEnd() - chromEnd);
                                if (best == null || distance < best_distance) {
                                    best = frag;
                                    best_distance = distance;
                                }
                            }
                            break;
                        }
                    default:
                        throw new IllegalStateException();
                }
            if (clip_reads) {
                rec = readClipper.clip(rec, best);
                if (rec.getMappingQuality() == 0) {
                    sw.addAlignment(rec);
                    continue;
                }
            }
            SAMReadGroupRecord rg = rec.getReadGroup();
            if (rg == null) {
                throw new IOException("Read " + rec + " is missing a Read-Group ID . See http://broadinstitute.github.io/picard/ http://broadinstitute.github.io/picard/command-line-overview.html#AddOrReplaceReadGroups");
            }
            rec.setAttribute("RG", rg.getId() + "_" + best.getName());
            sw.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) IOException(java.io.IOException) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 57 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class LocalRealignReads method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("REFerence file missing;");
        return -1;
    }
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SamReader samReader = null;
    SAMFileWriter w = null;
    SAMRecordIterator iter = null;
    GenomicSequence genomicSequence = null;
    LongestCommonSequence matrix = new LongestCommonSequence();
    try {
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        samReader = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = samReader.getFileHeader();
        final SAMFileHeader header2 = header1.clone();
        header2.setSortOrder(SortOrder.unsorted);
        w = this.writingBamArgs.setReferenceFile(faidxFile).openSAMFileWriter(outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = samReader.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (rec.getReadUnmappedFlag() || rec.isSecondaryOrSupplementary() || rec.getReadFailsVendorQualityCheckFlag() || rec.getDuplicateReadFlag()) {
                w.addAlignment(rec);
                continue;
            }
            final Cigar cigar = rec.getCigar();
            final int nCigarElement = cigar.numCigarElements();
            if (nCigarElement < 2) {
                w.addAlignment(rec);
                continue;
            }
            final CigarElement ce5 = cigar.getCigarElement(0);
            final CigarElement ce3 = cigar.getCigarElement(nCigarElement - 1);
            if (!((ce3.getOperator().equals(CigarOperator.S) && ce3.getLength() >= MIN_ALIGN_LEN) || (ce5.getOperator().equals(CigarOperator.S) && ce5.getLength() >= MIN_ALIGN_LEN))) {
                w.addAlignment(rec);
                continue;
            }
            if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final CharSequence readseq = rec.getReadString();
            /**
             * short invert
             */
            if (ce5.getOperator() == CigarOperator.S && ce5.getLength() >= MIN_ALIGN_LEN) {
                CharSequence clipseq = new SubSequence(readseq, 0, ce5.getLength());
                CharSequence revcomp = new RevCompCharSequence(clipseq);
                LongestCommonSequence.Hit hit = matrix.align(genomicSequence, Math.max(0, rec.getUnclippedStart() - rec.getReadLength()), Math.min(rec.getUnclippedEnd() + rec.getReadLength(), genomicSequence.length()), revcomp, 0, revcomp.length());
                if (hit.size() >= MIN_ALIGN_LEN) {
                    System.err.println("REVCOMP5' " + hit.getMatchingSequence() + " " + clipseq + " for " + readseq + " " + rec.getReadName() + " " + rec.getCigarString());
                }
            /*
					
					hit = matrix.align(
							readseq, 0, readseq.length(),
							revcomp,
							0,
							revcomp.length()
							);
					if(hit.size()>=MIN_ALIGN_LEN)
						{
						System.err.println("REVCOMP5' vs ITSELF: "+hit.getMatchingSequence()+" "+clipseq+" for "+readseq+" "+rec.getReadName()+" "+rec.getCigarString());
						}
					*/
            }
            if (ce3.getOperator() == CigarOperator.S && ce3.getLength() >= MIN_ALIGN_LEN) {
                CharSequence clipseq = new SubSequence(readseq, readseq.length() - ce3.getLength(), readseq.length());
                CharSequence revcomp = new RevCompCharSequence(clipseq);
                LongestCommonSequence.Hit hit = matrix.align(genomicSequence, Math.max(0, rec.getUnclippedStart() - rec.getReadLength()), Math.min(rec.getUnclippedEnd() + rec.getReadLength(), genomicSequence.length()), revcomp, 0, revcomp.length());
                if (hit.size() >= MIN_ALIGN_LEN) {
                    System.err.println("REVCOMP3' " + hit.getMatchingSequence() + " " + clipseq + " for " + readseq + " " + rec.getReadName());
                }
            /*
					hit = matrix.align(
							readseq, 0, readseq.length(),
							revcomp,
							0,
							revcomp.length()
							);
					if(hit.size()>=MIN_ALIGN_LEN)
						{
						System.err.println("REVCOMP3' vs ITSELF: "+hit.getMatchingSequence()+" "+clipseq+" for "+readseq+" "+rec.getReadName()+" "+rec.getCigarString());
						}*/
            }
            /* other */
            List<LongestCommonSequence.Hit> hits = new ArrayList<>();
            align(hits, matrix, genomicSequence, Math.max(0, rec.getUnclippedStart() - rec.getReadLength()), Math.min(rec.getUnclippedEnd() + rec.getReadLength(), genomicSequence.length()), readseq, 0, readseq.length(), -1);
            if (hits.size() < 2000)
                continue;
            for (LongestCommonSequence.Hit hit : hits) {
                int readstart = 0;
                boolean in_M = false;
                for (int i = 0; i < nCigarElement; ++i) {
                    final CigarElement ce = cigar.getCigarElement(i);
                    if (ce.getOperator().consumesReadBases()) {
                        int readend = readstart + ce.getLength();
                        if (ce.getOperator() == CigarOperator.M || ce.getOperator() == CigarOperator.X || ce.getOperator() == CigarOperator.EQ) {
                            if (!(hit.getEndY() <= readstart || readend <= hit.getStartY())) {
                                in_M = true;
                                break;
                            }
                        }
                        readstart = readend;
                    }
                }
                if (in_M)
                    continue;
                int align_size = hit.size();
                System.err.println(rec.getReadName() + " " + rec.getCigarString() + " " + rec.getAlignmentStart() + "-" + rec.getAlignmentEnd());
                System.err.println("REF: " + hit.getStartX() + "-" + hit.getEndX());
                System.err.println("READ " + hit.getStartY() + "-" + hit.getEndY());
                System.err.print("REF  :");
                for (int i = 0; i < align_size; ++i) {
                    System.err.print(genomicSequence.charAt(hit.getStartX() + i));
                }
                System.err.println();
                System.err.print("READ :");
                for (int i = 0; i < align_size; ++i) {
                    System.err.print(readseq.charAt(hit.getStartY() + i));
                }
                System.err.println();
            }
            System.err.println();
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception e) {
        return wrapException(e);
    } finally {
        genomicSequence = null;
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(w);
        CloserUtil.close(indexedFastaSequenceFile);
        matrix = null;
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) RevCompCharSequence(com.github.lindenb.jvarkit.util.bio.RevCompCharSequence) ArrayList(java.util.ArrayList) LongestCommonSequence(com.github.lindenb.jvarkit.util.align.LongestCommonSequence) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SubSequence(com.github.lindenb.jvarkit.lang.SubSequence) SAMRecord(htsjdk.samtools.SAMRecord) RevCompCharSequence(com.github.lindenb.jvarkit.util.bio.RevCompCharSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 58 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class ConvertBamChromosomes method doWork.

@Override
public int doWork(List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    final Set<String> unmappedChromosomes = new HashSet<>();
    try {
        final ContigNameConverter customMapping = ContigNameConverter.fromFile(mappingFile);
        customMapping.setOnNotFound(this.onNotFound);
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        SAMFileHeader header2 = header1.clone();
        // create new sequence dict
        final SAMSequenceDictionary dict1 = header1.getSequenceDictionary();
        if (dict1 == null) {
            LOG.error("Sequence dict missing");
            return -1;
        }
        final List<SAMSequenceRecord> ssrs = new ArrayList<SAMSequenceRecord>(dict1.size());
        for (int i = 0; i < dict1.size(); ++i) {
            SAMSequenceRecord ssr = dict1.getSequence(i);
            String newName = customMapping.apply(ssr.getSequenceName());
            if (newName == null) {
                // skip unknown chromosomes
                continue;
            }
            ssr = new SAMSequenceRecord(newName, ssr.getSequenceLength());
            ssrs.add(ssr);
        }
        header2.setSequenceDictionary(new SAMSequenceDictionary(ssrs));
        SAMSequenceDictionary dict2 = new SAMSequenceDictionary(ssrs);
        header2.setSequenceDictionary(dict2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict1);
        sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
        long num_ignored = 0L;
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            SAMRecord rec1 = iter.next();
            progress.watch(rec1);
            String newName1 = null;
            String newName2 = null;
            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getReferenceName())) {
                newName1 = customMapping.apply(rec1.getReferenceName());
            }
            if (rec1.getReadPairedFlag() && !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getMateReferenceName())) {
                newName2 = customMapping.apply(rec1.getMateReferenceName());
            }
            rec1.setHeader(header2);
            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getReferenceName())) {
                if (newName1 == null) {
                    ++num_ignored;
                    continue;
                }
                rec1.setReferenceName(newName1);
            }
            if (rec1.getReadPairedFlag() && !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getMateReferenceName())) {
                if (newName2 == null) {
                    ++num_ignored;
                    continue;
                }
                rec1.setMateReferenceName(newName2);
            }
            sfw.addAlignment(rec1);
        }
        if (!unmappedChromosomes.isEmpty()) {
            LOG.warning("Unmapped chromosomes: " + unmappedChromosomes);
        }
        LOG.warning("num ignored read:" + num_ignored);
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Example 59 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class VcfToBam method run.

private void run(VcfIterator vcfIterator) throws IOException {
    long id_generator = 0L;
    SAMFileWriter samFileWriter = null;
    final VCFHeader header = vcfIterator.getHeader();
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null)
        throw new IOException("Sequence Dictionary missing in VCF");
    if (!SequenceUtil.areSequenceDictionariesEqual(dict, indexedFastaSequenceFile.getSequenceDictionary())) {
        LOG.warning("REF/VCF: not the same Sequence dictonary " + dict + " " + indexedFastaSequenceFile.getSequenceDictionary());
    }
    final SAMFileHeader samHeader = new SAMFileHeader();
    samHeader.setSequenceDictionary(dict);
    for (final String sample : new TreeSet<>(header.getSampleNamesInOrder())) {
        final SAMReadGroupRecord rg = new SAMReadGroupRecord(sample);
        rg.setSample(sample);
        rg.setLibrary(sample);
        rg.setDescription(sample);
        rg.setLibrary("illumina");
        samHeader.addReadGroup(rg);
    }
    samHeader.addComment("Generated with " + getProgramCommandLine());
    samHeader.setSortOrder(SortOrder.unsorted);
    samFileWriter = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(this.outputFile, samHeader, true);
    /* looping over sequences */
    for (final SAMSequenceRecord ssr : dict.getSequences()) {
        final LinkedList<VariantContext> variantBuffer = new LinkedList<>();
        GenomicSequence genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
        int x = 1;
        while (x + this.fragmentSize <= ssr.getSequenceLength()) {
            // pop on left
            while (!variantBuffer.isEmpty() && (variantBuffer.getFirst().getStart() + this.fragmentSize * 2) < x) {
                variantBuffer.removeFirst();
            }
            // refill buffer
            while (vcfIterator.hasNext()) {
                // buffer is already by enough
                if (!variantBuffer.isEmpty() && variantBuffer.getLast().getStart() > x + 2 * fragmentSize) {
                    break;
                }
                final VariantContext ctx = vcfIterator.peek();
                if (ctx == null)
                    break;
                if (ctx.isIndel() || !ctx.isVariant()) {
                    LOG.warning("Cannot handle " + ctx.getReference() + "/" + ctx.getAlternateAlleles());
                    // consumme
                    vcfIterator.next();
                    continue;
                }
                final SAMSequenceRecord variantContig = dict.getSequence(ctx.getContig());
                if (variantContig == null)
                    throw new IOException("Unknown contig " + ctx.getContig());
                if (variantContig.getSequenceIndex() < ssr.getSequenceIndex()) {
                    // just consumme !
                    // https://github.com/lindenb/jvarkit/issues/86#issuecomment-326986654
                    // consumme
                    vcfIterator.next();
                    continue;
                } else if (variantContig.getSequenceIndex() > ssr.getSequenceIndex()) {
                    break;
                } else {
                    variantBuffer.add(vcfIterator.next());
                }
            }
            if (variantBuffer.isEmpty()) {
                LOG.info("no variant ?");
                // no variant on this chromosome
                break;
            }
            if (x < variantBuffer.getFirst().getStart() - 2 * fragmentSize) {
                x = variantBuffer.getFirst().getStart() - 2 * fragmentSize;
            }
            for (int depth = 0; depth < 1; ++depth) {
                for (String sample : header.getSampleNamesInOrder()) {
                    // loop over genomic strand
                    for (int g_strand = 0; g_strand < 2; ++g_strand) {
                        SAMRecord[] records = { new SAMRecord(samHeader), new SAMRecord(samHeader) };
                        String readName = String.format("%010d", ++id_generator);
                        for (int R = 0; R < 2; ++R) {
                            records[R].setReferenceName(ssr.getSequenceName());
                            records[R].setReadPairedFlag(true);
                            records[R].setReadName(readName);
                            records[R].setMappingQuality(60);
                            records[R].setProperPairFlag(true);
                            records[R].setMateReferenceName(ssr.getSequenceName());
                            records[R].setMateUnmappedFlag(false);
                            records[R].setReadUnmappedFlag(false);
                            records[R].setAttribute("RG", sample);
                            records[R].setReadNegativeStrandFlag(R == 1);
                            StringBuilder sequence = new StringBuilder(this.readSize);
                            int readLen = 0;
                            int refPos1 = (R == 0 ? x : x + this.fragmentSize - this.readSize);
                            records[R].setAlignmentStart(refPos1);
                            List<CigarElement> cigarElements = new ArrayList<>(this.readSize);
                            int NM = 0;
                            while (readLen < this.readSize) {
                                String base = null;
                                VariantContext overlap = null;
                                for (VariantContext vc : variantBuffer) {
                                    int d = vc.getStart() - (refPos1 + readLen);
                                    if (d == 0) {
                                        overlap = vc;
                                        break;
                                    }
                                    if (d > 0)
                                        break;
                                }
                                if (overlap != null) {
                                    Genotype genotype = overlap.getGenotype(sample);
                                    if (genotype.isCalled() && !genotype.isMixed()) {
                                        List<Allele> alleles = genotype.getAlleles();
                                        if (alleles.size() != 2)
                                            throw new RuntimeException("Not a diploid organism.");
                                        Allele allele = null;
                                        if (genotype.isPhased()) {
                                            allele = alleles.get(g_strand);
                                        } else {
                                            allele = alleles.get(Math.random() < 0.5 ? 0 : 1);
                                        }
                                        if (allele.isSymbolic())
                                            throw new RuntimeException("Cannot handle symbolic alleles");
                                        if (allele.isReference()) {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.EQ));
                                        } else if (allele.getBaseString().length() < overlap.getReference().getBaseString().length()) {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.D));
                                            NM++;
                                        } else if (allele.getBaseString().length() > overlap.getReference().getBaseString().length()) {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.I));
                                            NM++;
                                        } else // same size
                                        {
                                            cigarElements.add(new CigarElement(allele.getBaseString().length(), CigarOperator.X));
                                            NM++;
                                        }
                                        base = allele.getBaseString().toLowerCase();
                                    }
                                }
                                if (base == null) {
                                    base = String.valueOf(genomicSequence.charAt(refPos1 - 1 + readLen));
                                    cigarElements.add(new CigarElement(1, CigarOperator.EQ));
                                }
                                sequence.append(base);
                                readLen += base.length();
                            }
                            while (sequence.length() > this.readSize) sequence.deleteCharAt(sequence.length() - 1);
                            records[R].setReadString(sequence.toString());
                            StringBuilder qual = new StringBuilder(sequence.length());
                            for (int i = 0; i < sequence.length(); ++i) qual.append("I");
                            records[R].setBaseQualityString(qual.toString());
                            // cigar
                            int c = 0;
                            while (c + 1 < cigarElements.size()) {
                                CigarElement c0 = cigarElements.get(c);
                                CigarElement c1 = cigarElements.get(c + 1);
                                if (c0.getOperator().equals(c1.getOperator())) {
                                    cigarElements.set(c, new CigarElement(c0.getLength() + c1.getLength(), c0.getOperator()));
                                    cigarElements.remove(c + 1);
                                } else {
                                    ++c;
                                }
                            }
                            records[R].setCigar(new Cigar(cigarElements));
                            records[R].setAttribute("NM", NM);
                        }
                        if (Math.random() < 0.5) {
                            records[0].setFirstOfPairFlag(true);
                            records[0].setSecondOfPairFlag(false);
                            records[1].setFirstOfPairFlag(false);
                            records[1].setSecondOfPairFlag(true);
                        } else {
                            records[0].setFirstOfPairFlag(false);
                            records[0].setSecondOfPairFlag(true);
                            records[1].setFirstOfPairFlag(true);
                            records[1].setSecondOfPairFlag(false);
                        }
                        records[0].setMateAlignmentStart(records[1].getAlignmentStart());
                        records[1].setMateAlignmentStart(records[0].getAlignmentStart());
                        records[0].setInferredInsertSize(records[1].getAlignmentStart() - records[0].getAlignmentStart());
                        records[1].setInferredInsertSize(records[0].getAlignmentStart() - records[1].getAlignmentStart());
                        samFileWriter.addAlignment(records[0]);
                        samFileWriter.addAlignment(records[1]);
                    }
                }
            }
            ++x;
        }
    }
    samFileWriter.close();
}
Also used : SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) CigarElement(htsjdk.samtools.CigarElement) LinkedList(java.util.LinkedList) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 60 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamMaskAlignedBases method doWork.

@Override
public int doWork(List<String> args) {
    final byte RESET_CHAR = (byte) 'N';
    final byte RESET_QUAL = (byte) SAMUtils.fastqToPhred('#');
    long nRecords = 0L;
    long nRecordMasked = 0L;
    long nBasesMasked = 0L;
    long nBases = 0L;
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
        header2.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            ++nRecords;
            nBases += rec.getReadLength();
            if (rec.getReadUnmappedFlag()) {
                SAMUtils.makeReadUnmapped(rec);
                sfw.addAlignment(rec);
                continue;
            }
            if (rec.isSecondaryOrSupplementary()) {
                continue;
            }
            final Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            byte[] quals = rec.getBaseQualities();
            if (cigar == null || cigar.isEmpty()) {
                SAMUtils.makeReadUnmapped(rec);
                sfw.addAlignment(rec);
                continue;
            }
            int readpos = 0;
            for (final CigarElement ce : cigar) {
                final CigarOperator op = ce.getOperator();
                if (op.consumesReadBases()) {
                    if (op.consumesReferenceBases()) {
                        for (int x = 0; x < ce.getLength(); ++x) {
                            if (bases != null)
                                bases[readpos + x] = RESET_CHAR;
                            if (quals != null)
                                quals[readpos + x] = RESET_QUAL;
                            ++nBasesMasked;
                        }
                    }
                    readpos += ce.getLength();
                }
            }
            ++nRecordMasked;
            SAMUtils.makeReadUnmapped(rec);
            sfw.addAlignment(rec);
        }
        iter.close();
        sfw.close();
        progress.finish();
        LOG.info("done : reads masked " + nRecordMasked + "/" + nRecords + " Bases masked:" + nBasesMasked + "/" + nBases);
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8