Search in sources :

Example 1 with SAMRecordFactory

use of htsjdk.samtools.SAMRecordFactory in project jvarkit by lindenb.

the class BWAMemNOp method doWork.

@Override
public int doWork(List<String> args) {
    SamReader r = null;
    SAMFileWriter w = null;
    try {
        r = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = r.getFileHeader();
        OtherCanonicalAlignFactory ocaf = new OtherCanonicalAlignFactory(header);
        w = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
        SAMRecordIterator iter = r.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            if (rec.getSupplementaryAlignmentFlag()) {
                continue;
            }
            if (rec.getReadUnmappedFlag()) {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
                continue;
            }
            Cigar cigar1 = rec.getCigar();
            if (cigar1 == null || cigar1.isEmpty() || !(cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) || cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S))) // last or first is soft clipping
            {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
                continue;
            }
            rec.getAlignmentStart();
            List<OtherCanonicalAlign> xps = ocaf.getXPAligns(rec);
            if (xps.isEmpty()) {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
                continue;
            }
            boolean found_one = false;
            for (OtherCanonicalAlign xp : xps) {
                if (!rec.getReferenceName().equals(xp.getReferenceName()))
                    continue;
                if (xp.getReadNegativeStrandFlag() != rec.getReadNegativeStrandFlag())
                    continue;
                Cigar cigar2 = xp.getCigar();
                if (cigar2 == null || cigar2.isEmpty()) {
                    continue;
                }
                SAMRecord newrec = null;
                List<CigarEvt> L1 = null;
                List<CigarEvt> L2 = null;
                if (cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(cigar1.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar2.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(0).getLength() >= this.min_soft_clip_length && rec.getAlignmentEnd() < xp.getAlignmentStart()) {
                    newrec = samRecordFactory.createSAMRecord(header);
                    int ref1 = rec.getAlignmentStart();
                    newrec.setAlignmentStart(ref1);
                    L1 = cigarEvents(0, ref1, cigar1);
                    L2 = cigarEvents(0, xp.getAlignmentStart(), cigar2);
                } else if (cigar2.getCigarElement(cigar2.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(cigar2.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(0).getLength() >= this.min_soft_clip_length && xp.getAlignmentEnd() < rec.getAlignmentStart()) {
                    newrec = samRecordFactory.createSAMRecord(header);
                    int ref1 = xp.getAlignmentStart();
                    newrec.setAlignmentStart(ref1);
                    L1 = cigarEvents(0, ref1, cigar2);
                    L2 = cigarEvents(0, rec.getAlignmentStart(), cigar1);
                }
                if (newrec == null)
                    continue;
                newrec.setFlags(rec.getFlags());
                newrec.setReadName(rec.getReadName());
                newrec.setReadBases(rec.getReadBases());
                newrec.setMappingQuality(rec.getMappingQuality());
                newrec.setReferenceIndex(rec.getReferenceIndex());
                newrec.setBaseQualities(rec.getBaseQualities());
                if (found_one) {
                    newrec.setNotPrimaryAlignmentFlag(true);
                }
                found_one = true;
                for (SAMTagAndValue tav : rec.getAttributes()) {
                    if (tav.tag.equals(ocaf.getAttributeKey()))
                        continue;
                    if (tav.tag.equals("NM"))
                        continue;
                    newrec.setAttribute(tav.tag, tav.value);
                }
                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    newrec.setMateAlignmentStart(rec.getMateAlignmentStart());
                    newrec.setMateReferenceIndex(rec.getMateReferenceIndex());
                    newrec.setInferredInsertSize(rec.getInferredInsertSize());
                }
                while (!L1.isEmpty() && (L1.get(L1.size() - 1).op.equals(CigarOperator.S) || L1.get(L1.size() - 1).op.equals(CigarOperator.D) || L1.get(L1.size() - 1).op.equals(CigarOperator.H))) {
                    L1.remove(L1.size() - 1);
                }
                while (!L2.isEmpty() && L2.get(0).read0 <= L1.get(L1.size() - 1).read0) {
                    L2.remove(0);
                }
                List<CigarElement> cigarElements = new ArrayList<CigarElement>();
                int i = 0;
                while (i < L1.size()) {
                    int j = i + 1;
                    while (j < L1.size() && L1.get(i).op.equals(L1.get(j).op)) {
                        j++;
                    }
                    cigarElements.add(new CigarElement(j - i, L1.get(i).op));
                    i = j;
                }
                // add 'N'
                cigarElements.add(new CigarElement((L2.get(0).ref1 - L1.get(L1.size() - 1).ref1) - 1, CigarOperator.N));
                i = 0;
                while (i < L2.size()) {
                    int j = i + 1;
                    while (j < L2.size() && L2.get(i).op.equals(L2.get(j).op)) {
                        j++;
                    }
                    cigarElements.add(new CigarElement(j - i, L2.get(i).op));
                    i = j;
                }
                // cleanup : case where  'S' is close to 'N'
                i = 0;
                while (i + 1 < cigarElements.size()) {
                    CigarElement ce1 = cigarElements.get(i);
                    CigarElement ce2 = cigarElements.get(i + 1);
                    if (i > 0 && ce1.getOperator().equals(CigarOperator.S) && ce2.getOperator().equals(CigarOperator.N)) {
                        cigarElements.set(i, new CigarElement(ce1.getLength(), CigarOperator.X));
                    } else if (i + 2 < cigarElements.size() && ce1.getOperator().equals(CigarOperator.N) && ce2.getOperator().equals(CigarOperator.S)) {
                        cigarElements.set(i + 1, new CigarElement(ce2.getLength(), CigarOperator.X));
                    }
                    i++;
                }
                newrec.setCigar(new Cigar(cigarElements));
                List<SAMValidationError> validations = newrec.isValid();
                if (validations != null) {
                    for (SAMValidationError err : validations) {
                        LOG.warning(err.getType() + ":" + err.getMessage());
                    }
                }
                w.addAlignment(newrec);
            }
            if (!found_one) {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
            }
        }
        iter.close();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(w);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue)

Example 2 with SAMRecordFactory

use of htsjdk.samtools.SAMRecordFactory in project jvarkit by lindenb.

the class BlastToSam method convertIterationToSequenceIteration.

private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
    final SequenceIteration sequenceIteration = new SequenceIteration();
    if (stack.isEmpty())
        return sequenceIteration;
    final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
    // sequenceIteration.iteration=iter1;
    final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
    final StringBuilder readContent = new StringBuilder();
    final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
    for (final Iteration iter1 : stack) {
        for (final Hit hit : iter1.getIterationHits().getHit()) {
            for (final Hsp hsp : hit.getHitHsps().getHsp()) {
                for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
                    char c = a.getQueryChar();
                    if (!Character.isLetter(c))
                        continue;
                    final int queryIndex0 = a.getQueryIndex1() - 1;
                    while (readContent.length() <= queryIndex0) readContent.append('N');
                    if (readContent.charAt(queryIndex0) == 'N') {
                        readContent.setCharAt(queryIndex0, c);
                    } else if (readContent.charAt(queryIndex0) != c) {
                        throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
                    }
                }
            }
        }
    }
    for (Iteration iter1 : stack) {
        for (Hit hit : iter1.getIterationHits().getHit()) {
            for (Hsp hsp : hit.getHitHsps().getHsp()) {
                SAMRecord rec = samRecordFactory.createSAMRecord(header);
                rec.setReadUnmappedFlag(false);
                rec.setReadName(iter1.getIterationQueryDef());
                if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
                    rec.setReferenceName(hit.getHitAccession());
                } else {
                    rec.setReferenceName(hit.getHitDef());
                }
                final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
                if (ssr == null) {
                    LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
                    rec.setReferenceIndex(-1);
                } else {
                    rec.setReferenceIndex(ssr.getSequenceIndex());
                }
                final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
                rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
                final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
                for (BlastHspAlignment.Align a : blastHspAlignment) {
                    // System.err.println("##"+a);
                    if (a.getMidChar() == '|') {
                        cigarL.add(CigarOperator.EQ);
                    } else if (a.getMidChar() == ':') {
                        cigarL.add(CigarOperator.M);
                    } else if (a.getHitChar() == '-') {
                        cigarL.add(CigarOperator.I);
                    } else if (a.getQueryChar() == '-') {
                        cigarL.add(CigarOperator.D);
                    } else {
                        cigarL.add(CigarOperator.X);
                    }
                }
                if (cigarL.size() != hsp.getHspMidline().length()) {
                    throw new IllegalStateException("Boumm");
                }
                Cigar cigarE = new Cigar();
                if (blastHspAlignment.getQueryFrom1() > 1) {
                    cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
                }
                int x = 0;
                while (x < cigarL.size()) {
                    int y = x + 1;
                    while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
                        ++y;
                    }
                    cigarE.add(new CigarElement(y - x, cigarL.get(x)));
                    x = y;
                }
                /* soft clip */
                if (blastHspAlignment.getQueryTo1() < readContent.length()) {
                    cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
                }
                /* hard clip */
                if (readContent.length() < iterLength) {
                    cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
                }
                rec.setCigar(cigarE);
                rec.setMappingQuality(40);
                rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
                rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
                rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
                rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
                rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
                rec.setAttribute("RG", rg1.getId());
                // setAlignmentEnd not supported in SAM API
                // rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
                sequenceIteration.records.add(rec);
            }
        }
    }
    if (readContent.length() == 0) {
        readContent.append('N');
    }
    byte[] readBases = readContent.toString().getBytes();
    char[] readQuals = new char[readBases.length];
    for (int i = 0; i < readBases.length; ++i) {
        readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
    }
    if (sequenceIteration.records.isEmpty()) {
        SAMRecord rec = samRecordFactory.createSAMRecord(header);
        rec.setReadName(stack.get(0).getIterationQueryDef());
        rec.setReadUnmappedFlag(true);
        rec.setAttribute("RG", rg1.getId());
        sequenceIteration.records.add(rec);
    }
    for (SAMRecord rec : sequenceIteration.records) {
        rec.setReadString(new String(readBases));
        rec.setReadBases(readBases);
        rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
        rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
    }
    return sequenceIteration;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment) ArrayList(java.util.ArrayList) Iteration(gov.nih.nlm.ncbi.blast.Iteration) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarOperator(htsjdk.samtools.CigarOperator) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) Hit(gov.nih.nlm.ncbi.blast.Hit) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory)

Aggregations

Cigar (htsjdk.samtools.Cigar)2 CigarElement (htsjdk.samtools.CigarElement)2 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)2 SAMRecord (htsjdk.samtools.SAMRecord)2 SAMRecordFactory (htsjdk.samtools.SAMRecordFactory)2 ArrayList (java.util.ArrayList)2 BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)1 OtherCanonicalAlign (com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign)1 OtherCanonicalAlignFactory (com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory)1 Hit (gov.nih.nlm.ncbi.blast.Hit)1 Hsp (gov.nih.nlm.ncbi.blast.Hsp)1 Iteration (gov.nih.nlm.ncbi.blast.Iteration)1 CigarOperator (htsjdk.samtools.CigarOperator)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMFileWriter (htsjdk.samtools.SAMFileWriter)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SAMTagAndValue (htsjdk.samtools.SAMRecord.SAMTagAndValue)1 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 SAMValidationError (htsjdk.samtools.SAMValidationError)1