Search in sources :

Example 1 with BlastHspAlignment

use of com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Hit merge(List<Hit> hits) {
    Hit first = hits.get(0);
    Split firstSplit = parse(first);
    Hit newHit = new Hit();
    newHit.setHitAccession(first.getHitAccession());
    newHit.setHitDef(firstSplit.chrom);
    newHit.setHitId(first.getHitId());
    newHit.setHitNum(first.getHitNum());
    newHit.setHitLen(String.valueOf(firstSplit.len));
    newHit.setHitHsps(new HitHsps());
    List<Hsp> hsps = new ArrayList<Hsp>();
    for (Hit h0 : hits) {
        /* fix hit_from/hit_to */
        Split split = parse(h0);
        List<Hsp> L = h0.getHitHsps().getHsp();
        for (int i = 0; i < L.size(); ++i) {
            Hsp newHsp = BlastHspAlignment.cloneHsp(L.get(i));
            BlastHspAlignment aln = new BlastHspAlignment(newHsp);
            int h_from = aln.getHitFrom1();
            int h_to = aln.getHitTo1();
            h_from += (split.start1 - 1);
            h_to += (split.start1 - 1);
            newHsp.setHspHitFrom(String.valueOf(h_from));
            newHsp.setHspHitTo(String.valueOf(h_to));
            hsps.add(newHsp);
        }
    }
    boolean done = false;
    while (!done) {
        done = true;
        for (int i = 0; i + 1 < hsps.size(); ++i) {
            Hsp hsp0 = hsps.get(i);
            for (int j = i + 1; j < hsps.size(); ++j) {
                // debug("comparing hsp "+i+" vs "+j+" N="+hsps.size());
                Hsp hsp1 = hsps.get(j);
                Hsp newHitHsp = merge(hsp0, hsp1);
                if (newHitHsp != null) {
                    hsps.set(i, newHitHsp);
                    hsps.remove(j);
                    done = false;
                    break;
                }
            }
            if (!done)
                break;
        }
    }
    for (int i = 0; i < hsps.size(); ++i) hsps.get(i).setHspNum(String.valueOf(i + 1));
    newHit.getHitHsps().getHsp().addAll(hsps);
    return newHit;
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) HitHsps(gov.nih.nlm.ncbi.blast.HitHsps) Hsp(gov.nih.nlm.ncbi.blast.Hsp) ArrayList(java.util.ArrayList) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)

Example 2 with BlastHspAlignment

use of com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Hsp merge(Hsp hh0, Hsp hh1) {
    BlastHspAlignment aln0 = new BlastHspAlignment(hh0);
    BlastHspAlignment aln1 = new BlastHspAlignment(hh1);
    // order hsp on query pos
    if (aln0.getQueryFrom1() > aln1.getQueryFrom1()) {
        Hsp th = hh0;
        hh0 = hh1;
        hh1 = th;
        aln0 = new BlastHspAlignment(hh0);
        aln1 = new BlastHspAlignment(hh1);
    }
    /* not the same strand */
    if (aln0.isPlusPlus() != aln1.isPlusPlus()) {
        // debug("strand");
        return null;
    }
    /* hits overlap */
    if (!overlap(aln0.getHitFrom1(), aln0.getHitTo1(), aln1.getHitFrom1(), aln1.getHitTo1())) {
        // debug("no overlap hit");
        return null;
    }
    /* query overlap */
    if (!overlap(aln0.getQueryFrom1(), aln0.getQueryTo1(), aln1.getQueryFrom1(), aln1.getQueryTo1())) {
        // debug("no overlap query");
        return null;
    }
    // hit1 is contained in hit0
    if (aln0.getQueryFrom1() <= aln1.getQueryFrom1() && aln1.getQueryTo1() <= aln0.getQueryTo1()) {
        // debug("contained");
        return hh0;
    }
    StringBuilder qsb = new StringBuilder();
    StringBuilder msb = new StringBuilder();
    StringBuilder hsb = new StringBuilder();
    int expect_hit = -1;
    int found_hit = -1;
    for (BlastHspAlignment.Align a : aln0) {
        if (a.getQueryIndex1() >= aln1.getQueryFrom1()) {
            // debug("###BREAK###############");
            expect_hit = a.getHitIndex1();
            break;
        }
        qsb.append(a.getQueryChar());
        msb.append(a.getMidChar());
        hsb.append(a.getHitChar());
    }
    if (expect_hit == -1) {
        // debug("HU?");
        return null;
    }
    for (BlastHspAlignment.Align a : aln1) {
        if (a.getQueryIndex1() < aln1.getQueryFrom1())
            continue;
        if (found_hit == -1) {
            found_hit = a.getHitIndex1();
            if (expect_hit != found_hit) {
                LOG.info("Not the expected hit position " + expect_hit + "/" + found_hit);
                return null;
            }
        }
        qsb.append(a.getQueryChar());
        msb.append(a.getMidChar());
        hsb.append(a.getHitChar());
    }
    // info("\n"+qsb+"\n"+msb+"\n"+hsb);
    Hsp newHsp = BlastHspAlignment.cloneHsp(hh0);
    newHsp.setHspAlignLen(String.valueOf(msb.length()));
    newHsp.setHspMidline(msb.toString());
    newHsp.setHspQseq(qsb.toString());
    newHsp.setHspHseq(hsb.toString());
    newHsp.setHspQueryFrom(String.valueOf(Math.min(aln0.getQueryFrom1(), aln1.getQueryFrom1())));
    newHsp.setHspQueryTo(String.valueOf(Math.max(aln0.getQueryTo1(), aln1.getQueryTo1())));
    if (aln0.isPlusPlus()) {
        newHsp.setHspHitFrom(String.valueOf(Math.min(aln0.getHitFrom1(), aln1.getHitFrom1())));
        newHsp.setHspHitTo(String.valueOf(Math.max(aln0.getHitTo1(), aln1.getHitTo1())));
    } else {
        newHsp.setHspHitFrom(String.valueOf(Math.max(aln0.getHitFrom1(), aln1.getHitFrom1())));
        newHsp.setHspHitTo(String.valueOf(Math.min(aln0.getHitTo1(), aln1.getHitTo1())));
    }
    newHsp.setHspGaps(String.valueOf(newHsp.getHspMidline().replaceAll("[^ ]", "").length()));
    newHsp.setHspScore(String.valueOf(newHsp.getHspMidline().replaceAll("[ ]", "").length()));
    // debug("success");
    return newHsp;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)

Example 3 with BlastHspAlignment

use of com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment in project jvarkit by lindenb.

the class BlastToSam method convertIterationToSequenceIteration.

private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
    final SequenceIteration sequenceIteration = new SequenceIteration();
    if (stack.isEmpty())
        return sequenceIteration;
    final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
    // sequenceIteration.iteration=iter1;
    final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
    final StringBuilder readContent = new StringBuilder();
    final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
    for (final Iteration iter1 : stack) {
        for (final Hit hit : iter1.getIterationHits().getHit()) {
            for (final Hsp hsp : hit.getHitHsps().getHsp()) {
                for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
                    char c = a.getQueryChar();
                    if (!Character.isLetter(c))
                        continue;
                    final int queryIndex0 = a.getQueryIndex1() - 1;
                    while (readContent.length() <= queryIndex0) readContent.append('N');
                    if (readContent.charAt(queryIndex0) == 'N') {
                        readContent.setCharAt(queryIndex0, c);
                    } else if (readContent.charAt(queryIndex0) != c) {
                        throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
                    }
                }
            }
        }
    }
    for (Iteration iter1 : stack) {
        for (Hit hit : iter1.getIterationHits().getHit()) {
            for (Hsp hsp : hit.getHitHsps().getHsp()) {
                SAMRecord rec = samRecordFactory.createSAMRecord(header);
                rec.setReadUnmappedFlag(false);
                rec.setReadName(iter1.getIterationQueryDef());
                if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
                    rec.setReferenceName(hit.getHitAccession());
                } else {
                    rec.setReferenceName(hit.getHitDef());
                }
                final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
                if (ssr == null) {
                    LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
                    rec.setReferenceIndex(-1);
                } else {
                    rec.setReferenceIndex(ssr.getSequenceIndex());
                }
                final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
                rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
                final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
                for (BlastHspAlignment.Align a : blastHspAlignment) {
                    // System.err.println("##"+a);
                    if (a.getMidChar() == '|') {
                        cigarL.add(CigarOperator.EQ);
                    } else if (a.getMidChar() == ':') {
                        cigarL.add(CigarOperator.M);
                    } else if (a.getHitChar() == '-') {
                        cigarL.add(CigarOperator.I);
                    } else if (a.getQueryChar() == '-') {
                        cigarL.add(CigarOperator.D);
                    } else {
                        cigarL.add(CigarOperator.X);
                    }
                }
                if (cigarL.size() != hsp.getHspMidline().length()) {
                    throw new IllegalStateException("Boumm");
                }
                Cigar cigarE = new Cigar();
                if (blastHspAlignment.getQueryFrom1() > 1) {
                    cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
                }
                int x = 0;
                while (x < cigarL.size()) {
                    int y = x + 1;
                    while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
                        ++y;
                    }
                    cigarE.add(new CigarElement(y - x, cigarL.get(x)));
                    x = y;
                }
                /* soft clip */
                if (blastHspAlignment.getQueryTo1() < readContent.length()) {
                    cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
                }
                /* hard clip */
                if (readContent.length() < iterLength) {
                    cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
                }
                rec.setCigar(cigarE);
                rec.setMappingQuality(40);
                rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
                rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
                rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
                rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
                rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
                rec.setAttribute("RG", rg1.getId());
                // setAlignmentEnd not supported in SAM API
                // rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
                sequenceIteration.records.add(rec);
            }
        }
    }
    if (readContent.length() == 0) {
        readContent.append('N');
    }
    byte[] readBases = readContent.toString().getBytes();
    char[] readQuals = new char[readBases.length];
    for (int i = 0; i < readBases.length; ++i) {
        readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
    }
    if (sequenceIteration.records.isEmpty()) {
        SAMRecord rec = samRecordFactory.createSAMRecord(header);
        rec.setReadName(stack.get(0).getIterationQueryDef());
        rec.setReadUnmappedFlag(true);
        rec.setAttribute("RG", rg1.getId());
        sequenceIteration.records.add(rec);
    }
    for (SAMRecord rec : sequenceIteration.records) {
        rec.setReadString(new String(readBases));
        rec.setReadBases(readBases);
        rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
        rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
    }
    return sequenceIteration;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment) ArrayList(java.util.ArrayList) Iteration(gov.nih.nlm.ncbi.blast.Iteration) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarOperator(htsjdk.samtools.CigarOperator) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) Hit(gov.nih.nlm.ncbi.blast.Hit) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory)

Aggregations

BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)3 Hsp (gov.nih.nlm.ncbi.blast.Hsp)3 Hit (gov.nih.nlm.ncbi.blast.Hit)2 ArrayList (java.util.ArrayList)2 HitHsps (gov.nih.nlm.ncbi.blast.HitHsps)1 Iteration (gov.nih.nlm.ncbi.blast.Iteration)1 Cigar (htsjdk.samtools.Cigar)1 CigarElement (htsjdk.samtools.CigarElement)1 CigarOperator (htsjdk.samtools.CigarOperator)1 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMRecordFactory (htsjdk.samtools.SAMRecordFactory)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1