Search in sources :

Example 6 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Hsp merge(Hsp hh0, Hsp hh1) {
    BlastHspAlignment aln0 = new BlastHspAlignment(hh0);
    BlastHspAlignment aln1 = new BlastHspAlignment(hh1);
    // order hsp on query pos
    if (aln0.getQueryFrom1() > aln1.getQueryFrom1()) {
        Hsp th = hh0;
        hh0 = hh1;
        hh1 = th;
        aln0 = new BlastHspAlignment(hh0);
        aln1 = new BlastHspAlignment(hh1);
    }
    /* not the same strand */
    if (aln0.isPlusPlus() != aln1.isPlusPlus()) {
        // debug("strand");
        return null;
    }
    /* hits overlap */
    if (!overlap(aln0.getHitFrom1(), aln0.getHitTo1(), aln1.getHitFrom1(), aln1.getHitTo1())) {
        // debug("no overlap hit");
        return null;
    }
    /* query overlap */
    if (!overlap(aln0.getQueryFrom1(), aln0.getQueryTo1(), aln1.getQueryFrom1(), aln1.getQueryTo1())) {
        // debug("no overlap query");
        return null;
    }
    // hit1 is contained in hit0
    if (aln0.getQueryFrom1() <= aln1.getQueryFrom1() && aln1.getQueryTo1() <= aln0.getQueryTo1()) {
        // debug("contained");
        return hh0;
    }
    StringBuilder qsb = new StringBuilder();
    StringBuilder msb = new StringBuilder();
    StringBuilder hsb = new StringBuilder();
    int expect_hit = -1;
    int found_hit = -1;
    for (BlastHspAlignment.Align a : aln0) {
        if (a.getQueryIndex1() >= aln1.getQueryFrom1()) {
            // debug("###BREAK###############");
            expect_hit = a.getHitIndex1();
            break;
        }
        qsb.append(a.getQueryChar());
        msb.append(a.getMidChar());
        hsb.append(a.getHitChar());
    }
    if (expect_hit == -1) {
        // debug("HU?");
        return null;
    }
    for (BlastHspAlignment.Align a : aln1) {
        if (a.getQueryIndex1() < aln1.getQueryFrom1())
            continue;
        if (found_hit == -1) {
            found_hit = a.getHitIndex1();
            if (expect_hit != found_hit) {
                LOG.info("Not the expected hit position " + expect_hit + "/" + found_hit);
                return null;
            }
        }
        qsb.append(a.getQueryChar());
        msb.append(a.getMidChar());
        hsb.append(a.getHitChar());
    }
    // info("\n"+qsb+"\n"+msb+"\n"+hsb);
    Hsp newHsp = BlastHspAlignment.cloneHsp(hh0);
    newHsp.setHspAlignLen(String.valueOf(msb.length()));
    newHsp.setHspMidline(msb.toString());
    newHsp.setHspQseq(qsb.toString());
    newHsp.setHspHseq(hsb.toString());
    newHsp.setHspQueryFrom(String.valueOf(Math.min(aln0.getQueryFrom1(), aln1.getQueryFrom1())));
    newHsp.setHspQueryTo(String.valueOf(Math.max(aln0.getQueryTo1(), aln1.getQueryTo1())));
    if (aln0.isPlusPlus()) {
        newHsp.setHspHitFrom(String.valueOf(Math.min(aln0.getHitFrom1(), aln1.getHitFrom1())));
        newHsp.setHspHitTo(String.valueOf(Math.max(aln0.getHitTo1(), aln1.getHitTo1())));
    } else {
        newHsp.setHspHitFrom(String.valueOf(Math.max(aln0.getHitFrom1(), aln1.getHitFrom1())));
        newHsp.setHspHitTo(String.valueOf(Math.min(aln0.getHitTo1(), aln1.getHitTo1())));
    }
    newHsp.setHspGaps(String.valueOf(newHsp.getHspMidline().replaceAll("[^ ]", "").length()));
    newHsp.setHspScore(String.valueOf(newHsp.getHspMidline().replaceAll("[ ]", "").length()));
    // debug("success");
    return newHsp;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)

Example 7 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class BlastToSam method convertIterationToSequenceIteration.

private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
    final SequenceIteration sequenceIteration = new SequenceIteration();
    if (stack.isEmpty())
        return sequenceIteration;
    final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
    // sequenceIteration.iteration=iter1;
    final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
    final StringBuilder readContent = new StringBuilder();
    final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
    for (final Iteration iter1 : stack) {
        for (final Hit hit : iter1.getIterationHits().getHit()) {
            for (final Hsp hsp : hit.getHitHsps().getHsp()) {
                for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
                    char c = a.getQueryChar();
                    if (!Character.isLetter(c))
                        continue;
                    final int queryIndex0 = a.getQueryIndex1() - 1;
                    while (readContent.length() <= queryIndex0) readContent.append('N');
                    if (readContent.charAt(queryIndex0) == 'N') {
                        readContent.setCharAt(queryIndex0, c);
                    } else if (readContent.charAt(queryIndex0) != c) {
                        throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
                    }
                }
            }
        }
    }
    for (Iteration iter1 : stack) {
        for (Hit hit : iter1.getIterationHits().getHit()) {
            for (Hsp hsp : hit.getHitHsps().getHsp()) {
                SAMRecord rec = samRecordFactory.createSAMRecord(header);
                rec.setReadUnmappedFlag(false);
                rec.setReadName(iter1.getIterationQueryDef());
                if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
                    rec.setReferenceName(hit.getHitAccession());
                } else {
                    rec.setReferenceName(hit.getHitDef());
                }
                final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
                if (ssr == null) {
                    LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
                    rec.setReferenceIndex(-1);
                } else {
                    rec.setReferenceIndex(ssr.getSequenceIndex());
                }
                final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
                rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
                final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
                for (BlastHspAlignment.Align a : blastHspAlignment) {
                    // System.err.println("##"+a);
                    if (a.getMidChar() == '|') {
                        cigarL.add(CigarOperator.EQ);
                    } else if (a.getMidChar() == ':') {
                        cigarL.add(CigarOperator.M);
                    } else if (a.getHitChar() == '-') {
                        cigarL.add(CigarOperator.I);
                    } else if (a.getQueryChar() == '-') {
                        cigarL.add(CigarOperator.D);
                    } else {
                        cigarL.add(CigarOperator.X);
                    }
                }
                if (cigarL.size() != hsp.getHspMidline().length()) {
                    throw new IllegalStateException("Boumm");
                }
                Cigar cigarE = new Cigar();
                if (blastHspAlignment.getQueryFrom1() > 1) {
                    cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
                }
                int x = 0;
                while (x < cigarL.size()) {
                    int y = x + 1;
                    while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
                        ++y;
                    }
                    cigarE.add(new CigarElement(y - x, cigarL.get(x)));
                    x = y;
                }
                /* soft clip */
                if (blastHspAlignment.getQueryTo1() < readContent.length()) {
                    cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
                }
                /* hard clip */
                if (readContent.length() < iterLength) {
                    cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
                }
                rec.setCigar(cigarE);
                rec.setMappingQuality(40);
                rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
                rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
                rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
                rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
                rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
                rec.setAttribute("RG", rg1.getId());
                // setAlignmentEnd not supported in SAM API
                // rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
                sequenceIteration.records.add(rec);
            }
        }
    }
    if (readContent.length() == 0) {
        readContent.append('N');
    }
    byte[] readBases = readContent.toString().getBytes();
    char[] readQuals = new char[readBases.length];
    for (int i = 0; i < readBases.length; ++i) {
        readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
    }
    if (sequenceIteration.records.isEmpty()) {
        SAMRecord rec = samRecordFactory.createSAMRecord(header);
        rec.setReadName(stack.get(0).getIterationQueryDef());
        rec.setReadUnmappedFlag(true);
        rec.setAttribute("RG", rg1.getId());
        sequenceIteration.records.add(rec);
    }
    for (SAMRecord rec : sequenceIteration.records) {
        rec.setReadString(new String(readBases));
        rec.setReadBases(readBases);
        rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
        rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
    }
    return sequenceIteration;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment) ArrayList(java.util.ArrayList) Iteration(gov.nih.nlm.ncbi.blast.Iteration) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarOperator(htsjdk.samtools.CigarOperator) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) Hit(gov.nih.nlm.ncbi.blast.Hit) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory)

Example 8 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class BlastMapAnnotations method printGB.

private void printGB(GBSet gbSet) {
    for (GBSeq gbSeq : gbSet.getGBSeq()) {
        BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
        for (Iteration iteration : iterations.getIteration()) {
            for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
                if (feature.getGBFeatureIntervals() == null)
                    continue;
                if (!acceptfeature(feature.getGBFeatureKey()))
                    continue;
                for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
                    for (Hit hit : iteration.getIterationHits().getHit()) {
                        for (Hsp hsp : hit.getHitHsps().getHsp()) {
                            GenbankInterval bi = new GenbankInterval();
                            bi.gbSeq = gbSeq;
                            bi.gbFeature = feature;
                            bi.gbInterval = interval;
                            bi.hit = hit;
                            bi.hsp = hsp;
                            LOG.debug("interval " + bi);
                            if (!bi.isGbForward())
                                LOG.info("CHECK INTERVAL REVERSE");
                            if (!bi.isFeatureOverlapHsp())
                                continue;
                            stdout().println(bi.toBedString());
                        }
                    }
                }
            }
            break;
        }
    }
// System.err.println("OK");
}
Also used : BlastOutputIterations(gov.nih.nlm.ncbi.blast.BlastOutputIterations) Hit(gov.nih.nlm.ncbi.blast.Hit) GBSeq(gov.nih.nlm.ncbi.gb.GBSeq) Hsp(gov.nih.nlm.ncbi.blast.Hsp) GBFeature(gov.nih.nlm.ncbi.gb.GBFeature) Iteration(gov.nih.nlm.ncbi.blast.Iteration) GBInterval(gov.nih.nlm.ncbi.gb.GBInterval)

Aggregations

Hsp (gov.nih.nlm.ncbi.blast.Hsp)8 Hit (gov.nih.nlm.ncbi.blast.Hit)6 Iteration (gov.nih.nlm.ncbi.blast.Iteration)4 BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)3 BlastOutputIterations (gov.nih.nlm.ncbi.blast.BlastOutputIterations)2 ArrayList (java.util.ArrayList)2 HitHsps (gov.nih.nlm.ncbi.blast.HitHsps)1 GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)1 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)1 INSDFeature (gov.nih.nlm.ncbi.insdseq.INSDFeature)1 Cigar (htsjdk.samtools.Cigar)1 CigarElement (htsjdk.samtools.CigarElement)1 CigarOperator (htsjdk.samtools.CigarOperator)1 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMRecordFactory (htsjdk.samtools.SAMRecordFactory)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 QName (javax.xml.namespace.QName)1