Search in sources :

Example 1 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class BlastNToSnp method run.

private void run(PrintWriter pw, XMLEventReader r) throws XMLStreamException, JAXBException {
    long numIterations = 0L;
    long numPerfectMath = 0L;
    long numNoHit = 0L;
    pw.print("#query");
    pw.print('\t');
    pw.print("hit");
    pw.print('\t');
    pw.print(("hit-index"));
    pw.print('\t');
    pw.print("hsp-index");
    pw.print('\t');
    pw.print("query-POS");
    pw.print('\t');
    pw.print("hit-POS");
    pw.print('\t');
    pw.print("STRAND");
    pw.print('\t');
    pw.print("REF(hit)");
    pw.print('\t');
    pw.print("ALT(query)");
    pw.print('\t');
    pw.print("blast.align_length");
    pw.print('\t');
    pw.print("blast.hit.var");
    pw.print('\t');
    pw.print("blast.query.var");
    pw.print('\t');
    pw.print("blast.mid.var");
    pw.println();
    for (; ; ) {
        Iteration iter1 = peekIteration(r);
        if (iter1 == null)
            break;
        ++numIterations;
        if (iter1.getIterationHits().getHit().isEmpty()) {
            LOG.info("No hit found for " + iter1.getIterationQueryDef());
            ++numNoHit;
            continue;
        }
        boolean found_mismatch = false;
        for (int hit_loop = 0; hit_loop < iter1.getIterationHits().getHit().size(); ++hit_loop) {
            final Hit hit = iter1.getIterationHits().getHit().get(hit_loop);
            for (int hsp_loop = 0; hsp_loop < hit.getHitHsps().getHsp().size(); ++hsp_loop) {
                final Hsp hsp = hit.getHitHsps().getHsp().get(hsp_loop);
                String hspQseq = hsp.getHspQseq();
                String hspMid = hsp.getHspMidline();
                String hspHSeq = hsp.getHspHseq();
                int hsp_query_from = Integer.parseInt(hsp.getHspQueryFrom());
                int hsp_query_to = Integer.parseInt(hsp.getHspQueryTo());
                int hsp_hit_from = Integer.parseInt(hsp.getHspHitFrom());
                int hsp_hit_to = Integer.parseInt(hsp.getHspHitTo());
                final int align_length = Integer.parseInt(hsp.getHspAlignLen());
                final int hit_shift = (hsp_hit_from > hsp_hit_to ? -1 : 1);
                int i = 0;
                int query_index = hsp_query_from;
                int hit_index = hsp_hit_from;
                while (i < align_length) {
                    char ch = hspHSeq.charAt(i);
                    char cq = hspQseq.charAt(i);
                    char cm = hspMid.charAt(i);
                    if (cm == '|') {
                        ++i;
                        query_index++;
                        hit_index += hit_shift;
                        continue;
                    }
                    found_mismatch = true;
                    int j = i + 1;
                    for (; ; ) {
                        int k = hspMid.indexOf(' ', j);
                        if (k == -1 || k - j > minGapSize)
                            break;
                        j = k + 1;
                    }
                    String ref = new String(hspHSeq.substring(i, j)).replaceAll("[\\- ]", "");
                    String alt = new String(hspQseq.substring(i, j)).replaceAll("[\\- ]", "");
                    if (hit_shift < 0) {
                        StringBuilder sb = new StringBuilder(alt.length());
                        for (int x = alt.length() - 1; x >= 0; --x) {
                            sb.append(AcidNucleics.complement(alt.charAt(x)));
                        }
                        alt = sb.toString();
                        sb = new StringBuilder(ref.length());
                        for (int x = ref.length() - 1; x >= 0; --x) {
                            sb.append(AcidNucleics.complement(ref.charAt(x)));
                        }
                        ref = sb.toString();
                    }
                    pw.print(iter1.getIterationQueryDef());
                    pw.print('\t');
                    pw.print(hit.getHitDef());
                    pw.print('\t');
                    pw.print((1 + hit_loop));
                    pw.print('\t');
                    pw.print((1 + hsp_loop));
                    pw.print('\t');
                    pw.print(query_index);
                    pw.print('\t');
                    pw.print(hit_index);
                    pw.print('\t');
                    pw.print(hit_shift == 1 ? '+' : '-');
                    pw.print('\t');
                    pw.print(ref);
                    pw.print('\t');
                    pw.print(alt);
                    pw.print('\t');
                    pw.print(align_length);
                    pw.print('\t');
                    pw.print(hspHSeq.substring(i, j).replace(' ', '.'));
                    pw.print('\t');
                    pw.print(hspQseq.substring(i, j).replace(' ', '.'));
                    pw.print('\t');
                    pw.print(hspMid.substring(i, j).replace(' ', '.'));
                    pw.println();
                    while (i < j) {
                        ch = hspHSeq.charAt(i);
                        cq = hspQseq.charAt(i);
                        if (ch != '-' && ch != ' ') {
                            hit_index += hit_shift;
                        }
                        if (cq != '-' && cq != ' ') {
                            query_index++;
                        }
                        ++i;
                    }
                }
                if (hit_index - hit_shift != hsp_hit_to) {
                    marshaller.marshal(new JAXBElement<Hsp>(new QName("Hsp"), Hsp.class, hsp), stderr());
                    throw new IllegalStateException("Error expected hit-index= " + hit_index + "-" + hit_shift + " == hsp-hit-to=" + hsp_hit_to);
                }
                if (query_index - 1 != hsp_query_to) {
                    marshaller.marshal(new JAXBElement<Hit>(new QName("Hit"), Hit.class, hit), stderr());
                    throw new IllegalStateException("query_index " + query_index + "(1 != hsp_query_to:" + hsp_query_to);
                }
                if (pw.checkError())
                    break;
            }
        }
        if (!found_mismatch) {
            numPerfectMath++;
        }
    }
    // end while read
    LOG.info("ITERATIONS : " + numIterations);
    LOG.info("ONLY_PERFECT_MATCH : " + numPerfectMath);
    LOG.info("NO_HIT : " + numNoHit);
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) QName(javax.xml.namespace.QName) Iteration(gov.nih.nlm.ncbi.blast.Iteration) Hit(gov.nih.nlm.ncbi.blast.Hit)

Example 2 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class BlastMapAnnotations method printUniprot.

private void printUniprot(Uniprot uniprotSet) {
    if (uniprotSet.getEntry().isEmpty()) {
        LOG.warn("empty uniprot entry.");
        return;
    }
    if (uniprotSet.getEntry().size() > 1) {
        LOG.warn("entry contains more than one sequence.");
    }
    for (Entry entry : uniprotSet.getEntry()) {
        BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
        for (Iteration iteration : iterations.getIteration()) {
            for (FeatureType feature : entry.getFeature()) {
                if (!acceptfeature(feature.getType()))
                    continue;
                for (Hit hit : iteration.getIterationHits().getHit()) {
                    for (Hsp hsp : hit.getHitHsps().getHsp()) {
                        UniprotInterval bi = new UniprotInterval();
                        bi.entry = entry;
                        bi.featureType = feature;
                        bi.hit = hit;
                        bi.hsp = hsp;
                        LOG.debug("interval " + bi);
                        if (!bi.isFeatureOverlapHsp()) {
                            continue;
                        }
                        stdout().println(bi.toBedString());
                    }
                }
            }
            break;
        }
        break;
    }
// System.err.println("OK");
}
Also used : BlastOutputIterations(gov.nih.nlm.ncbi.blast.BlastOutputIterations) FeatureType(org.uniprot.FeatureType) Entry(org.uniprot.Entry) Hit(gov.nih.nlm.ncbi.blast.Hit) Hsp(gov.nih.nlm.ncbi.blast.Hsp) Iteration(gov.nih.nlm.ncbi.blast.Iteration)

Example 3 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class BlastHspAlignment method cloneHsp.

public static Hsp cloneHsp(Hsp hsp) {
    Hsp h2 = new Hsp();
    h2.setHspNum(hsp.getHspNum());
    h2.setHspBitScore(hsp.getHspBitScore());
    h2.setHspScore(hsp.getHspScore());
    h2.setHspEvalue(hsp.getHspEvalue());
    h2.setHspIdentity(hsp.getHspIdentity());
    h2.setHspGaps(hsp.getHspGaps());
    h2.setHspPositive(hsp.getHspPositive());
    h2.setHspAlignLen(hsp.getHspAlignLen());
    h2.setHspQueryFrom(hsp.getHspQueryFrom());
    h2.setHspQueryTo(hsp.getHspQueryTo());
    h2.setHspQueryFrame(hsp.getHspQueryFrame());
    h2.setHspHitFrom(hsp.getHspHitFrom());
    h2.setHspHitTo(hsp.getHspHitTo());
    h2.setHspHitFrame(hsp.getHspHitFrame());
    h2.setHspQseq(hsp.getHspQseq());
    h2.setHspHseq(hsp.getHspHseq());
    h2.setHspMidline(hsp.getHspMidline());
    h2.setHspPatternFrom(hsp.getHspPatternFrom());
    h2.setHspPatternTo(hsp.getHspPatternTo());
    return h2;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp)

Example 4 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class Biostar3654 method parseIteration.

private void parseIteration(String database, Iteration iteration) throws Exception {
    this.pw.println("QUERY: " + iteration.getIterationQueryDef());
    this.pw.println("       ID:" + iteration.getIterationQueryID() + " Len:" + iteration.getIterationQueryLen());
    for (Hit hit : iteration.getIterationHits().getHit()) {
        this.pw.println(">" + hit.getHitDef());
        this.pw.println(" " + hit.getHitAccession());
        this.pw.println(" id:" + hit.getHitId() + " len:" + hit.getHitLen());
        for (Hsp hsp : hit.getHitHsps().getHsp()) {
            List<INSDFeature> qFeatures = fetchAnnotations(database, iteration.getIterationQueryID(), Integer.parseInt(hsp.getHspQueryFrom()), Integer.parseInt(hsp.getHspQueryTo()));
            List<INSDFeature> hFeatures = fetchAnnotations(database, hit.getHitAccession(), Integer.parseInt(hsp.getHspHitFrom()), Integer.parseInt(hsp.getHspHitTo()));
            this.pw.println();
            this.pw.println("   e-value:" + hsp.getHspEvalue() + " gap:" + hsp.getHspGaps() + " bitScore:" + hsp.getHspBitScore());
            this.pw.println();
            // create the Printer for the Query and the Hit
            QPrinter qPrinter = new QPrinter(hsp, qFeatures);
            HPrinter hPrinter = new HPrinter(hsp, hFeatures);
            // loop over the lines
            while (qPrinter.next() && hPrinter.next()) {
                qPrinter.print();
                this.pw.printf("QUERY %0" + margin + "d ", qPrinter.seqStart);
                this.pw.print(hsp.getHspQseq().substring(qPrinter.stringStart, qPrinter.stringEnd));
                this.pw.printf(" %0" + margin + "d", qPrinter.seqEnd - (qPrinter.sign));
                this.pw.println();
                this.pw.printf("      %" + margin + "s ", "");
                this.pw.print(hsp.getHspMidline().substring(qPrinter.stringStart, qPrinter.stringEnd));
                this.pw.println();
                this.pw.printf("HIT   %0" + margin + "d ", hPrinter.seqStart);
                this.pw.print(hsp.getHspHseq().substring(hPrinter.stringStart, hPrinter.stringEnd));
                this.pw.printf(" %0" + margin + "d", hPrinter.seqEnd - (hPrinter.sign));
                this.pw.println();
                hPrinter.print();
                this.pw.println();
            }
            this.pw.flush();
        }
    }
// System.err.println("OK");
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) INSDFeature(gov.nih.nlm.ncbi.insdseq.INSDFeature) Hsp(gov.nih.nlm.ncbi.blast.Hsp)

Example 5 with Hsp

use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Hit merge(List<Hit> hits) {
    Hit first = hits.get(0);
    Split firstSplit = parse(first);
    Hit newHit = new Hit();
    newHit.setHitAccession(first.getHitAccession());
    newHit.setHitDef(firstSplit.chrom);
    newHit.setHitId(first.getHitId());
    newHit.setHitNum(first.getHitNum());
    newHit.setHitLen(String.valueOf(firstSplit.len));
    newHit.setHitHsps(new HitHsps());
    List<Hsp> hsps = new ArrayList<Hsp>();
    for (Hit h0 : hits) {
        /* fix hit_from/hit_to */
        Split split = parse(h0);
        List<Hsp> L = h0.getHitHsps().getHsp();
        for (int i = 0; i < L.size(); ++i) {
            Hsp newHsp = BlastHspAlignment.cloneHsp(L.get(i));
            BlastHspAlignment aln = new BlastHspAlignment(newHsp);
            int h_from = aln.getHitFrom1();
            int h_to = aln.getHitTo1();
            h_from += (split.start1 - 1);
            h_to += (split.start1 - 1);
            newHsp.setHspHitFrom(String.valueOf(h_from));
            newHsp.setHspHitTo(String.valueOf(h_to));
            hsps.add(newHsp);
        }
    }
    boolean done = false;
    while (!done) {
        done = true;
        for (int i = 0; i + 1 < hsps.size(); ++i) {
            Hsp hsp0 = hsps.get(i);
            for (int j = i + 1; j < hsps.size(); ++j) {
                // debug("comparing hsp "+i+" vs "+j+" N="+hsps.size());
                Hsp hsp1 = hsps.get(j);
                Hsp newHitHsp = merge(hsp0, hsp1);
                if (newHitHsp != null) {
                    hsps.set(i, newHitHsp);
                    hsps.remove(j);
                    done = false;
                    break;
                }
            }
            if (!done)
                break;
        }
    }
    for (int i = 0; i < hsps.size(); ++i) hsps.get(i).setHspNum(String.valueOf(i + 1));
    newHit.getHitHsps().getHsp().addAll(hsps);
    return newHit;
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) HitHsps(gov.nih.nlm.ncbi.blast.HitHsps) Hsp(gov.nih.nlm.ncbi.blast.Hsp) ArrayList(java.util.ArrayList) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)

Aggregations

Hsp (gov.nih.nlm.ncbi.blast.Hsp)8 Hit (gov.nih.nlm.ncbi.blast.Hit)6 Iteration (gov.nih.nlm.ncbi.blast.Iteration)4 BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)3 BlastOutputIterations (gov.nih.nlm.ncbi.blast.BlastOutputIterations)2 ArrayList (java.util.ArrayList)2 HitHsps (gov.nih.nlm.ncbi.blast.HitHsps)1 GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)1 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)1 INSDFeature (gov.nih.nlm.ncbi.insdseq.INSDFeature)1 Cigar (htsjdk.samtools.Cigar)1 CigarElement (htsjdk.samtools.CigarElement)1 CigarOperator (htsjdk.samtools.CigarOperator)1 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMRecordFactory (htsjdk.samtools.SAMRecordFactory)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 QName (javax.xml.namespace.QName)1