Search in sources :

Example 1 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class BlastNToSnp method run.

private void run(PrintWriter pw, XMLEventReader r) throws XMLStreamException, JAXBException {
    long numIterations = 0L;
    long numPerfectMath = 0L;
    long numNoHit = 0L;
    pw.print("#query");
    pw.print('\t');
    pw.print("hit");
    pw.print('\t');
    pw.print(("hit-index"));
    pw.print('\t');
    pw.print("hsp-index");
    pw.print('\t');
    pw.print("query-POS");
    pw.print('\t');
    pw.print("hit-POS");
    pw.print('\t');
    pw.print("STRAND");
    pw.print('\t');
    pw.print("REF(hit)");
    pw.print('\t');
    pw.print("ALT(query)");
    pw.print('\t');
    pw.print("blast.align_length");
    pw.print('\t');
    pw.print("blast.hit.var");
    pw.print('\t');
    pw.print("blast.query.var");
    pw.print('\t');
    pw.print("blast.mid.var");
    pw.println();
    for (; ; ) {
        Iteration iter1 = peekIteration(r);
        if (iter1 == null)
            break;
        ++numIterations;
        if (iter1.getIterationHits().getHit().isEmpty()) {
            LOG.info("No hit found for " + iter1.getIterationQueryDef());
            ++numNoHit;
            continue;
        }
        boolean found_mismatch = false;
        for (int hit_loop = 0; hit_loop < iter1.getIterationHits().getHit().size(); ++hit_loop) {
            final Hit hit = iter1.getIterationHits().getHit().get(hit_loop);
            for (int hsp_loop = 0; hsp_loop < hit.getHitHsps().getHsp().size(); ++hsp_loop) {
                final Hsp hsp = hit.getHitHsps().getHsp().get(hsp_loop);
                String hspQseq = hsp.getHspQseq();
                String hspMid = hsp.getHspMidline();
                String hspHSeq = hsp.getHspHseq();
                int hsp_query_from = Integer.parseInt(hsp.getHspQueryFrom());
                int hsp_query_to = Integer.parseInt(hsp.getHspQueryTo());
                int hsp_hit_from = Integer.parseInt(hsp.getHspHitFrom());
                int hsp_hit_to = Integer.parseInt(hsp.getHspHitTo());
                final int align_length = Integer.parseInt(hsp.getHspAlignLen());
                final int hit_shift = (hsp_hit_from > hsp_hit_to ? -1 : 1);
                int i = 0;
                int query_index = hsp_query_from;
                int hit_index = hsp_hit_from;
                while (i < align_length) {
                    char ch = hspHSeq.charAt(i);
                    char cq = hspQseq.charAt(i);
                    char cm = hspMid.charAt(i);
                    if (cm == '|') {
                        ++i;
                        query_index++;
                        hit_index += hit_shift;
                        continue;
                    }
                    found_mismatch = true;
                    int j = i + 1;
                    for (; ; ) {
                        int k = hspMid.indexOf(' ', j);
                        if (k == -1 || k - j > minGapSize)
                            break;
                        j = k + 1;
                    }
                    String ref = new String(hspHSeq.substring(i, j)).replaceAll("[\\- ]", "");
                    String alt = new String(hspQseq.substring(i, j)).replaceAll("[\\- ]", "");
                    if (hit_shift < 0) {
                        StringBuilder sb = new StringBuilder(alt.length());
                        for (int x = alt.length() - 1; x >= 0; --x) {
                            sb.append(AcidNucleics.complement(alt.charAt(x)));
                        }
                        alt = sb.toString();
                        sb = new StringBuilder(ref.length());
                        for (int x = ref.length() - 1; x >= 0; --x) {
                            sb.append(AcidNucleics.complement(ref.charAt(x)));
                        }
                        ref = sb.toString();
                    }
                    pw.print(iter1.getIterationQueryDef());
                    pw.print('\t');
                    pw.print(hit.getHitDef());
                    pw.print('\t');
                    pw.print((1 + hit_loop));
                    pw.print('\t');
                    pw.print((1 + hsp_loop));
                    pw.print('\t');
                    pw.print(query_index);
                    pw.print('\t');
                    pw.print(hit_index);
                    pw.print('\t');
                    pw.print(hit_shift == 1 ? '+' : '-');
                    pw.print('\t');
                    pw.print(ref);
                    pw.print('\t');
                    pw.print(alt);
                    pw.print('\t');
                    pw.print(align_length);
                    pw.print('\t');
                    pw.print(hspHSeq.substring(i, j).replace(' ', '.'));
                    pw.print('\t');
                    pw.print(hspQseq.substring(i, j).replace(' ', '.'));
                    pw.print('\t');
                    pw.print(hspMid.substring(i, j).replace(' ', '.'));
                    pw.println();
                    while (i < j) {
                        ch = hspHSeq.charAt(i);
                        cq = hspQseq.charAt(i);
                        if (ch != '-' && ch != ' ') {
                            hit_index += hit_shift;
                        }
                        if (cq != '-' && cq != ' ') {
                            query_index++;
                        }
                        ++i;
                    }
                }
                if (hit_index - hit_shift != hsp_hit_to) {
                    marshaller.marshal(new JAXBElement<Hsp>(new QName("Hsp"), Hsp.class, hsp), stderr());
                    throw new IllegalStateException("Error expected hit-index= " + hit_index + "-" + hit_shift + " == hsp-hit-to=" + hsp_hit_to);
                }
                if (query_index - 1 != hsp_query_to) {
                    marshaller.marshal(new JAXBElement<Hit>(new QName("Hit"), Hit.class, hit), stderr());
                    throw new IllegalStateException("query_index " + query_index + "(1 != hsp_query_to:" + hsp_query_to);
                }
                if (pw.checkError())
                    break;
            }
        }
        if (!found_mismatch) {
            numPerfectMath++;
        }
    }
    // end while read
    LOG.info("ITERATIONS : " + numIterations);
    LOG.info("ONLY_PERFECT_MATCH : " + numPerfectMath);
    LOG.info("NO_HIT : " + numNoHit);
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) QName(javax.xml.namespace.QName) Iteration(gov.nih.nlm.ncbi.blast.Iteration) Hit(gov.nih.nlm.ncbi.blast.Hit)

Example 2 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Iteration merge(Iteration iteration) {
    if (iteration.getIterationHits().getHit().size() <= 1)
        return iteration;
    Map<String, List<Hit>> chrom2hits = new LinkedHashMap<String, List<Hit>>();
    for (Hit hit : iteration.getIterationHits().getHit()) {
        Split s = parse(hit);
        if (s == null)
            return iteration;
        List<Hit> L = chrom2hits.get(s.chrom);
        if (L == null) {
            L = new ArrayList<Hit>();
            chrom2hits.put(s.chrom, L);
        }
        L.add(hit);
    }
    Iteration newiteration = new Iteration();
    List<Hit> newHits = new ArrayList<Hit>();
    for (String chrom : chrom2hits.keySet()) {
        List<Hit> L = chrom2hits.get(chrom);
        Hit newHit = merge(L);
        newHits.add(newHit);
    }
    newiteration.setIterationIterNum(iteration.getIterationIterNum());
    newiteration.setIterationQueryID(iteration.getIterationQueryID());
    newiteration.setIterationQueryLen(iteration.getIterationQueryLen());
    newiteration.setIterationQueryDef(iteration.getIterationQueryDef());
    newiteration.setIterationMessage(iteration.getIterationMessage());
    newiteration.setIterationHits(new IterationHits());
    newiteration.getIterationHits().getHit().addAll(newHits);
    return newiteration;
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) IterationHits(gov.nih.nlm.ncbi.blast.IterationHits) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Iteration(gov.nih.nlm.ncbi.blast.Iteration) LinkedHashMap(java.util.LinkedHashMap)

Example 3 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class BlastMapAnnotations method printUniprot.

private void printUniprot(Uniprot uniprotSet) {
    if (uniprotSet.getEntry().isEmpty()) {
        LOG.warn("empty uniprot entry.");
        return;
    }
    if (uniprotSet.getEntry().size() > 1) {
        LOG.warn("entry contains more than one sequence.");
    }
    for (Entry entry : uniprotSet.getEntry()) {
        BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
        for (Iteration iteration : iterations.getIteration()) {
            for (FeatureType feature : entry.getFeature()) {
                if (!acceptfeature(feature.getType()))
                    continue;
                for (Hit hit : iteration.getIterationHits().getHit()) {
                    for (Hsp hsp : hit.getHitHsps().getHsp()) {
                        UniprotInterval bi = new UniprotInterval();
                        bi.entry = entry;
                        bi.featureType = feature;
                        bi.hit = hit;
                        bi.hsp = hsp;
                        LOG.debug("interval " + bi);
                        if (!bi.isFeatureOverlapHsp()) {
                            continue;
                        }
                        stdout().println(bi.toBedString());
                    }
                }
            }
            break;
        }
        break;
    }
// System.err.println("OK");
}
Also used : BlastOutputIterations(gov.nih.nlm.ncbi.blast.BlastOutputIterations) FeatureType(org.uniprot.FeatureType) Entry(org.uniprot.Entry) Hit(gov.nih.nlm.ncbi.blast.Hit) Hsp(gov.nih.nlm.ncbi.blast.Hsp) Iteration(gov.nih.nlm.ncbi.blast.Iteration)

Example 4 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class Biostar160470 method parseBlast.

/**
 * parses BLAST output
 */
private void parseBlast(final XMLEventReader r) throws Exception {
    final PrintStream out = super.openFileOrStdoutAsPrintStream(this.outputFile);
    final XMLOutputFactory xof = XMLOutputFactory.newFactory();
    final XMLEventWriter w = xof.createXMLEventWriter(out, "UTF-8");
    while (r.hasNext()) {
        final XMLEvent evt = r.peek();
        if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_program")) {
            // consumme
            w.add(r.nextEvent());
            final String BlastOutput_program = r.getElementText();
            if (!"tblastn".equals(BlastOutput_program)) {
                throw new IOException("only tblastn is supported but got : " + BlastOutput_program);
            }
        } else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Hit")) {
            final Hit hit = this.unmarshaller.unmarshal(r, Hit.class).getValue();
            parseHit(out, hit);
        } else {
            // consumme
            w.add(r.nextEvent());
        }
    }
    w.flush();
    w.close();
    out.flush();
    out.close();
}
Also used : PrintStream(java.io.PrintStream) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) Hit(gov.nih.nlm.ncbi.blast.Hit) XMLEventWriter(javax.xml.stream.XMLEventWriter) XMLEvent(javax.xml.stream.events.XMLEvent) IOException(java.io.IOException)

Example 5 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class Biostar3654 method parseIteration.

private void parseIteration(String database, Iteration iteration) throws Exception {
    this.pw.println("QUERY: " + iteration.getIterationQueryDef());
    this.pw.println("       ID:" + iteration.getIterationQueryID() + " Len:" + iteration.getIterationQueryLen());
    for (Hit hit : iteration.getIterationHits().getHit()) {
        this.pw.println(">" + hit.getHitDef());
        this.pw.println(" " + hit.getHitAccession());
        this.pw.println(" id:" + hit.getHitId() + " len:" + hit.getHitLen());
        for (Hsp hsp : hit.getHitHsps().getHsp()) {
            List<INSDFeature> qFeatures = fetchAnnotations(database, iteration.getIterationQueryID(), Integer.parseInt(hsp.getHspQueryFrom()), Integer.parseInt(hsp.getHspQueryTo()));
            List<INSDFeature> hFeatures = fetchAnnotations(database, hit.getHitAccession(), Integer.parseInt(hsp.getHspHitFrom()), Integer.parseInt(hsp.getHspHitTo()));
            this.pw.println();
            this.pw.println("   e-value:" + hsp.getHspEvalue() + " gap:" + hsp.getHspGaps() + " bitScore:" + hsp.getHspBitScore());
            this.pw.println();
            // create the Printer for the Query and the Hit
            QPrinter qPrinter = new QPrinter(hsp, qFeatures);
            HPrinter hPrinter = new HPrinter(hsp, hFeatures);
            // loop over the lines
            while (qPrinter.next() && hPrinter.next()) {
                qPrinter.print();
                this.pw.printf("QUERY %0" + margin + "d ", qPrinter.seqStart);
                this.pw.print(hsp.getHspQseq().substring(qPrinter.stringStart, qPrinter.stringEnd));
                this.pw.printf(" %0" + margin + "d", qPrinter.seqEnd - (qPrinter.sign));
                this.pw.println();
                this.pw.printf("      %" + margin + "s ", "");
                this.pw.print(hsp.getHspMidline().substring(qPrinter.stringStart, qPrinter.stringEnd));
                this.pw.println();
                this.pw.printf("HIT   %0" + margin + "d ", hPrinter.seqStart);
                this.pw.print(hsp.getHspHseq().substring(hPrinter.stringStart, hPrinter.stringEnd));
                this.pw.printf(" %0" + margin + "d", hPrinter.seqEnd - (hPrinter.sign));
                this.pw.println();
                hPrinter.print();
                this.pw.println();
            }
            this.pw.flush();
        }
    }
// System.err.println("OK");
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) INSDFeature(gov.nih.nlm.ncbi.insdseq.INSDFeature) Hsp(gov.nih.nlm.ncbi.blast.Hsp)

Aggregations

Hit (gov.nih.nlm.ncbi.blast.Hit)9 Hsp (gov.nih.nlm.ncbi.blast.Hsp)6 Iteration (gov.nih.nlm.ncbi.blast.Iteration)5 ArrayList (java.util.ArrayList)3 BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)2 BlastOutputIterations (gov.nih.nlm.ncbi.blast.BlastOutputIterations)2 XMLEventWriter (javax.xml.stream.XMLEventWriter)2 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)2 XMLEvent (javax.xml.stream.events.XMLEvent)2 HitHsps (gov.nih.nlm.ncbi.blast.HitHsps)1 IterationHits (gov.nih.nlm.ncbi.blast.IterationHits)1 GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)1 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)1 INSDFeature (gov.nih.nlm.ncbi.insdseq.INSDFeature)1 Cigar (htsjdk.samtools.Cigar)1 CigarElement (htsjdk.samtools.CigarElement)1 CigarOperator (htsjdk.samtools.CigarOperator)1 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1