Search in sources :

Example 1 with Iteration

use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.

the class BlastNToSnp method run.

private void run(PrintWriter pw, XMLEventReader r) throws XMLStreamException, JAXBException {
    long numIterations = 0L;
    long numPerfectMath = 0L;
    long numNoHit = 0L;
    pw.print("#query");
    pw.print('\t');
    pw.print("hit");
    pw.print('\t');
    pw.print(("hit-index"));
    pw.print('\t');
    pw.print("hsp-index");
    pw.print('\t');
    pw.print("query-POS");
    pw.print('\t');
    pw.print("hit-POS");
    pw.print('\t');
    pw.print("STRAND");
    pw.print('\t');
    pw.print("REF(hit)");
    pw.print('\t');
    pw.print("ALT(query)");
    pw.print('\t');
    pw.print("blast.align_length");
    pw.print('\t');
    pw.print("blast.hit.var");
    pw.print('\t');
    pw.print("blast.query.var");
    pw.print('\t');
    pw.print("blast.mid.var");
    pw.println();
    for (; ; ) {
        Iteration iter1 = peekIteration(r);
        if (iter1 == null)
            break;
        ++numIterations;
        if (iter1.getIterationHits().getHit().isEmpty()) {
            LOG.info("No hit found for " + iter1.getIterationQueryDef());
            ++numNoHit;
            continue;
        }
        boolean found_mismatch = false;
        for (int hit_loop = 0; hit_loop < iter1.getIterationHits().getHit().size(); ++hit_loop) {
            final Hit hit = iter1.getIterationHits().getHit().get(hit_loop);
            for (int hsp_loop = 0; hsp_loop < hit.getHitHsps().getHsp().size(); ++hsp_loop) {
                final Hsp hsp = hit.getHitHsps().getHsp().get(hsp_loop);
                String hspQseq = hsp.getHspQseq();
                String hspMid = hsp.getHspMidline();
                String hspHSeq = hsp.getHspHseq();
                int hsp_query_from = Integer.parseInt(hsp.getHspQueryFrom());
                int hsp_query_to = Integer.parseInt(hsp.getHspQueryTo());
                int hsp_hit_from = Integer.parseInt(hsp.getHspHitFrom());
                int hsp_hit_to = Integer.parseInt(hsp.getHspHitTo());
                final int align_length = Integer.parseInt(hsp.getHspAlignLen());
                final int hit_shift = (hsp_hit_from > hsp_hit_to ? -1 : 1);
                int i = 0;
                int query_index = hsp_query_from;
                int hit_index = hsp_hit_from;
                while (i < align_length) {
                    char ch = hspHSeq.charAt(i);
                    char cq = hspQseq.charAt(i);
                    char cm = hspMid.charAt(i);
                    if (cm == '|') {
                        ++i;
                        query_index++;
                        hit_index += hit_shift;
                        continue;
                    }
                    found_mismatch = true;
                    int j = i + 1;
                    for (; ; ) {
                        int k = hspMid.indexOf(' ', j);
                        if (k == -1 || k - j > minGapSize)
                            break;
                        j = k + 1;
                    }
                    String ref = new String(hspHSeq.substring(i, j)).replaceAll("[\\- ]", "");
                    String alt = new String(hspQseq.substring(i, j)).replaceAll("[\\- ]", "");
                    if (hit_shift < 0) {
                        StringBuilder sb = new StringBuilder(alt.length());
                        for (int x = alt.length() - 1; x >= 0; --x) {
                            sb.append(AcidNucleics.complement(alt.charAt(x)));
                        }
                        alt = sb.toString();
                        sb = new StringBuilder(ref.length());
                        for (int x = ref.length() - 1; x >= 0; --x) {
                            sb.append(AcidNucleics.complement(ref.charAt(x)));
                        }
                        ref = sb.toString();
                    }
                    pw.print(iter1.getIterationQueryDef());
                    pw.print('\t');
                    pw.print(hit.getHitDef());
                    pw.print('\t');
                    pw.print((1 + hit_loop));
                    pw.print('\t');
                    pw.print((1 + hsp_loop));
                    pw.print('\t');
                    pw.print(query_index);
                    pw.print('\t');
                    pw.print(hit_index);
                    pw.print('\t');
                    pw.print(hit_shift == 1 ? '+' : '-');
                    pw.print('\t');
                    pw.print(ref);
                    pw.print('\t');
                    pw.print(alt);
                    pw.print('\t');
                    pw.print(align_length);
                    pw.print('\t');
                    pw.print(hspHSeq.substring(i, j).replace(' ', '.'));
                    pw.print('\t');
                    pw.print(hspQseq.substring(i, j).replace(' ', '.'));
                    pw.print('\t');
                    pw.print(hspMid.substring(i, j).replace(' ', '.'));
                    pw.println();
                    while (i < j) {
                        ch = hspHSeq.charAt(i);
                        cq = hspQseq.charAt(i);
                        if (ch != '-' && ch != ' ') {
                            hit_index += hit_shift;
                        }
                        if (cq != '-' && cq != ' ') {
                            query_index++;
                        }
                        ++i;
                    }
                }
                if (hit_index - hit_shift != hsp_hit_to) {
                    marshaller.marshal(new JAXBElement<Hsp>(new QName("Hsp"), Hsp.class, hsp), stderr());
                    throw new IllegalStateException("Error expected hit-index= " + hit_index + "-" + hit_shift + " == hsp-hit-to=" + hsp_hit_to);
                }
                if (query_index - 1 != hsp_query_to) {
                    marshaller.marshal(new JAXBElement<Hit>(new QName("Hit"), Hit.class, hit), stderr());
                    throw new IllegalStateException("query_index " + query_index + "(1 != hsp_query_to:" + hsp_query_to);
                }
                if (pw.checkError())
                    break;
            }
        }
        if (!found_mismatch) {
            numPerfectMath++;
        }
    }
    // end while read
    LOG.info("ITERATIONS : " + numIterations);
    LOG.info("ONLY_PERFECT_MATCH : " + numPerfectMath);
    LOG.info("NO_HIT : " + numNoHit);
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) QName(javax.xml.namespace.QName) Iteration(gov.nih.nlm.ncbi.blast.Iteration) Hit(gov.nih.nlm.ncbi.blast.Hit)

Example 2 with Iteration

use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.

the class MergeBlastXml method doWork.

@Override
public int doWork(List<String> args) {
    if (args.isEmpty()) {
        LOG.error("input xml missing");
        return -1;
    }
    XMLEventReader rx = null;
    XMLEventReader rx2 = null;
    XMLEventWriter wx = null;
    SortingCollection<Iteration> sortingCollection = null;
    try {
        JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
        this.unmarshaller = jc.createUnmarshaller();
        this.marshaller = jc.createMarshaller();
        this.marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        this.marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        xmlInputFactory.setXMLResolver(new XMLResolver() {

            @Override
            public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
                LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
                return null;
            }
        });
        final Comparator<Iteration> hitComparator = (A, B) -> {
            return A.getIterationQueryDef().compareTo(B.getIterationQueryDef());
        };
        sortingCollection = SortingCollection.newInstance(Iteration.class, new BlastIterationCodec(), hitComparator, this.maxRecordsInRam, this.tmpFile.toPath());
        rx = xmlInputFactory.createXMLEventReader(new FileReader(args.get(0)));
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        if (this.outputFile != null) {
            wx = xof.createXMLEventWriter(new StreamResult(this.outputFile));
        } else {
            wx = xof.createXMLEventWriter(new StreamResult(stdout()));
        }
        boolean in_iteration = false;
        while (rx.hasNext()) {
            final XMLEvent evt = rx.peek();
            if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Iteration")) {
                final Iteration iteration = this.unmarshaller.unmarshal(rx, Iteration.class).getValue();
                sortingCollection.add(iteration);
            } else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_iterations")) {
                wx.add(rx.nextEvent());
                in_iteration = true;
            } else if (evt.isEndElement() && evt.asEndElement().getName().getLocalPart().equals("BlastOutput_iterations")) {
                for (int optind = 1; optind < args.size(); ++optind) {
                    LOG.info("opening " + args.get(optind));
                    rx2 = xmlInputFactory.createXMLEventReader(new FileReader(args.get(optind)));
                    while (rx2.hasNext()) {
                        XMLEvent evt2 = rx2.peek();
                        if (evt2.isStartElement() && evt2.asStartElement().getName().getLocalPart().equals("Iteration")) {
                            final Iteration iteration = this.unmarshaller.unmarshal(rx2, Iteration.class).getValue();
                            sortingCollection.add(iteration);
                        } else {
                            rx2.nextEvent();
                        }
                    }
                    rx2.close();
                    LOG.info("close");
                }
                sortingCollection.doneAdding();
                sortingCollection.setDestructiveIteration(true);
                final CloseableIterator<Iteration> coliter = sortingCollection.iterator();
                final EqualRangeIterator<Iteration> eq = new EqualRangeIterator<>(coliter, hitComparator);
                while (coliter.hasNext()) {
                    final List<Iteration> L = eq.next();
                    for (int i = 1; i < L.size(); ++i) {
                        L.get(0).getIterationHits().getHit().addAll(L.get(i).getIterationHits().getHit());
                    }
                    marshaller.marshal(L.get(0), wx);
                }
                eq.close();
                coliter.close();
                sortingCollection.cleanup();
                sortingCollection = null;
                wx.add(rx.nextEvent());
                in_iteration = false;
            } else if (in_iteration) {
                // consumme text
                rx.nextEvent();
            } else {
                wx.add(rx.nextEvent());
            }
        }
        wx.flush();
        wx.close();
        return 0;
    } catch (Exception e) {
        LOG.error(e);
        if (sortingCollection != null) {
            sortingCollection.cleanup();
        }
        return -1;
    } finally {
    }
}
Also used : DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) XMLInputFactory(javax.xml.stream.XMLInputFactory) Marshaller(javax.xml.bind.Marshaller) StreamResult(javax.xml.transform.stream.StreamResult) StreamSource(javax.xml.transform.stream.StreamSource) XMLEventWriter(javax.xml.stream.XMLEventWriter) DataOutputStream(java.io.DataOutputStream) XMLEvent(javax.xml.stream.events.XMLEvent) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) XMLStreamException(javax.xml.stream.XMLStreamException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Iteration(gov.nih.nlm.ncbi.blast.Iteration) XMLResolver(javax.xml.stream.XMLResolver) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) XMLEventReader(javax.xml.stream.XMLEventReader) SortingCollection(htsjdk.samtools.util.SortingCollection) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) JAXBException(javax.xml.bind.JAXBException) File(java.io.File) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) List(java.util.List) StringReader(java.io.StringReader) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) FileReader(java.io.FileReader) Comparator(java.util.Comparator) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) Iteration(gov.nih.nlm.ncbi.blast.Iteration) JAXBContext(javax.xml.bind.JAXBContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) XMLEventWriter(javax.xml.stream.XMLEventWriter) XMLEventReader(javax.xml.stream.XMLEventReader) XMLResolver(javax.xml.stream.XMLResolver) FileReader(java.io.FileReader) StreamResult(javax.xml.transform.stream.StreamResult) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) JAXBException(javax.xml.bind.JAXBException) XMLStreamException(javax.xml.stream.XMLStreamException) XMLEvent(javax.xml.stream.events.XMLEvent) XMLInputFactory(javax.xml.stream.XMLInputFactory)

Example 3 with Iteration

use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Iteration merge(Iteration iteration) {
    if (iteration.getIterationHits().getHit().size() <= 1)
        return iteration;
    Map<String, List<Hit>> chrom2hits = new LinkedHashMap<String, List<Hit>>();
    for (Hit hit : iteration.getIterationHits().getHit()) {
        Split s = parse(hit);
        if (s == null)
            return iteration;
        List<Hit> L = chrom2hits.get(s.chrom);
        if (L == null) {
            L = new ArrayList<Hit>();
            chrom2hits.put(s.chrom, L);
        }
        L.add(hit);
    }
    Iteration newiteration = new Iteration();
    List<Hit> newHits = new ArrayList<Hit>();
    for (String chrom : chrom2hits.keySet()) {
        List<Hit> L = chrom2hits.get(chrom);
        Hit newHit = merge(L);
        newHits.add(newHit);
    }
    newiteration.setIterationIterNum(iteration.getIterationIterNum());
    newiteration.setIterationQueryID(iteration.getIterationQueryID());
    newiteration.setIterationQueryLen(iteration.getIterationQueryLen());
    newiteration.setIterationQueryDef(iteration.getIterationQueryDef());
    newiteration.setIterationMessage(iteration.getIterationMessage());
    newiteration.setIterationHits(new IterationHits());
    newiteration.getIterationHits().getHit().addAll(newHits);
    return newiteration;
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) IterationHits(gov.nih.nlm.ncbi.blast.IterationHits) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Iteration(gov.nih.nlm.ncbi.blast.Iteration) LinkedHashMap(java.util.LinkedHashMap)

Example 4 with Iteration

use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.

the class Biostar3654 method parseBlast.

/**
 * parses BLAST output
 */
private void parseBlast(XMLEventReader r) throws Exception {
    String database = "nucleotide";
    while (r.hasNext()) {
        XMLEvent evt = r.peek();
        if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_program")) {
            r.next();
            String BlastOutput_program = r.getElementText();
            if ("blastn".equals(BlastOutput_program)) {
                database = "nucleotide";
            } else if ("blastp".equals(BlastOutput_program)) {
                database = "protein";
            } else {
                throw new IOException("only blastn && blastn are supported: " + database);
            }
        } else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Iteration")) {
            Iteration iteration = this.unmarshaller.unmarshal(r, Iteration.class).getValue();
            parseIteration(database, iteration);
        } else {
            // consumme
            r.next();
        }
    }
}
Also used : XMLEvent(javax.xml.stream.events.XMLEvent) Iteration(gov.nih.nlm.ncbi.blast.Iteration) IOException(java.io.IOException)

Example 5 with Iteration

use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.

the class ReduceBlast method run.

private void run(final XMLEventReader r, final XMLEventWriter w) throws XMLStreamException, JAXBException {
    final XMLEventFactory eventFactory = XMLEventFactory.newFactory();
    boolean prev_was_iteration = false;
    while (r.hasNext()) {
        final XMLEvent evt = r.peek();
        QName qname = null;
        if (evt.isStartElement() && (qname = evt.asStartElement().getName()).getLocalPart().equals("Iteration")) {
            final Iteration iteration = this.unmarshaller.unmarshal(r, Iteration.class).getValue();
            if (!iteration.getIterationHits().getHit().isEmpty()) {
                if (!this.keep_message)
                    iteration.setIterationMessage(null);
                if (!this.keep_stats)
                    iteration.setIterationStat(null);
                this.marshaller.marshal(new JAXBElement<Iteration>(qname, Iteration.class, iteration), w);
                w.add(eventFactory.createCharacters("\n"));
            }
            prev_was_iteration = true;
            continue;
        } else if (prev_was_iteration && evt.isCharacters() && is_empty(evt.asCharacters().getData())) {
            r.nextEvent();
            continue;
        }
        w.add(r.nextEvent());
        prev_was_iteration = false;
    }
}
Also used : XMLEventFactory(javax.xml.stream.XMLEventFactory) QName(javax.xml.namespace.QName) XMLEvent(javax.xml.stream.events.XMLEvent) Iteration(gov.nih.nlm.ncbi.blast.Iteration)

Aggregations

Iteration (gov.nih.nlm.ncbi.blast.Iteration)12 Hit (gov.nih.nlm.ncbi.blast.Hit)5 XMLEvent (javax.xml.stream.events.XMLEvent)5 Hsp (gov.nih.nlm.ncbi.blast.Hsp)4 ArrayList (java.util.ArrayList)4 XMLStreamException (javax.xml.stream.XMLStreamException)3 BlastOutputIterations (gov.nih.nlm.ncbi.blast.BlastOutputIterations)2 IOException (java.io.IOException)2 List (java.util.List)2 QName (javax.xml.namespace.QName)2 Parameter (com.beust.jcommander.Parameter)1 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)1 BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)1 EqualRangeIterator (com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator)1 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 AbstractDataCodec (com.github.lindenb.jvarkit.util.picard.AbstractDataCodec)1 IterationHits (gov.nih.nlm.ncbi.blast.IterationHits)1 GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1