Search in sources :

Example 6 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class BlastFilterJS method doWork.

@Override
public int doWork(List<String> args) {
    final CompiledScript compiledScript;
    Unmarshaller unmarshaller;
    Marshaller marshaller;
    try {
        compiledScript = super.compileJavascript(scriptExpr, scriptFile);
        JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
        unmarshaller = jc.createUnmarshaller();
        marshaller = jc.createMarshaller();
        XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        PrintWriter pw = openFileOrStdoutAsPrintWriter(outputFile);
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.FALSE);
        XMLEventWriter w = xof.createXMLEventWriter(pw);
        final String inputName = oneFileOrNull(args);
        StreamSource src = null;
        if (inputName == null) {
            LOG.info("Reading stdin");
            src = new StreamSource(stdin());
        } else {
            LOG.info("Reading file " + inputName);
            src = new StreamSource(new File(inputName));
        }
        XMLEventReader r = xmlInputFactory.createXMLEventReader(src);
        XMLEventFactory eventFactory = XMLEventFactory.newFactory();
        SimpleBindings bindings = new SimpleBindings();
        while (r.hasNext()) {
            XMLEvent evt = r.peek();
            switch(evt.getEventType()) {
                case XMLEvent.START_ELEMENT:
                    {
                        StartElement sE = evt.asStartElement();
                        Hit hit = null;
                        JAXBElement<Hit> jaxbElement = null;
                        if (sE.getName().getLocalPart().equals("Hit")) {
                            jaxbElement = unmarshaller.unmarshal(r, Hit.class);
                            hit = jaxbElement.getValue();
                        } else {
                            w.add(r.nextEvent());
                            break;
                        }
                        if (hit != null) {
                            bindings.put("hit", hit);
                            boolean accept = super.evalJavaScriptBoolean(compiledScript, bindings);
                            if (accept) {
                                marshaller.marshal(jaxbElement, w);
                                w.add(eventFactory.createCharacters("\n"));
                            }
                        }
                        break;
                    }
                case XMLEvent.SPACE:
                    break;
                default:
                    {
                        w.add(r.nextEvent());
                        break;
                    }
            }
            r.close();
        }
        w.flush();
        w.close();
        pw.flush();
        pw.close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : CompiledScript(javax.script.CompiledScript) Marshaller(javax.xml.bind.Marshaller) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) XMLEventFactory(javax.xml.stream.XMLEventFactory) StreamSource(javax.xml.transform.stream.StreamSource) JAXBContext(javax.xml.bind.JAXBContext) JAXBElement(javax.xml.bind.JAXBElement) StartElement(javax.xml.stream.events.StartElement) Hit(gov.nih.nlm.ncbi.blast.Hit) XMLEventWriter(javax.xml.stream.XMLEventWriter) SimpleBindings(javax.script.SimpleBindings) XMLEvent(javax.xml.stream.events.XMLEvent) XMLEventReader(javax.xml.stream.XMLEventReader) Unmarshaller(javax.xml.bind.Unmarshaller) File(java.io.File) XMLInputFactory(javax.xml.stream.XMLInputFactory) PrintWriter(java.io.PrintWriter)

Example 7 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class MergeSplittedBlast method merge.

private Hit merge(List<Hit> hits) {
    Hit first = hits.get(0);
    Split firstSplit = parse(first);
    Hit newHit = new Hit();
    newHit.setHitAccession(first.getHitAccession());
    newHit.setHitDef(firstSplit.chrom);
    newHit.setHitId(first.getHitId());
    newHit.setHitNum(first.getHitNum());
    newHit.setHitLen(String.valueOf(firstSplit.len));
    newHit.setHitHsps(new HitHsps());
    List<Hsp> hsps = new ArrayList<Hsp>();
    for (Hit h0 : hits) {
        /* fix hit_from/hit_to */
        Split split = parse(h0);
        List<Hsp> L = h0.getHitHsps().getHsp();
        for (int i = 0; i < L.size(); ++i) {
            Hsp newHsp = BlastHspAlignment.cloneHsp(L.get(i));
            BlastHspAlignment aln = new BlastHspAlignment(newHsp);
            int h_from = aln.getHitFrom1();
            int h_to = aln.getHitTo1();
            h_from += (split.start1 - 1);
            h_to += (split.start1 - 1);
            newHsp.setHspHitFrom(String.valueOf(h_from));
            newHsp.setHspHitTo(String.valueOf(h_to));
            hsps.add(newHsp);
        }
    }
    boolean done = false;
    while (!done) {
        done = true;
        for (int i = 0; i + 1 < hsps.size(); ++i) {
            Hsp hsp0 = hsps.get(i);
            for (int j = i + 1; j < hsps.size(); ++j) {
                // debug("comparing hsp "+i+" vs "+j+" N="+hsps.size());
                Hsp hsp1 = hsps.get(j);
                Hsp newHitHsp = merge(hsp0, hsp1);
                if (newHitHsp != null) {
                    hsps.set(i, newHitHsp);
                    hsps.remove(j);
                    done = false;
                    break;
                }
            }
            if (!done)
                break;
        }
    }
    for (int i = 0; i < hsps.size(); ++i) hsps.get(i).setHspNum(String.valueOf(i + 1));
    newHit.getHitHsps().getHsp().addAll(hsps);
    return newHit;
}
Also used : Hit(gov.nih.nlm.ncbi.blast.Hit) HitHsps(gov.nih.nlm.ncbi.blast.HitHsps) Hsp(gov.nih.nlm.ncbi.blast.Hsp) ArrayList(java.util.ArrayList) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)

Example 8 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class BlastToSam method convertIterationToSequenceIteration.

private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
    final SequenceIteration sequenceIteration = new SequenceIteration();
    if (stack.isEmpty())
        return sequenceIteration;
    final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
    // sequenceIteration.iteration=iter1;
    final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
    final StringBuilder readContent = new StringBuilder();
    final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
    for (final Iteration iter1 : stack) {
        for (final Hit hit : iter1.getIterationHits().getHit()) {
            for (final Hsp hsp : hit.getHitHsps().getHsp()) {
                for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
                    char c = a.getQueryChar();
                    if (!Character.isLetter(c))
                        continue;
                    final int queryIndex0 = a.getQueryIndex1() - 1;
                    while (readContent.length() <= queryIndex0) readContent.append('N');
                    if (readContent.charAt(queryIndex0) == 'N') {
                        readContent.setCharAt(queryIndex0, c);
                    } else if (readContent.charAt(queryIndex0) != c) {
                        throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
                    }
                }
            }
        }
    }
    for (Iteration iter1 : stack) {
        for (Hit hit : iter1.getIterationHits().getHit()) {
            for (Hsp hsp : hit.getHitHsps().getHsp()) {
                SAMRecord rec = samRecordFactory.createSAMRecord(header);
                rec.setReadUnmappedFlag(false);
                rec.setReadName(iter1.getIterationQueryDef());
                if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
                    rec.setReferenceName(hit.getHitAccession());
                } else {
                    rec.setReferenceName(hit.getHitDef());
                }
                final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
                if (ssr == null) {
                    LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
                    rec.setReferenceIndex(-1);
                } else {
                    rec.setReferenceIndex(ssr.getSequenceIndex());
                }
                final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
                rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
                final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
                for (BlastHspAlignment.Align a : blastHspAlignment) {
                    // System.err.println("##"+a);
                    if (a.getMidChar() == '|') {
                        cigarL.add(CigarOperator.EQ);
                    } else if (a.getMidChar() == ':') {
                        cigarL.add(CigarOperator.M);
                    } else if (a.getHitChar() == '-') {
                        cigarL.add(CigarOperator.I);
                    } else if (a.getQueryChar() == '-') {
                        cigarL.add(CigarOperator.D);
                    } else {
                        cigarL.add(CigarOperator.X);
                    }
                }
                if (cigarL.size() != hsp.getHspMidline().length()) {
                    throw new IllegalStateException("Boumm");
                }
                Cigar cigarE = new Cigar();
                if (blastHspAlignment.getQueryFrom1() > 1) {
                    cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
                }
                int x = 0;
                while (x < cigarL.size()) {
                    int y = x + 1;
                    while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
                        ++y;
                    }
                    cigarE.add(new CigarElement(y - x, cigarL.get(x)));
                    x = y;
                }
                /* soft clip */
                if (blastHspAlignment.getQueryTo1() < readContent.length()) {
                    cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
                }
                /* hard clip */
                if (readContent.length() < iterLength) {
                    cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
                }
                rec.setCigar(cigarE);
                rec.setMappingQuality(40);
                rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
                rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
                rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
                rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
                rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
                rec.setAttribute("RG", rg1.getId());
                // setAlignmentEnd not supported in SAM API
                // rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
                sequenceIteration.records.add(rec);
            }
        }
    }
    if (readContent.length() == 0) {
        readContent.append('N');
    }
    byte[] readBases = readContent.toString().getBytes();
    char[] readQuals = new char[readBases.length];
    for (int i = 0; i < readBases.length; ++i) {
        readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
    }
    if (sequenceIteration.records.isEmpty()) {
        SAMRecord rec = samRecordFactory.createSAMRecord(header);
        rec.setReadName(stack.get(0).getIterationQueryDef());
        rec.setReadUnmappedFlag(true);
        rec.setAttribute("RG", rg1.getId());
        sequenceIteration.records.add(rec);
    }
    for (SAMRecord rec : sequenceIteration.records) {
        rec.setReadString(new String(readBases));
        rec.setReadBases(readBases);
        rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
        rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
    }
    return sequenceIteration;
}
Also used : Hsp(gov.nih.nlm.ncbi.blast.Hsp) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) BlastHspAlignment(com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment) ArrayList(java.util.ArrayList) Iteration(gov.nih.nlm.ncbi.blast.Iteration) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarOperator(htsjdk.samtools.CigarOperator) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) Hit(gov.nih.nlm.ncbi.blast.Hit) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory)

Example 9 with Hit

use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.

the class BlastMapAnnotations method printGB.

private void printGB(GBSet gbSet) {
    for (GBSeq gbSeq : gbSet.getGBSeq()) {
        BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
        for (Iteration iteration : iterations.getIteration()) {
            for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
                if (feature.getGBFeatureIntervals() == null)
                    continue;
                if (!acceptfeature(feature.getGBFeatureKey()))
                    continue;
                for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
                    for (Hit hit : iteration.getIterationHits().getHit()) {
                        for (Hsp hsp : hit.getHitHsps().getHsp()) {
                            GenbankInterval bi = new GenbankInterval();
                            bi.gbSeq = gbSeq;
                            bi.gbFeature = feature;
                            bi.gbInterval = interval;
                            bi.hit = hit;
                            bi.hsp = hsp;
                            LOG.debug("interval " + bi);
                            if (!bi.isGbForward())
                                LOG.info("CHECK INTERVAL REVERSE");
                            if (!bi.isFeatureOverlapHsp())
                                continue;
                            stdout().println(bi.toBedString());
                        }
                    }
                }
            }
            break;
        }
    }
// System.err.println("OK");
}
Also used : BlastOutputIterations(gov.nih.nlm.ncbi.blast.BlastOutputIterations) Hit(gov.nih.nlm.ncbi.blast.Hit) GBSeq(gov.nih.nlm.ncbi.gb.GBSeq) Hsp(gov.nih.nlm.ncbi.blast.Hsp) GBFeature(gov.nih.nlm.ncbi.gb.GBFeature) Iteration(gov.nih.nlm.ncbi.blast.Iteration) GBInterval(gov.nih.nlm.ncbi.gb.GBInterval)

Aggregations

Hit (gov.nih.nlm.ncbi.blast.Hit)9 Hsp (gov.nih.nlm.ncbi.blast.Hsp)6 Iteration (gov.nih.nlm.ncbi.blast.Iteration)5 ArrayList (java.util.ArrayList)3 BlastHspAlignment (com.github.lindenb.jvarkit.util.bio.blast.BlastHspAlignment)2 BlastOutputIterations (gov.nih.nlm.ncbi.blast.BlastOutputIterations)2 XMLEventWriter (javax.xml.stream.XMLEventWriter)2 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)2 XMLEvent (javax.xml.stream.events.XMLEvent)2 HitHsps (gov.nih.nlm.ncbi.blast.HitHsps)1 IterationHits (gov.nih.nlm.ncbi.blast.IterationHits)1 GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)1 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)1 INSDFeature (gov.nih.nlm.ncbi.insdseq.INSDFeature)1 Cigar (htsjdk.samtools.Cigar)1 CigarElement (htsjdk.samtools.CigarElement)1 CigarOperator (htsjdk.samtools.CigarOperator)1 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1