use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class BlastNToSnp method run.
private void run(PrintWriter pw, XMLEventReader r) throws XMLStreamException, JAXBException {
long numIterations = 0L;
long numPerfectMath = 0L;
long numNoHit = 0L;
pw.print("#query");
pw.print('\t');
pw.print("hit");
pw.print('\t');
pw.print(("hit-index"));
pw.print('\t');
pw.print("hsp-index");
pw.print('\t');
pw.print("query-POS");
pw.print('\t');
pw.print("hit-POS");
pw.print('\t');
pw.print("STRAND");
pw.print('\t');
pw.print("REF(hit)");
pw.print('\t');
pw.print("ALT(query)");
pw.print('\t');
pw.print("blast.align_length");
pw.print('\t');
pw.print("blast.hit.var");
pw.print('\t');
pw.print("blast.query.var");
pw.print('\t');
pw.print("blast.mid.var");
pw.println();
for (; ; ) {
Iteration iter1 = peekIteration(r);
if (iter1 == null)
break;
++numIterations;
if (iter1.getIterationHits().getHit().isEmpty()) {
LOG.info("No hit found for " + iter1.getIterationQueryDef());
++numNoHit;
continue;
}
boolean found_mismatch = false;
for (int hit_loop = 0; hit_loop < iter1.getIterationHits().getHit().size(); ++hit_loop) {
final Hit hit = iter1.getIterationHits().getHit().get(hit_loop);
for (int hsp_loop = 0; hsp_loop < hit.getHitHsps().getHsp().size(); ++hsp_loop) {
final Hsp hsp = hit.getHitHsps().getHsp().get(hsp_loop);
String hspQseq = hsp.getHspQseq();
String hspMid = hsp.getHspMidline();
String hspHSeq = hsp.getHspHseq();
int hsp_query_from = Integer.parseInt(hsp.getHspQueryFrom());
int hsp_query_to = Integer.parseInt(hsp.getHspQueryTo());
int hsp_hit_from = Integer.parseInt(hsp.getHspHitFrom());
int hsp_hit_to = Integer.parseInt(hsp.getHspHitTo());
final int align_length = Integer.parseInt(hsp.getHspAlignLen());
final int hit_shift = (hsp_hit_from > hsp_hit_to ? -1 : 1);
int i = 0;
int query_index = hsp_query_from;
int hit_index = hsp_hit_from;
while (i < align_length) {
char ch = hspHSeq.charAt(i);
char cq = hspQseq.charAt(i);
char cm = hspMid.charAt(i);
if (cm == '|') {
++i;
query_index++;
hit_index += hit_shift;
continue;
}
found_mismatch = true;
int j = i + 1;
for (; ; ) {
int k = hspMid.indexOf(' ', j);
if (k == -1 || k - j > minGapSize)
break;
j = k + 1;
}
String ref = new String(hspHSeq.substring(i, j)).replaceAll("[\\- ]", "");
String alt = new String(hspQseq.substring(i, j)).replaceAll("[\\- ]", "");
if (hit_shift < 0) {
StringBuilder sb = new StringBuilder(alt.length());
for (int x = alt.length() - 1; x >= 0; --x) {
sb.append(AcidNucleics.complement(alt.charAt(x)));
}
alt = sb.toString();
sb = new StringBuilder(ref.length());
for (int x = ref.length() - 1; x >= 0; --x) {
sb.append(AcidNucleics.complement(ref.charAt(x)));
}
ref = sb.toString();
}
pw.print(iter1.getIterationQueryDef());
pw.print('\t');
pw.print(hit.getHitDef());
pw.print('\t');
pw.print((1 + hit_loop));
pw.print('\t');
pw.print((1 + hsp_loop));
pw.print('\t');
pw.print(query_index);
pw.print('\t');
pw.print(hit_index);
pw.print('\t');
pw.print(hit_shift == 1 ? '+' : '-');
pw.print('\t');
pw.print(ref);
pw.print('\t');
pw.print(alt);
pw.print('\t');
pw.print(align_length);
pw.print('\t');
pw.print(hspHSeq.substring(i, j).replace(' ', '.'));
pw.print('\t');
pw.print(hspQseq.substring(i, j).replace(' ', '.'));
pw.print('\t');
pw.print(hspMid.substring(i, j).replace(' ', '.'));
pw.println();
while (i < j) {
ch = hspHSeq.charAt(i);
cq = hspQseq.charAt(i);
if (ch != '-' && ch != ' ') {
hit_index += hit_shift;
}
if (cq != '-' && cq != ' ') {
query_index++;
}
++i;
}
}
if (hit_index - hit_shift != hsp_hit_to) {
marshaller.marshal(new JAXBElement<Hsp>(new QName("Hsp"), Hsp.class, hsp), stderr());
throw new IllegalStateException("Error expected hit-index= " + hit_index + "-" + hit_shift + " == hsp-hit-to=" + hsp_hit_to);
}
if (query_index - 1 != hsp_query_to) {
marshaller.marshal(new JAXBElement<Hit>(new QName("Hit"), Hit.class, hit), stderr());
throw new IllegalStateException("query_index " + query_index + "(1 != hsp_query_to:" + hsp_query_to);
}
if (pw.checkError())
break;
}
}
if (!found_mismatch) {
numPerfectMath++;
}
}
// end while read
LOG.info("ITERATIONS : " + numIterations);
LOG.info("ONLY_PERFECT_MATCH : " + numPerfectMath);
LOG.info("NO_HIT : " + numNoHit);
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class MergeSplittedBlast method merge.
private Iteration merge(Iteration iteration) {
if (iteration.getIterationHits().getHit().size() <= 1)
return iteration;
Map<String, List<Hit>> chrom2hits = new LinkedHashMap<String, List<Hit>>();
for (Hit hit : iteration.getIterationHits().getHit()) {
Split s = parse(hit);
if (s == null)
return iteration;
List<Hit> L = chrom2hits.get(s.chrom);
if (L == null) {
L = new ArrayList<Hit>();
chrom2hits.put(s.chrom, L);
}
L.add(hit);
}
Iteration newiteration = new Iteration();
List<Hit> newHits = new ArrayList<Hit>();
for (String chrom : chrom2hits.keySet()) {
List<Hit> L = chrom2hits.get(chrom);
Hit newHit = merge(L);
newHits.add(newHit);
}
newiteration.setIterationIterNum(iteration.getIterationIterNum());
newiteration.setIterationQueryID(iteration.getIterationQueryID());
newiteration.setIterationQueryLen(iteration.getIterationQueryLen());
newiteration.setIterationQueryDef(iteration.getIterationQueryDef());
newiteration.setIterationMessage(iteration.getIterationMessage());
newiteration.setIterationHits(new IterationHits());
newiteration.getIterationHits().getHit().addAll(newHits);
return newiteration;
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class BlastMapAnnotations method printUniprot.
private void printUniprot(Uniprot uniprotSet) {
if (uniprotSet.getEntry().isEmpty()) {
LOG.warn("empty uniprot entry.");
return;
}
if (uniprotSet.getEntry().size() > 1) {
LOG.warn("entry contains more than one sequence.");
}
for (Entry entry : uniprotSet.getEntry()) {
BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
for (Iteration iteration : iterations.getIteration()) {
for (FeatureType feature : entry.getFeature()) {
if (!acceptfeature(feature.getType()))
continue;
for (Hit hit : iteration.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
UniprotInterval bi = new UniprotInterval();
bi.entry = entry;
bi.featureType = feature;
bi.hit = hit;
bi.hsp = hsp;
LOG.debug("interval " + bi);
if (!bi.isFeatureOverlapHsp()) {
continue;
}
stdout().println(bi.toBedString());
}
}
}
break;
}
break;
}
// System.err.println("OK");
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class Biostar160470 method parseBlast.
/**
* parses BLAST output
*/
private void parseBlast(final XMLEventReader r) throws Exception {
final PrintStream out = super.openFileOrStdoutAsPrintStream(this.outputFile);
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
final XMLEventWriter w = xof.createXMLEventWriter(out, "UTF-8");
while (r.hasNext()) {
final XMLEvent evt = r.peek();
if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_program")) {
// consumme
w.add(r.nextEvent());
final String BlastOutput_program = r.getElementText();
if (!"tblastn".equals(BlastOutput_program)) {
throw new IOException("only tblastn is supported but got : " + BlastOutput_program);
}
} else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Hit")) {
final Hit hit = this.unmarshaller.unmarshal(r, Hit.class).getValue();
parseHit(out, hit);
} else {
// consumme
w.add(r.nextEvent());
}
}
w.flush();
w.close();
out.flush();
out.close();
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class Biostar3654 method parseIteration.
private void parseIteration(String database, Iteration iteration) throws Exception {
this.pw.println("QUERY: " + iteration.getIterationQueryDef());
this.pw.println(" ID:" + iteration.getIterationQueryID() + " Len:" + iteration.getIterationQueryLen());
for (Hit hit : iteration.getIterationHits().getHit()) {
this.pw.println(">" + hit.getHitDef());
this.pw.println(" " + hit.getHitAccession());
this.pw.println(" id:" + hit.getHitId() + " len:" + hit.getHitLen());
for (Hsp hsp : hit.getHitHsps().getHsp()) {
List<INSDFeature> qFeatures = fetchAnnotations(database, iteration.getIterationQueryID(), Integer.parseInt(hsp.getHspQueryFrom()), Integer.parseInt(hsp.getHspQueryTo()));
List<INSDFeature> hFeatures = fetchAnnotations(database, hit.getHitAccession(), Integer.parseInt(hsp.getHspHitFrom()), Integer.parseInt(hsp.getHspHitTo()));
this.pw.println();
this.pw.println(" e-value:" + hsp.getHspEvalue() + " gap:" + hsp.getHspGaps() + " bitScore:" + hsp.getHspBitScore());
this.pw.println();
// create the Printer for the Query and the Hit
QPrinter qPrinter = new QPrinter(hsp, qFeatures);
HPrinter hPrinter = new HPrinter(hsp, hFeatures);
// loop over the lines
while (qPrinter.next() && hPrinter.next()) {
qPrinter.print();
this.pw.printf("QUERY %0" + margin + "d ", qPrinter.seqStart);
this.pw.print(hsp.getHspQseq().substring(qPrinter.stringStart, qPrinter.stringEnd));
this.pw.printf(" %0" + margin + "d", qPrinter.seqEnd - (qPrinter.sign));
this.pw.println();
this.pw.printf(" %" + margin + "s ", "");
this.pw.print(hsp.getHspMidline().substring(qPrinter.stringStart, qPrinter.stringEnd));
this.pw.println();
this.pw.printf("HIT %0" + margin + "d ", hPrinter.seqStart);
this.pw.print(hsp.getHspHseq().substring(hPrinter.stringStart, hPrinter.stringEnd));
this.pw.printf(" %0" + margin + "d", hPrinter.seqEnd - (hPrinter.sign));
this.pw.println();
hPrinter.print();
this.pw.println();
}
this.pw.flush();
}
}
// System.err.println("OK");
}
Aggregations