use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class BlastNToSnp method run.
private void run(PrintWriter pw, XMLEventReader r) throws XMLStreamException, JAXBException {
long numIterations = 0L;
long numPerfectMath = 0L;
long numNoHit = 0L;
pw.print("#query");
pw.print('\t');
pw.print("hit");
pw.print('\t');
pw.print(("hit-index"));
pw.print('\t');
pw.print("hsp-index");
pw.print('\t');
pw.print("query-POS");
pw.print('\t');
pw.print("hit-POS");
pw.print('\t');
pw.print("STRAND");
pw.print('\t');
pw.print("REF(hit)");
pw.print('\t');
pw.print("ALT(query)");
pw.print('\t');
pw.print("blast.align_length");
pw.print('\t');
pw.print("blast.hit.var");
pw.print('\t');
pw.print("blast.query.var");
pw.print('\t');
pw.print("blast.mid.var");
pw.println();
for (; ; ) {
Iteration iter1 = peekIteration(r);
if (iter1 == null)
break;
++numIterations;
if (iter1.getIterationHits().getHit().isEmpty()) {
LOG.info("No hit found for " + iter1.getIterationQueryDef());
++numNoHit;
continue;
}
boolean found_mismatch = false;
for (int hit_loop = 0; hit_loop < iter1.getIterationHits().getHit().size(); ++hit_loop) {
final Hit hit = iter1.getIterationHits().getHit().get(hit_loop);
for (int hsp_loop = 0; hsp_loop < hit.getHitHsps().getHsp().size(); ++hsp_loop) {
final Hsp hsp = hit.getHitHsps().getHsp().get(hsp_loop);
String hspQseq = hsp.getHspQseq();
String hspMid = hsp.getHspMidline();
String hspHSeq = hsp.getHspHseq();
int hsp_query_from = Integer.parseInt(hsp.getHspQueryFrom());
int hsp_query_to = Integer.parseInt(hsp.getHspQueryTo());
int hsp_hit_from = Integer.parseInt(hsp.getHspHitFrom());
int hsp_hit_to = Integer.parseInt(hsp.getHspHitTo());
final int align_length = Integer.parseInt(hsp.getHspAlignLen());
final int hit_shift = (hsp_hit_from > hsp_hit_to ? -1 : 1);
int i = 0;
int query_index = hsp_query_from;
int hit_index = hsp_hit_from;
while (i < align_length) {
char ch = hspHSeq.charAt(i);
char cq = hspQseq.charAt(i);
char cm = hspMid.charAt(i);
if (cm == '|') {
++i;
query_index++;
hit_index += hit_shift;
continue;
}
found_mismatch = true;
int j = i + 1;
for (; ; ) {
int k = hspMid.indexOf(' ', j);
if (k == -1 || k - j > minGapSize)
break;
j = k + 1;
}
String ref = new String(hspHSeq.substring(i, j)).replaceAll("[\\- ]", "");
String alt = new String(hspQseq.substring(i, j)).replaceAll("[\\- ]", "");
if (hit_shift < 0) {
StringBuilder sb = new StringBuilder(alt.length());
for (int x = alt.length() - 1; x >= 0; --x) {
sb.append(AcidNucleics.complement(alt.charAt(x)));
}
alt = sb.toString();
sb = new StringBuilder(ref.length());
for (int x = ref.length() - 1; x >= 0; --x) {
sb.append(AcidNucleics.complement(ref.charAt(x)));
}
ref = sb.toString();
}
pw.print(iter1.getIterationQueryDef());
pw.print('\t');
pw.print(hit.getHitDef());
pw.print('\t');
pw.print((1 + hit_loop));
pw.print('\t');
pw.print((1 + hsp_loop));
pw.print('\t');
pw.print(query_index);
pw.print('\t');
pw.print(hit_index);
pw.print('\t');
pw.print(hit_shift == 1 ? '+' : '-');
pw.print('\t');
pw.print(ref);
pw.print('\t');
pw.print(alt);
pw.print('\t');
pw.print(align_length);
pw.print('\t');
pw.print(hspHSeq.substring(i, j).replace(' ', '.'));
pw.print('\t');
pw.print(hspQseq.substring(i, j).replace(' ', '.'));
pw.print('\t');
pw.print(hspMid.substring(i, j).replace(' ', '.'));
pw.println();
while (i < j) {
ch = hspHSeq.charAt(i);
cq = hspQseq.charAt(i);
if (ch != '-' && ch != ' ') {
hit_index += hit_shift;
}
if (cq != '-' && cq != ' ') {
query_index++;
}
++i;
}
}
if (hit_index - hit_shift != hsp_hit_to) {
marshaller.marshal(new JAXBElement<Hsp>(new QName("Hsp"), Hsp.class, hsp), stderr());
throw new IllegalStateException("Error expected hit-index= " + hit_index + "-" + hit_shift + " == hsp-hit-to=" + hsp_hit_to);
}
if (query_index - 1 != hsp_query_to) {
marshaller.marshal(new JAXBElement<Hit>(new QName("Hit"), Hit.class, hit), stderr());
throw new IllegalStateException("query_index " + query_index + "(1 != hsp_query_to:" + hsp_query_to);
}
if (pw.checkError())
break;
}
}
if (!found_mismatch) {
numPerfectMath++;
}
}
// end while read
LOG.info("ITERATIONS : " + numIterations);
LOG.info("ONLY_PERFECT_MATCH : " + numPerfectMath);
LOG.info("NO_HIT : " + numNoHit);
}
use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class MergeBlastXml method doWork.
@Override
public int doWork(List<String> args) {
if (args.isEmpty()) {
LOG.error("input xml missing");
return -1;
}
XMLEventReader rx = null;
XMLEventReader rx2 = null;
XMLEventWriter wx = null;
SortingCollection<Iteration> sortingCollection = null;
try {
JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
this.unmarshaller = jc.createUnmarshaller();
this.marshaller = jc.createMarshaller();
this.marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
this.marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
xmlInputFactory.setXMLResolver(new XMLResolver() {
@Override
public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
return null;
}
});
final Comparator<Iteration> hitComparator = (A, B) -> {
return A.getIterationQueryDef().compareTo(B.getIterationQueryDef());
};
sortingCollection = SortingCollection.newInstance(Iteration.class, new BlastIterationCodec(), hitComparator, this.maxRecordsInRam, this.tmpFile.toPath());
rx = xmlInputFactory.createXMLEventReader(new FileReader(args.get(0)));
XMLOutputFactory xof = XMLOutputFactory.newFactory();
if (this.outputFile != null) {
wx = xof.createXMLEventWriter(new StreamResult(this.outputFile));
} else {
wx = xof.createXMLEventWriter(new StreamResult(stdout()));
}
boolean in_iteration = false;
while (rx.hasNext()) {
final XMLEvent evt = rx.peek();
if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Iteration")) {
final Iteration iteration = this.unmarshaller.unmarshal(rx, Iteration.class).getValue();
sortingCollection.add(iteration);
} else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_iterations")) {
wx.add(rx.nextEvent());
in_iteration = true;
} else if (evt.isEndElement() && evt.asEndElement().getName().getLocalPart().equals("BlastOutput_iterations")) {
for (int optind = 1; optind < args.size(); ++optind) {
LOG.info("opening " + args.get(optind));
rx2 = xmlInputFactory.createXMLEventReader(new FileReader(args.get(optind)));
while (rx2.hasNext()) {
XMLEvent evt2 = rx2.peek();
if (evt2.isStartElement() && evt2.asStartElement().getName().getLocalPart().equals("Iteration")) {
final Iteration iteration = this.unmarshaller.unmarshal(rx2, Iteration.class).getValue();
sortingCollection.add(iteration);
} else {
rx2.nextEvent();
}
}
rx2.close();
LOG.info("close");
}
sortingCollection.doneAdding();
sortingCollection.setDestructiveIteration(true);
final CloseableIterator<Iteration> coliter = sortingCollection.iterator();
final EqualRangeIterator<Iteration> eq = new EqualRangeIterator<>(coliter, hitComparator);
while (coliter.hasNext()) {
final List<Iteration> L = eq.next();
for (int i = 1; i < L.size(); ++i) {
L.get(0).getIterationHits().getHit().addAll(L.get(i).getIterationHits().getHit());
}
marshaller.marshal(L.get(0), wx);
}
eq.close();
coliter.close();
sortingCollection.cleanup();
sortingCollection = null;
wx.add(rx.nextEvent());
in_iteration = false;
} else if (in_iteration) {
// consumme text
rx.nextEvent();
} else {
wx.add(rx.nextEvent());
}
}
wx.flush();
wx.close();
return 0;
} catch (Exception e) {
LOG.error(e);
if (sortingCollection != null) {
sortingCollection.cleanup();
}
return -1;
} finally {
}
}
use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class MergeSplittedBlast method merge.
private Iteration merge(Iteration iteration) {
if (iteration.getIterationHits().getHit().size() <= 1)
return iteration;
Map<String, List<Hit>> chrom2hits = new LinkedHashMap<String, List<Hit>>();
for (Hit hit : iteration.getIterationHits().getHit()) {
Split s = parse(hit);
if (s == null)
return iteration;
List<Hit> L = chrom2hits.get(s.chrom);
if (L == null) {
L = new ArrayList<Hit>();
chrom2hits.put(s.chrom, L);
}
L.add(hit);
}
Iteration newiteration = new Iteration();
List<Hit> newHits = new ArrayList<Hit>();
for (String chrom : chrom2hits.keySet()) {
List<Hit> L = chrom2hits.get(chrom);
Hit newHit = merge(L);
newHits.add(newHit);
}
newiteration.setIterationIterNum(iteration.getIterationIterNum());
newiteration.setIterationQueryID(iteration.getIterationQueryID());
newiteration.setIterationQueryLen(iteration.getIterationQueryLen());
newiteration.setIterationQueryDef(iteration.getIterationQueryDef());
newiteration.setIterationMessage(iteration.getIterationMessage());
newiteration.setIterationHits(new IterationHits());
newiteration.getIterationHits().getHit().addAll(newHits);
return newiteration;
}
use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class Biostar3654 method parseBlast.
/**
* parses BLAST output
*/
private void parseBlast(XMLEventReader r) throws Exception {
String database = "nucleotide";
while (r.hasNext()) {
XMLEvent evt = r.peek();
if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_program")) {
r.next();
String BlastOutput_program = r.getElementText();
if ("blastn".equals(BlastOutput_program)) {
database = "nucleotide";
} else if ("blastp".equals(BlastOutput_program)) {
database = "protein";
} else {
throw new IOException("only blastn && blastn are supported: " + database);
}
} else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Iteration")) {
Iteration iteration = this.unmarshaller.unmarshal(r, Iteration.class).getValue();
parseIteration(database, iteration);
} else {
// consumme
r.next();
}
}
}
use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class ReduceBlast method run.
private void run(final XMLEventReader r, final XMLEventWriter w) throws XMLStreamException, JAXBException {
final XMLEventFactory eventFactory = XMLEventFactory.newFactory();
boolean prev_was_iteration = false;
while (r.hasNext()) {
final XMLEvent evt = r.peek();
QName qname = null;
if (evt.isStartElement() && (qname = evt.asStartElement().getName()).getLocalPart().equals("Iteration")) {
final Iteration iteration = this.unmarshaller.unmarshal(r, Iteration.class).getValue();
if (!iteration.getIterationHits().getHit().isEmpty()) {
if (!this.keep_message)
iteration.setIterationMessage(null);
if (!this.keep_stats)
iteration.setIterationStat(null);
this.marshaller.marshal(new JAXBElement<Iteration>(qname, Iteration.class, iteration), w);
w.add(eventFactory.createCharacters("\n"));
}
prev_was_iteration = true;
continue;
} else if (prev_was_iteration && evt.isCharacters() && is_empty(evt.asCharacters().getData())) {
r.nextEvent();
continue;
}
w.add(r.nextEvent());
prev_was_iteration = false;
}
}
Aggregations