use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class BlastNToSnp method run.
private void run(PrintWriter pw, XMLEventReader r) throws XMLStreamException, JAXBException {
long numIterations = 0L;
long numPerfectMath = 0L;
long numNoHit = 0L;
pw.print("#query");
pw.print('\t');
pw.print("hit");
pw.print('\t');
pw.print(("hit-index"));
pw.print('\t');
pw.print("hsp-index");
pw.print('\t');
pw.print("query-POS");
pw.print('\t');
pw.print("hit-POS");
pw.print('\t');
pw.print("STRAND");
pw.print('\t');
pw.print("REF(hit)");
pw.print('\t');
pw.print("ALT(query)");
pw.print('\t');
pw.print("blast.align_length");
pw.print('\t');
pw.print("blast.hit.var");
pw.print('\t');
pw.print("blast.query.var");
pw.print('\t');
pw.print("blast.mid.var");
pw.println();
for (; ; ) {
Iteration iter1 = peekIteration(r);
if (iter1 == null)
break;
++numIterations;
if (iter1.getIterationHits().getHit().isEmpty()) {
LOG.info("No hit found for " + iter1.getIterationQueryDef());
++numNoHit;
continue;
}
boolean found_mismatch = false;
for (int hit_loop = 0; hit_loop < iter1.getIterationHits().getHit().size(); ++hit_loop) {
final Hit hit = iter1.getIterationHits().getHit().get(hit_loop);
for (int hsp_loop = 0; hsp_loop < hit.getHitHsps().getHsp().size(); ++hsp_loop) {
final Hsp hsp = hit.getHitHsps().getHsp().get(hsp_loop);
String hspQseq = hsp.getHspQseq();
String hspMid = hsp.getHspMidline();
String hspHSeq = hsp.getHspHseq();
int hsp_query_from = Integer.parseInt(hsp.getHspQueryFrom());
int hsp_query_to = Integer.parseInt(hsp.getHspQueryTo());
int hsp_hit_from = Integer.parseInt(hsp.getHspHitFrom());
int hsp_hit_to = Integer.parseInt(hsp.getHspHitTo());
final int align_length = Integer.parseInt(hsp.getHspAlignLen());
final int hit_shift = (hsp_hit_from > hsp_hit_to ? -1 : 1);
int i = 0;
int query_index = hsp_query_from;
int hit_index = hsp_hit_from;
while (i < align_length) {
char ch = hspHSeq.charAt(i);
char cq = hspQseq.charAt(i);
char cm = hspMid.charAt(i);
if (cm == '|') {
++i;
query_index++;
hit_index += hit_shift;
continue;
}
found_mismatch = true;
int j = i + 1;
for (; ; ) {
int k = hspMid.indexOf(' ', j);
if (k == -1 || k - j > minGapSize)
break;
j = k + 1;
}
String ref = new String(hspHSeq.substring(i, j)).replaceAll("[\\- ]", "");
String alt = new String(hspQseq.substring(i, j)).replaceAll("[\\- ]", "");
if (hit_shift < 0) {
StringBuilder sb = new StringBuilder(alt.length());
for (int x = alt.length() - 1; x >= 0; --x) {
sb.append(AcidNucleics.complement(alt.charAt(x)));
}
alt = sb.toString();
sb = new StringBuilder(ref.length());
for (int x = ref.length() - 1; x >= 0; --x) {
sb.append(AcidNucleics.complement(ref.charAt(x)));
}
ref = sb.toString();
}
pw.print(iter1.getIterationQueryDef());
pw.print('\t');
pw.print(hit.getHitDef());
pw.print('\t');
pw.print((1 + hit_loop));
pw.print('\t');
pw.print((1 + hsp_loop));
pw.print('\t');
pw.print(query_index);
pw.print('\t');
pw.print(hit_index);
pw.print('\t');
pw.print(hit_shift == 1 ? '+' : '-');
pw.print('\t');
pw.print(ref);
pw.print('\t');
pw.print(alt);
pw.print('\t');
pw.print(align_length);
pw.print('\t');
pw.print(hspHSeq.substring(i, j).replace(' ', '.'));
pw.print('\t');
pw.print(hspQseq.substring(i, j).replace(' ', '.'));
pw.print('\t');
pw.print(hspMid.substring(i, j).replace(' ', '.'));
pw.println();
while (i < j) {
ch = hspHSeq.charAt(i);
cq = hspQseq.charAt(i);
if (ch != '-' && ch != ' ') {
hit_index += hit_shift;
}
if (cq != '-' && cq != ' ') {
query_index++;
}
++i;
}
}
if (hit_index - hit_shift != hsp_hit_to) {
marshaller.marshal(new JAXBElement<Hsp>(new QName("Hsp"), Hsp.class, hsp), stderr());
throw new IllegalStateException("Error expected hit-index= " + hit_index + "-" + hit_shift + " == hsp-hit-to=" + hsp_hit_to);
}
if (query_index - 1 != hsp_query_to) {
marshaller.marshal(new JAXBElement<Hit>(new QName("Hit"), Hit.class, hit), stderr());
throw new IllegalStateException("query_index " + query_index + "(1 != hsp_query_to:" + hsp_query_to);
}
if (pw.checkError())
break;
}
}
if (!found_mismatch) {
numPerfectMath++;
}
}
// end while read
LOG.info("ITERATIONS : " + numIterations);
LOG.info("ONLY_PERFECT_MATCH : " + numPerfectMath);
LOG.info("NO_HIT : " + numNoHit);
}
use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class BlastMapAnnotations method printUniprot.
private void printUniprot(Uniprot uniprotSet) {
if (uniprotSet.getEntry().isEmpty()) {
LOG.warn("empty uniprot entry.");
return;
}
if (uniprotSet.getEntry().size() > 1) {
LOG.warn("entry contains more than one sequence.");
}
for (Entry entry : uniprotSet.getEntry()) {
BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
for (Iteration iteration : iterations.getIteration()) {
for (FeatureType feature : entry.getFeature()) {
if (!acceptfeature(feature.getType()))
continue;
for (Hit hit : iteration.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
UniprotInterval bi = new UniprotInterval();
bi.entry = entry;
bi.featureType = feature;
bi.hit = hit;
bi.hsp = hsp;
LOG.debug("interval " + bi);
if (!bi.isFeatureOverlapHsp()) {
continue;
}
stdout().println(bi.toBedString());
}
}
}
break;
}
break;
}
// System.err.println("OK");
}
use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class BlastHspAlignment method cloneHsp.
public static Hsp cloneHsp(Hsp hsp) {
Hsp h2 = new Hsp();
h2.setHspNum(hsp.getHspNum());
h2.setHspBitScore(hsp.getHspBitScore());
h2.setHspScore(hsp.getHspScore());
h2.setHspEvalue(hsp.getHspEvalue());
h2.setHspIdentity(hsp.getHspIdentity());
h2.setHspGaps(hsp.getHspGaps());
h2.setHspPositive(hsp.getHspPositive());
h2.setHspAlignLen(hsp.getHspAlignLen());
h2.setHspQueryFrom(hsp.getHspQueryFrom());
h2.setHspQueryTo(hsp.getHspQueryTo());
h2.setHspQueryFrame(hsp.getHspQueryFrame());
h2.setHspHitFrom(hsp.getHspHitFrom());
h2.setHspHitTo(hsp.getHspHitTo());
h2.setHspHitFrame(hsp.getHspHitFrame());
h2.setHspQseq(hsp.getHspQseq());
h2.setHspHseq(hsp.getHspHseq());
h2.setHspMidline(hsp.getHspMidline());
h2.setHspPatternFrom(hsp.getHspPatternFrom());
h2.setHspPatternTo(hsp.getHspPatternTo());
return h2;
}
use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class Biostar3654 method parseIteration.
private void parseIteration(String database, Iteration iteration) throws Exception {
this.pw.println("QUERY: " + iteration.getIterationQueryDef());
this.pw.println(" ID:" + iteration.getIterationQueryID() + " Len:" + iteration.getIterationQueryLen());
for (Hit hit : iteration.getIterationHits().getHit()) {
this.pw.println(">" + hit.getHitDef());
this.pw.println(" " + hit.getHitAccession());
this.pw.println(" id:" + hit.getHitId() + " len:" + hit.getHitLen());
for (Hsp hsp : hit.getHitHsps().getHsp()) {
List<INSDFeature> qFeatures = fetchAnnotations(database, iteration.getIterationQueryID(), Integer.parseInt(hsp.getHspQueryFrom()), Integer.parseInt(hsp.getHspQueryTo()));
List<INSDFeature> hFeatures = fetchAnnotations(database, hit.getHitAccession(), Integer.parseInt(hsp.getHspHitFrom()), Integer.parseInt(hsp.getHspHitTo()));
this.pw.println();
this.pw.println(" e-value:" + hsp.getHspEvalue() + " gap:" + hsp.getHspGaps() + " bitScore:" + hsp.getHspBitScore());
this.pw.println();
// create the Printer for the Query and the Hit
QPrinter qPrinter = new QPrinter(hsp, qFeatures);
HPrinter hPrinter = new HPrinter(hsp, hFeatures);
// loop over the lines
while (qPrinter.next() && hPrinter.next()) {
qPrinter.print();
this.pw.printf("QUERY %0" + margin + "d ", qPrinter.seqStart);
this.pw.print(hsp.getHspQseq().substring(qPrinter.stringStart, qPrinter.stringEnd));
this.pw.printf(" %0" + margin + "d", qPrinter.seqEnd - (qPrinter.sign));
this.pw.println();
this.pw.printf(" %" + margin + "s ", "");
this.pw.print(hsp.getHspMidline().substring(qPrinter.stringStart, qPrinter.stringEnd));
this.pw.println();
this.pw.printf("HIT %0" + margin + "d ", hPrinter.seqStart);
this.pw.print(hsp.getHspHseq().substring(hPrinter.stringStart, hPrinter.stringEnd));
this.pw.printf(" %0" + margin + "d", hPrinter.seqEnd - (hPrinter.sign));
this.pw.println();
hPrinter.print();
this.pw.println();
}
this.pw.flush();
}
}
// System.err.println("OK");
}
use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class MergeSplittedBlast method merge.
private Hit merge(List<Hit> hits) {
Hit first = hits.get(0);
Split firstSplit = parse(first);
Hit newHit = new Hit();
newHit.setHitAccession(first.getHitAccession());
newHit.setHitDef(firstSplit.chrom);
newHit.setHitId(first.getHitId());
newHit.setHitNum(first.getHitNum());
newHit.setHitLen(String.valueOf(firstSplit.len));
newHit.setHitHsps(new HitHsps());
List<Hsp> hsps = new ArrayList<Hsp>();
for (Hit h0 : hits) {
/* fix hit_from/hit_to */
Split split = parse(h0);
List<Hsp> L = h0.getHitHsps().getHsp();
for (int i = 0; i < L.size(); ++i) {
Hsp newHsp = BlastHspAlignment.cloneHsp(L.get(i));
BlastHspAlignment aln = new BlastHspAlignment(newHsp);
int h_from = aln.getHitFrom1();
int h_to = aln.getHitTo1();
h_from += (split.start1 - 1);
h_to += (split.start1 - 1);
newHsp.setHspHitFrom(String.valueOf(h_from));
newHsp.setHspHitTo(String.valueOf(h_to));
hsps.add(newHsp);
}
}
boolean done = false;
while (!done) {
done = true;
for (int i = 0; i + 1 < hsps.size(); ++i) {
Hsp hsp0 = hsps.get(i);
for (int j = i + 1; j < hsps.size(); ++j) {
// debug("comparing hsp "+i+" vs "+j+" N="+hsps.size());
Hsp hsp1 = hsps.get(j);
Hsp newHitHsp = merge(hsp0, hsp1);
if (newHitHsp != null) {
hsps.set(i, newHitHsp);
hsps.remove(j);
done = false;
break;
}
}
if (!done)
break;
}
}
for (int i = 0; i < hsps.size(); ++i) hsps.get(i).setHspNum(String.valueOf(i + 1));
newHit.getHitHsps().getHsp().addAll(hsps);
return newHit;
}
Aggregations