use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class MergeSplittedBlast method merge.
private Hsp merge(Hsp hh0, Hsp hh1) {
BlastHspAlignment aln0 = new BlastHspAlignment(hh0);
BlastHspAlignment aln1 = new BlastHspAlignment(hh1);
// order hsp on query pos
if (aln0.getQueryFrom1() > aln1.getQueryFrom1()) {
Hsp th = hh0;
hh0 = hh1;
hh1 = th;
aln0 = new BlastHspAlignment(hh0);
aln1 = new BlastHspAlignment(hh1);
}
/* not the same strand */
if (aln0.isPlusPlus() != aln1.isPlusPlus()) {
// debug("strand");
return null;
}
/* hits overlap */
if (!overlap(aln0.getHitFrom1(), aln0.getHitTo1(), aln1.getHitFrom1(), aln1.getHitTo1())) {
// debug("no overlap hit");
return null;
}
/* query overlap */
if (!overlap(aln0.getQueryFrom1(), aln0.getQueryTo1(), aln1.getQueryFrom1(), aln1.getQueryTo1())) {
// debug("no overlap query");
return null;
}
// hit1 is contained in hit0
if (aln0.getQueryFrom1() <= aln1.getQueryFrom1() && aln1.getQueryTo1() <= aln0.getQueryTo1()) {
// debug("contained");
return hh0;
}
StringBuilder qsb = new StringBuilder();
StringBuilder msb = new StringBuilder();
StringBuilder hsb = new StringBuilder();
int expect_hit = -1;
int found_hit = -1;
for (BlastHspAlignment.Align a : aln0) {
if (a.getQueryIndex1() >= aln1.getQueryFrom1()) {
// debug("###BREAK###############");
expect_hit = a.getHitIndex1();
break;
}
qsb.append(a.getQueryChar());
msb.append(a.getMidChar());
hsb.append(a.getHitChar());
}
if (expect_hit == -1) {
// debug("HU?");
return null;
}
for (BlastHspAlignment.Align a : aln1) {
if (a.getQueryIndex1() < aln1.getQueryFrom1())
continue;
if (found_hit == -1) {
found_hit = a.getHitIndex1();
if (expect_hit != found_hit) {
LOG.info("Not the expected hit position " + expect_hit + "/" + found_hit);
return null;
}
}
qsb.append(a.getQueryChar());
msb.append(a.getMidChar());
hsb.append(a.getHitChar());
}
// info("\n"+qsb+"\n"+msb+"\n"+hsb);
Hsp newHsp = BlastHspAlignment.cloneHsp(hh0);
newHsp.setHspAlignLen(String.valueOf(msb.length()));
newHsp.setHspMidline(msb.toString());
newHsp.setHspQseq(qsb.toString());
newHsp.setHspHseq(hsb.toString());
newHsp.setHspQueryFrom(String.valueOf(Math.min(aln0.getQueryFrom1(), aln1.getQueryFrom1())));
newHsp.setHspQueryTo(String.valueOf(Math.max(aln0.getQueryTo1(), aln1.getQueryTo1())));
if (aln0.isPlusPlus()) {
newHsp.setHspHitFrom(String.valueOf(Math.min(aln0.getHitFrom1(), aln1.getHitFrom1())));
newHsp.setHspHitTo(String.valueOf(Math.max(aln0.getHitTo1(), aln1.getHitTo1())));
} else {
newHsp.setHspHitFrom(String.valueOf(Math.max(aln0.getHitFrom1(), aln1.getHitFrom1())));
newHsp.setHspHitTo(String.valueOf(Math.min(aln0.getHitTo1(), aln1.getHitTo1())));
}
newHsp.setHspGaps(String.valueOf(newHsp.getHspMidline().replaceAll("[^ ]", "").length()));
newHsp.setHspScore(String.valueOf(newHsp.getHspMidline().replaceAll("[ ]", "").length()));
// debug("success");
return newHsp;
}
use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class BlastToSam method convertIterationToSequenceIteration.
private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
final SequenceIteration sequenceIteration = new SequenceIteration();
if (stack.isEmpty())
return sequenceIteration;
final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
// sequenceIteration.iteration=iter1;
final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
final StringBuilder readContent = new StringBuilder();
final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
for (final Iteration iter1 : stack) {
for (final Hit hit : iter1.getIterationHits().getHit()) {
for (final Hsp hsp : hit.getHitHsps().getHsp()) {
for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
char c = a.getQueryChar();
if (!Character.isLetter(c))
continue;
final int queryIndex0 = a.getQueryIndex1() - 1;
while (readContent.length() <= queryIndex0) readContent.append('N');
if (readContent.charAt(queryIndex0) == 'N') {
readContent.setCharAt(queryIndex0, c);
} else if (readContent.charAt(queryIndex0) != c) {
throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
}
}
}
}
}
for (Iteration iter1 : stack) {
for (Hit hit : iter1.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadUnmappedFlag(false);
rec.setReadName(iter1.getIterationQueryDef());
if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
rec.setReferenceName(hit.getHitAccession());
} else {
rec.setReferenceName(hit.getHitDef());
}
final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
if (ssr == null) {
LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
rec.setReferenceIndex(-1);
} else {
rec.setReferenceIndex(ssr.getSequenceIndex());
}
final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
for (BlastHspAlignment.Align a : blastHspAlignment) {
// System.err.println("##"+a);
if (a.getMidChar() == '|') {
cigarL.add(CigarOperator.EQ);
} else if (a.getMidChar() == ':') {
cigarL.add(CigarOperator.M);
} else if (a.getHitChar() == '-') {
cigarL.add(CigarOperator.I);
} else if (a.getQueryChar() == '-') {
cigarL.add(CigarOperator.D);
} else {
cigarL.add(CigarOperator.X);
}
}
if (cigarL.size() != hsp.getHspMidline().length()) {
throw new IllegalStateException("Boumm");
}
Cigar cigarE = new Cigar();
if (blastHspAlignment.getQueryFrom1() > 1) {
cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
}
int x = 0;
while (x < cigarL.size()) {
int y = x + 1;
while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
++y;
}
cigarE.add(new CigarElement(y - x, cigarL.get(x)));
x = y;
}
/* soft clip */
if (blastHspAlignment.getQueryTo1() < readContent.length()) {
cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
}
/* hard clip */
if (readContent.length() < iterLength) {
cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
}
rec.setCigar(cigarE);
rec.setMappingQuality(40);
rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
rec.setAttribute("RG", rg1.getId());
// setAlignmentEnd not supported in SAM API
// rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
sequenceIteration.records.add(rec);
}
}
}
if (readContent.length() == 0) {
readContent.append('N');
}
byte[] readBases = readContent.toString().getBytes();
char[] readQuals = new char[readBases.length];
for (int i = 0; i < readBases.length; ++i) {
readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
}
if (sequenceIteration.records.isEmpty()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(stack.get(0).getIterationQueryDef());
rec.setReadUnmappedFlag(true);
rec.setAttribute("RG", rg1.getId());
sequenceIteration.records.add(rec);
}
for (SAMRecord rec : sequenceIteration.records) {
rec.setReadString(new String(readBases));
rec.setReadBases(readBases);
rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
}
return sequenceIteration;
}
use of gov.nih.nlm.ncbi.blast.Hsp in project jvarkit by lindenb.
the class BlastMapAnnotations method printGB.
private void printGB(GBSet gbSet) {
for (GBSeq gbSeq : gbSet.getGBSeq()) {
BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
for (Iteration iteration : iterations.getIteration()) {
for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
if (feature.getGBFeatureIntervals() == null)
continue;
if (!acceptfeature(feature.getGBFeatureKey()))
continue;
for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
for (Hit hit : iteration.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
GenbankInterval bi = new GenbankInterval();
bi.gbSeq = gbSeq;
bi.gbFeature = feature;
bi.gbInterval = interval;
bi.hit = hit;
bi.hsp = hsp;
LOG.debug("interval " + bi);
if (!bi.isGbForward())
LOG.info("CHECK INTERVAL REVERSE");
if (!bi.isFeatureOverlapHsp())
continue;
stdout().println(bi.toBedString());
}
}
}
}
break;
}
}
// System.err.println("OK");
}
Aggregations