use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class BlastToSam method convertIterationToSequenceIteration.
private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
final SequenceIteration sequenceIteration = new SequenceIteration();
if (stack.isEmpty())
return sequenceIteration;
final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
// sequenceIteration.iteration=iter1;
final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
final StringBuilder readContent = new StringBuilder();
final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
for (final Iteration iter1 : stack) {
for (final Hit hit : iter1.getIterationHits().getHit()) {
for (final Hsp hsp : hit.getHitHsps().getHsp()) {
for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
char c = a.getQueryChar();
if (!Character.isLetter(c))
continue;
final int queryIndex0 = a.getQueryIndex1() - 1;
while (readContent.length() <= queryIndex0) readContent.append('N');
if (readContent.charAt(queryIndex0) == 'N') {
readContent.setCharAt(queryIndex0, c);
} else if (readContent.charAt(queryIndex0) != c) {
throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
}
}
}
}
}
for (Iteration iter1 : stack) {
for (Hit hit : iter1.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadUnmappedFlag(false);
rec.setReadName(iter1.getIterationQueryDef());
if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
rec.setReferenceName(hit.getHitAccession());
} else {
rec.setReferenceName(hit.getHitDef());
}
final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
if (ssr == null) {
LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
rec.setReferenceIndex(-1);
} else {
rec.setReferenceIndex(ssr.getSequenceIndex());
}
final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
for (BlastHspAlignment.Align a : blastHspAlignment) {
// System.err.println("##"+a);
if (a.getMidChar() == '|') {
cigarL.add(CigarOperator.EQ);
} else if (a.getMidChar() == ':') {
cigarL.add(CigarOperator.M);
} else if (a.getHitChar() == '-') {
cigarL.add(CigarOperator.I);
} else if (a.getQueryChar() == '-') {
cigarL.add(CigarOperator.D);
} else {
cigarL.add(CigarOperator.X);
}
}
if (cigarL.size() != hsp.getHspMidline().length()) {
throw new IllegalStateException("Boumm");
}
Cigar cigarE = new Cigar();
if (blastHspAlignment.getQueryFrom1() > 1) {
cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
}
int x = 0;
while (x < cigarL.size()) {
int y = x + 1;
while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
++y;
}
cigarE.add(new CigarElement(y - x, cigarL.get(x)));
x = y;
}
/* soft clip */
if (blastHspAlignment.getQueryTo1() < readContent.length()) {
cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
}
/* hard clip */
if (readContent.length() < iterLength) {
cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
}
rec.setCigar(cigarE);
rec.setMappingQuality(40);
rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
rec.setAttribute("RG", rg1.getId());
// setAlignmentEnd not supported in SAM API
// rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
sequenceIteration.records.add(rec);
}
}
}
if (readContent.length() == 0) {
readContent.append('N');
}
byte[] readBases = readContent.toString().getBytes();
char[] readQuals = new char[readBases.length];
for (int i = 0; i < readBases.length; ++i) {
readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
}
if (sequenceIteration.records.isEmpty()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(stack.get(0).getIterationQueryDef());
rec.setReadUnmappedFlag(true);
rec.setAttribute("RG", rg1.getId());
sequenceIteration.records.add(rec);
}
for (SAMRecord rec : sequenceIteration.records) {
rec.setReadString(new String(readBases));
rec.setReadBases(readBases);
rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
}
return sequenceIteration;
}
use of gov.nih.nlm.ncbi.blast.Iteration in project jvarkit by lindenb.
the class BlastMapAnnotations method printGB.
private void printGB(GBSet gbSet) {
for (GBSeq gbSeq : gbSet.getGBSeq()) {
BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
for (Iteration iteration : iterations.getIteration()) {
for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
if (feature.getGBFeatureIntervals() == null)
continue;
if (!acceptfeature(feature.getGBFeatureKey()))
continue;
for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
for (Hit hit : iteration.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
GenbankInterval bi = new GenbankInterval();
bi.gbSeq = gbSeq;
bi.gbFeature = feature;
bi.gbInterval = interval;
bi.hit = hit;
bi.hsp = hsp;
LOG.debug("interval " + bi);
if (!bi.isGbForward())
LOG.info("CHECK INTERVAL REVERSE");
if (!bi.isFeatureOverlapHsp())
continue;
stdout().println(bi.toBedString());
}
}
}
}
break;
}
}
// System.err.println("OK");
}
Aggregations