use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class BlastFilterJS method doWork.
@Override
public int doWork(List<String> args) {
final CompiledScript compiledScript;
Unmarshaller unmarshaller;
Marshaller marshaller;
try {
compiledScript = super.compileJavascript(scriptExpr, scriptFile);
JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
unmarshaller = jc.createUnmarshaller();
marshaller = jc.createMarshaller();
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
PrintWriter pw = openFileOrStdoutAsPrintWriter(outputFile);
XMLOutputFactory xof = XMLOutputFactory.newFactory();
xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.FALSE);
XMLEventWriter w = xof.createXMLEventWriter(pw);
final String inputName = oneFileOrNull(args);
StreamSource src = null;
if (inputName == null) {
LOG.info("Reading stdin");
src = new StreamSource(stdin());
} else {
LOG.info("Reading file " + inputName);
src = new StreamSource(new File(inputName));
}
XMLEventReader r = xmlInputFactory.createXMLEventReader(src);
XMLEventFactory eventFactory = XMLEventFactory.newFactory();
SimpleBindings bindings = new SimpleBindings();
while (r.hasNext()) {
XMLEvent evt = r.peek();
switch(evt.getEventType()) {
case XMLEvent.START_ELEMENT:
{
StartElement sE = evt.asStartElement();
Hit hit = null;
JAXBElement<Hit> jaxbElement = null;
if (sE.getName().getLocalPart().equals("Hit")) {
jaxbElement = unmarshaller.unmarshal(r, Hit.class);
hit = jaxbElement.getValue();
} else {
w.add(r.nextEvent());
break;
}
if (hit != null) {
bindings.put("hit", hit);
boolean accept = super.evalJavaScriptBoolean(compiledScript, bindings);
if (accept) {
marshaller.marshal(jaxbElement, w);
w.add(eventFactory.createCharacters("\n"));
}
}
break;
}
case XMLEvent.SPACE:
break;
default:
{
w.add(r.nextEvent());
break;
}
}
r.close();
}
w.flush();
w.close();
pw.flush();
pw.close();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
}
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class MergeSplittedBlast method merge.
private Hit merge(List<Hit> hits) {
Hit first = hits.get(0);
Split firstSplit = parse(first);
Hit newHit = new Hit();
newHit.setHitAccession(first.getHitAccession());
newHit.setHitDef(firstSplit.chrom);
newHit.setHitId(first.getHitId());
newHit.setHitNum(first.getHitNum());
newHit.setHitLen(String.valueOf(firstSplit.len));
newHit.setHitHsps(new HitHsps());
List<Hsp> hsps = new ArrayList<Hsp>();
for (Hit h0 : hits) {
/* fix hit_from/hit_to */
Split split = parse(h0);
List<Hsp> L = h0.getHitHsps().getHsp();
for (int i = 0; i < L.size(); ++i) {
Hsp newHsp = BlastHspAlignment.cloneHsp(L.get(i));
BlastHspAlignment aln = new BlastHspAlignment(newHsp);
int h_from = aln.getHitFrom1();
int h_to = aln.getHitTo1();
h_from += (split.start1 - 1);
h_to += (split.start1 - 1);
newHsp.setHspHitFrom(String.valueOf(h_from));
newHsp.setHspHitTo(String.valueOf(h_to));
hsps.add(newHsp);
}
}
boolean done = false;
while (!done) {
done = true;
for (int i = 0; i + 1 < hsps.size(); ++i) {
Hsp hsp0 = hsps.get(i);
for (int j = i + 1; j < hsps.size(); ++j) {
// debug("comparing hsp "+i+" vs "+j+" N="+hsps.size());
Hsp hsp1 = hsps.get(j);
Hsp newHitHsp = merge(hsp0, hsp1);
if (newHitHsp != null) {
hsps.set(i, newHitHsp);
hsps.remove(j);
done = false;
break;
}
}
if (!done)
break;
}
}
for (int i = 0; i < hsps.size(); ++i) hsps.get(i).setHspNum(String.valueOf(i + 1));
newHit.getHitHsps().getHsp().addAll(hsps);
return newHit;
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class BlastToSam method convertIterationToSequenceIteration.
private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
final SequenceIteration sequenceIteration = new SequenceIteration();
if (stack.isEmpty())
return sequenceIteration;
final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
// sequenceIteration.iteration=iter1;
final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
final StringBuilder readContent = new StringBuilder();
final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
for (final Iteration iter1 : stack) {
for (final Hit hit : iter1.getIterationHits().getHit()) {
for (final Hsp hsp : hit.getHitHsps().getHsp()) {
for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
char c = a.getQueryChar();
if (!Character.isLetter(c))
continue;
final int queryIndex0 = a.getQueryIndex1() - 1;
while (readContent.length() <= queryIndex0) readContent.append('N');
if (readContent.charAt(queryIndex0) == 'N') {
readContent.setCharAt(queryIndex0, c);
} else if (readContent.charAt(queryIndex0) != c) {
throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
}
}
}
}
}
for (Iteration iter1 : stack) {
for (Hit hit : iter1.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadUnmappedFlag(false);
rec.setReadName(iter1.getIterationQueryDef());
if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
rec.setReferenceName(hit.getHitAccession());
} else {
rec.setReferenceName(hit.getHitDef());
}
final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
if (ssr == null) {
LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
rec.setReferenceIndex(-1);
} else {
rec.setReferenceIndex(ssr.getSequenceIndex());
}
final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
for (BlastHspAlignment.Align a : blastHspAlignment) {
// System.err.println("##"+a);
if (a.getMidChar() == '|') {
cigarL.add(CigarOperator.EQ);
} else if (a.getMidChar() == ':') {
cigarL.add(CigarOperator.M);
} else if (a.getHitChar() == '-') {
cigarL.add(CigarOperator.I);
} else if (a.getQueryChar() == '-') {
cigarL.add(CigarOperator.D);
} else {
cigarL.add(CigarOperator.X);
}
}
if (cigarL.size() != hsp.getHspMidline().length()) {
throw new IllegalStateException("Boumm");
}
Cigar cigarE = new Cigar();
if (blastHspAlignment.getQueryFrom1() > 1) {
cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
}
int x = 0;
while (x < cigarL.size()) {
int y = x + 1;
while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
++y;
}
cigarE.add(new CigarElement(y - x, cigarL.get(x)));
x = y;
}
/* soft clip */
if (blastHspAlignment.getQueryTo1() < readContent.length()) {
cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
}
/* hard clip */
if (readContent.length() < iterLength) {
cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
}
rec.setCigar(cigarE);
rec.setMappingQuality(40);
rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
rec.setAttribute("RG", rg1.getId());
// setAlignmentEnd not supported in SAM API
// rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
sequenceIteration.records.add(rec);
}
}
}
if (readContent.length() == 0) {
readContent.append('N');
}
byte[] readBases = readContent.toString().getBytes();
char[] readQuals = new char[readBases.length];
for (int i = 0; i < readBases.length; ++i) {
readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
}
if (sequenceIteration.records.isEmpty()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(stack.get(0).getIterationQueryDef());
rec.setReadUnmappedFlag(true);
rec.setAttribute("RG", rg1.getId());
sequenceIteration.records.add(rec);
}
for (SAMRecord rec : sequenceIteration.records) {
rec.setReadString(new String(readBases));
rec.setReadBases(readBases);
rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
}
return sequenceIteration;
}
use of gov.nih.nlm.ncbi.blast.Hit in project jvarkit by lindenb.
the class BlastMapAnnotations method printGB.
private void printGB(GBSet gbSet) {
for (GBSeq gbSeq : gbSet.getGBSeq()) {
BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
for (Iteration iteration : iterations.getIteration()) {
for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
if (feature.getGBFeatureIntervals() == null)
continue;
if (!acceptfeature(feature.getGBFeatureKey()))
continue;
for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
for (Hit hit : iteration.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
GenbankInterval bi = new GenbankInterval();
bi.gbSeq = gbSeq;
bi.gbFeature = feature;
bi.gbInterval = interval;
bi.hit = hit;
bi.hsp = hsp;
LOG.debug("interval " + bi);
if (!bi.isGbForward())
LOG.info("CHECK INTERVAL REVERSE");
if (!bi.isFeatureOverlapHsp())
continue;
stdout().println(bi.toBedString());
}
}
}
}
break;
}
}
// System.err.println("OK");
}
Aggregations