use of htsjdk.samtools.SAMRecordFactory in project jvarkit by lindenb.
the class BWAMemNOp method doWork.
@Override
public int doWork(List<String> args) {
SamReader r = null;
SAMFileWriter w = null;
try {
r = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = r.getFileHeader();
OtherCanonicalAlignFactory ocaf = new OtherCanonicalAlignFactory(header);
w = writingBamArgs.openSAMFileWriter(outputFile, header, true);
SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
SAMRecordIterator iter = r.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (rec.getSupplementaryAlignmentFlag()) {
continue;
}
if (rec.getReadUnmappedFlag()) {
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
Cigar cigar1 = rec.getCigar();
if (cigar1 == null || cigar1.isEmpty() || !(cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) || cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S))) // last or first is soft clipping
{
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
rec.getAlignmentStart();
List<OtherCanonicalAlign> xps = ocaf.getXPAligns(rec);
if (xps.isEmpty()) {
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
boolean found_one = false;
for (OtherCanonicalAlign xp : xps) {
if (!rec.getReferenceName().equals(xp.getReferenceName()))
continue;
if (xp.getReadNegativeStrandFlag() != rec.getReadNegativeStrandFlag())
continue;
Cigar cigar2 = xp.getCigar();
if (cigar2 == null || cigar2.isEmpty()) {
continue;
}
SAMRecord newrec = null;
List<CigarEvt> L1 = null;
List<CigarEvt> L2 = null;
if (cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(cigar1.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar2.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(0).getLength() >= this.min_soft_clip_length && rec.getAlignmentEnd() < xp.getAlignmentStart()) {
newrec = samRecordFactory.createSAMRecord(header);
int ref1 = rec.getAlignmentStart();
newrec.setAlignmentStart(ref1);
L1 = cigarEvents(0, ref1, cigar1);
L2 = cigarEvents(0, xp.getAlignmentStart(), cigar2);
} else if (cigar2.getCigarElement(cigar2.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(cigar2.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(0).getLength() >= this.min_soft_clip_length && xp.getAlignmentEnd() < rec.getAlignmentStart()) {
newrec = samRecordFactory.createSAMRecord(header);
int ref1 = xp.getAlignmentStart();
newrec.setAlignmentStart(ref1);
L1 = cigarEvents(0, ref1, cigar2);
L2 = cigarEvents(0, rec.getAlignmentStart(), cigar1);
}
if (newrec == null)
continue;
newrec.setFlags(rec.getFlags());
newrec.setReadName(rec.getReadName());
newrec.setReadBases(rec.getReadBases());
newrec.setMappingQuality(rec.getMappingQuality());
newrec.setReferenceIndex(rec.getReferenceIndex());
newrec.setBaseQualities(rec.getBaseQualities());
if (found_one) {
newrec.setNotPrimaryAlignmentFlag(true);
}
found_one = true;
for (SAMTagAndValue tav : rec.getAttributes()) {
if (tav.tag.equals(ocaf.getAttributeKey()))
continue;
if (tav.tag.equals("NM"))
continue;
newrec.setAttribute(tav.tag, tav.value);
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
newrec.setMateAlignmentStart(rec.getMateAlignmentStart());
newrec.setMateReferenceIndex(rec.getMateReferenceIndex());
newrec.setInferredInsertSize(rec.getInferredInsertSize());
}
while (!L1.isEmpty() && (L1.get(L1.size() - 1).op.equals(CigarOperator.S) || L1.get(L1.size() - 1).op.equals(CigarOperator.D) || L1.get(L1.size() - 1).op.equals(CigarOperator.H))) {
L1.remove(L1.size() - 1);
}
while (!L2.isEmpty() && L2.get(0).read0 <= L1.get(L1.size() - 1).read0) {
L2.remove(0);
}
List<CigarElement> cigarElements = new ArrayList<CigarElement>();
int i = 0;
while (i < L1.size()) {
int j = i + 1;
while (j < L1.size() && L1.get(i).op.equals(L1.get(j).op)) {
j++;
}
cigarElements.add(new CigarElement(j - i, L1.get(i).op));
i = j;
}
// add 'N'
cigarElements.add(new CigarElement((L2.get(0).ref1 - L1.get(L1.size() - 1).ref1) - 1, CigarOperator.N));
i = 0;
while (i < L2.size()) {
int j = i + 1;
while (j < L2.size() && L2.get(i).op.equals(L2.get(j).op)) {
j++;
}
cigarElements.add(new CigarElement(j - i, L2.get(i).op));
i = j;
}
// cleanup : case where 'S' is close to 'N'
i = 0;
while (i + 1 < cigarElements.size()) {
CigarElement ce1 = cigarElements.get(i);
CigarElement ce2 = cigarElements.get(i + 1);
if (i > 0 && ce1.getOperator().equals(CigarOperator.S) && ce2.getOperator().equals(CigarOperator.N)) {
cigarElements.set(i, new CigarElement(ce1.getLength(), CigarOperator.X));
} else if (i + 2 < cigarElements.size() && ce1.getOperator().equals(CigarOperator.N) && ce2.getOperator().equals(CigarOperator.S)) {
cigarElements.set(i + 1, new CigarElement(ce2.getLength(), CigarOperator.X));
}
i++;
}
newrec.setCigar(new Cigar(cigarElements));
List<SAMValidationError> validations = newrec.isValid();
if (validations != null) {
for (SAMValidationError err : validations) {
LOG.warning(err.getType() + ":" + err.getMessage());
}
}
w.addAlignment(newrec);
}
if (!found_one) {
if (!print_only_spit_read)
w.addAlignment(rec);
}
}
iter.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.SAMRecordFactory in project jvarkit by lindenb.
the class BlastToSam method convertIterationToSequenceIteration.
private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
final SequenceIteration sequenceIteration = new SequenceIteration();
if (stack.isEmpty())
return sequenceIteration;
final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
// sequenceIteration.iteration=iter1;
final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
final StringBuilder readContent = new StringBuilder();
final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
for (final Iteration iter1 : stack) {
for (final Hit hit : iter1.getIterationHits().getHit()) {
for (final Hsp hsp : hit.getHitHsps().getHsp()) {
for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
char c = a.getQueryChar();
if (!Character.isLetter(c))
continue;
final int queryIndex0 = a.getQueryIndex1() - 1;
while (readContent.length() <= queryIndex0) readContent.append('N');
if (readContent.charAt(queryIndex0) == 'N') {
readContent.setCharAt(queryIndex0, c);
} else if (readContent.charAt(queryIndex0) != c) {
throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
}
}
}
}
}
for (Iteration iter1 : stack) {
for (Hit hit : iter1.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadUnmappedFlag(false);
rec.setReadName(iter1.getIterationQueryDef());
if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
rec.setReferenceName(hit.getHitAccession());
} else {
rec.setReferenceName(hit.getHitDef());
}
final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
if (ssr == null) {
LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
rec.setReferenceIndex(-1);
} else {
rec.setReferenceIndex(ssr.getSequenceIndex());
}
final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
for (BlastHspAlignment.Align a : blastHspAlignment) {
// System.err.println("##"+a);
if (a.getMidChar() == '|') {
cigarL.add(CigarOperator.EQ);
} else if (a.getMidChar() == ':') {
cigarL.add(CigarOperator.M);
} else if (a.getHitChar() == '-') {
cigarL.add(CigarOperator.I);
} else if (a.getQueryChar() == '-') {
cigarL.add(CigarOperator.D);
} else {
cigarL.add(CigarOperator.X);
}
}
if (cigarL.size() != hsp.getHspMidline().length()) {
throw new IllegalStateException("Boumm");
}
Cigar cigarE = new Cigar();
if (blastHspAlignment.getQueryFrom1() > 1) {
cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
}
int x = 0;
while (x < cigarL.size()) {
int y = x + 1;
while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
++y;
}
cigarE.add(new CigarElement(y - x, cigarL.get(x)));
x = y;
}
/* soft clip */
if (blastHspAlignment.getQueryTo1() < readContent.length()) {
cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
}
/* hard clip */
if (readContent.length() < iterLength) {
cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
}
rec.setCigar(cigarE);
rec.setMappingQuality(40);
rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
rec.setAttribute("RG", rg1.getId());
// setAlignmentEnd not supported in SAM API
// rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
sequenceIteration.records.add(rec);
}
}
}
if (readContent.length() == 0) {
readContent.append('N');
}
byte[] readBases = readContent.toString().getBytes();
char[] readQuals = new char[readBases.length];
for (int i = 0; i < readBases.length; ++i) {
readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
}
if (sequenceIteration.records.isEmpty()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(stack.get(0).getIterationQueryDef());
rec.setReadUnmappedFlag(true);
rec.setAttribute("RG", rg1.getId());
sequenceIteration.records.add(rec);
}
for (SAMRecord rec : sequenceIteration.records) {
rec.setReadString(new String(readBases));
rec.setReadBases(readBases);
rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
}
return sequenceIteration;
}
Aggregations