use of au.edu.wehi.idsv.alignment.Alignment in project gridss by PapenfussLab.
the class SAMRecordUtil method calculateSATags.
private static void calculateSATags(List<SAMRecord> list) {
// TODO use CC, CP, HI, IH tags if they are present
// TODO break ties in an other other than just overwriting the previous
// alignment that
// finishes at a given position
SortedMap<Integer, SAMRecord> start = new TreeMap<Integer, SAMRecord>();
SortedMap<Integer, SAMRecord> end = new TreeMap<Integer, SAMRecord>();
for (SAMRecord r : list) {
start.put(getFirstAlignedBaseReadOffset(r), r);
end.put(getLastAlignedBaseReadOffset(r), r);
}
for (SAMRecord r : list) {
if (r.getAttribute(SAMTag.SA.name()) != null)
continue;
ArrayDeque<SAMRecord> alignment = new ArrayDeque<SAMRecord>();
SAMRecord prev = r;
while (prev != null) {
SortedMap<Integer, SAMRecord> before = end.headMap(getFirstAlignedBaseReadOffset(prev));
if (before.isEmpty()) {
prev = null;
} else {
prev = before.get(before.lastKey());
alignment.addFirst(prev);
}
}
SAMRecord next = r;
while (next != null) {
SortedMap<Integer, SAMRecord> after = start.tailMap(getLastAlignedBaseReadOffset(next) + 1);
if (after.isEmpty()) {
next = null;
} else {
next = after.get(after.firstKey());
alignment.addLast(next);
}
}
if (alignment.size() == 0) {
r.setAttribute(SAMTag.SA.name(), null);
} else {
List<SAMRecord> aln = new ArrayList<>(alignment);
Collections.sort(aln, new Ordering<SAMRecord>() {
@Override
public int compare(SAMRecord arg0, SAMRecord arg1) {
// element points to the primary line.
return Booleans.compare(arg0.getSupplementaryAlignmentFlag(), arg1.getSupplementaryAlignmentFlag());
}
});
StringBuilder sb = new StringBuilder();
for (SAMRecord chim : aln) {
sb.append(new ChimericAlignment(chim));
sb.append(";");
}
sb.deleteCharAt(sb.length() - 1);
r.setAttribute(SAMTag.SA.name(), sb.toString());
}
}
Set<ChimericAlignment> referencedReads = list.stream().flatMap(r -> ChimericAlignment.getChimericAlignments(r.getStringAttribute(SAMTag.SA.name())).stream()).collect(Collectors.toSet());
// validate SA tags
for (SAMRecord r : list) {
referencedReads.remove(new ChimericAlignment(r));
}
if (!referencedReads.isEmpty()) {
if (!MessageThrottler.Current.shouldSupress(log, "Missing records referred to by SA tags")) {
log.warn(String.format("SA tag of read %s refers to missing alignments %s", list.get(0).getReadName(), referencedReads));
}
}
}
use of au.edu.wehi.idsv.alignment.Alignment in project gridss by PapenfussLab.
the class SAMRecordUtil method realign.
/**
* Performs local realignment of the given SAMRecord in a window around the
* alignment location
*
* @param reference
* reference genome
* @param read
* read
* @param windowSize
* number of bases to extend window around alignment
* @param extendWindowForClippedBases
* extend window for soft clipped bases
* @return copy of realigned read if alignment changed, read if realignment
* did not change the alignment
*/
public static SAMRecord realign(ReferenceLookup reference, SAMRecord read, int windowSize, boolean extendWindowForClippedBases) {
if (read.getReadUnmappedFlag())
return read;
SAMSequenceRecord refSeq = reference.getSequenceDictionary().getSequence(read.getReferenceIndex());
// find reference bounds of read. Negative deletions mean we can't just
// use
// getAlignmentStart() and getAlignmentEnd()
int pos = read.getAlignmentStart();
int start = pos;
int end = pos;
for (CigarElement ce : read.getCigar().getCigarElements()) {
if (ce.getOperator().consumesReferenceBases()) {
pos += ce.getLength();
}
start = Math.min(start, pos);
end = Math.max(end, pos - 1);
}
// extend bounds
start -= windowSize;
end += windowSize;
if (extendWindowForClippedBases) {
start -= getStartSoftClipLength(read);
end += getEndSoftClipLength(read);
}
// don't overrun contig bounds
start = Math.max(1, start);
end = Math.min(refSeq.getSequenceLength(), end);
byte[] ass = read.getReadBases();
byte[] ref = reference.getSubsequenceAt(refSeq.getSequenceName(), start, end).getBases();
if (ass == null || ref == null || ass.length == 0 || ref.length == 0) {
return read;
}
// is encountered
for (int i = 0; i < ass.length; i++) {
if (!htsjdk.samtools.util.SequenceUtil.isValidBase(ass[i])) {
ass[i] = 'N';
}
}
for (int i = 0; i < ref.length; i++) {
if (!htsjdk.samtools.util.SequenceUtil.isValidBase(ref[i])) {
ref[i] = 'N';
}
}
Alignment alignment = null;
if (ass != null && ass.length > 0 && ref != null && ref.length > 0) {
try {
alignment = AlignerFactory.create().align_smith_waterman(ass, ref);
} catch (Exception e) {
// swallow and log alignment error
if (!MessageThrottler.Current.shouldSupress(log, "local realignment failures")) {
log.error(e, String.format("Error aligning %s to %s:%d-%d", read.getReadName(), refSeq.getSequenceName(), start, end));
}
}
}
if (alignment == null) {
return read;
}
Cigar cigar = TextCigarCodec.decode(alignment.getCigar());
int alignmentStart = start + alignment.getStartPosition();
if (alignmentStart == read.getAlignmentStart() && cigar.equals(read.getCigar())) {
return read;
}
SAMRecord copy = SAMRecordUtil.clone(read);
if (!cigar.equals(read.getCigar())) {
copy.setCigar(cigar);
copy.setAttribute(SAMTag.OC.name(), read.getCigarString());
}
if (alignmentStart != read.getAlignmentStart()) {
copy.setAlignmentStart(alignmentStart);
copy.setAttribute(SAMTag.OP.name(), read.getAlignmentStart());
}
return copy;
}
Aggregations