use of htsjdk.samtools.util.Log in project gridss by PapenfussLab.
the class SAMRecordUtil method calculateSATags.
private static void calculateSATags(List<SAMRecord> list) {
// TODO use CC, CP, HI, IH tags if they are present
// TODO break ties in an other other than just overwriting the previous
// alignment that
// finishes at a given position
SortedMap<Integer, SAMRecord> start = new TreeMap<Integer, SAMRecord>();
SortedMap<Integer, SAMRecord> end = new TreeMap<Integer, SAMRecord>();
for (SAMRecord r : list) {
start.put(getFirstAlignedBaseReadOffset(r), r);
end.put(getLastAlignedBaseReadOffset(r), r);
}
for (SAMRecord r : list) {
if (r.getAttribute(SAMTag.SA.name()) != null)
continue;
ArrayDeque<SAMRecord> alignment = new ArrayDeque<SAMRecord>();
SAMRecord prev = r;
while (prev != null) {
SortedMap<Integer, SAMRecord> before = end.headMap(getFirstAlignedBaseReadOffset(prev));
if (before.isEmpty()) {
prev = null;
} else {
prev = before.get(before.lastKey());
alignment.addFirst(prev);
}
}
SAMRecord next = r;
while (next != null) {
SortedMap<Integer, SAMRecord> after = start.tailMap(getLastAlignedBaseReadOffset(next) + 1);
if (after.isEmpty()) {
next = null;
} else {
next = after.get(after.firstKey());
alignment.addLast(next);
}
}
if (alignment.size() == 0) {
r.setAttribute(SAMTag.SA.name(), null);
} else {
List<SAMRecord> aln = new ArrayList<>(alignment);
Collections.sort(aln, new Ordering<SAMRecord>() {
@Override
public int compare(SAMRecord arg0, SAMRecord arg1) {
// element points to the primary line.
return Booleans.compare(arg0.getSupplementaryAlignmentFlag(), arg1.getSupplementaryAlignmentFlag());
}
});
StringBuilder sb = new StringBuilder();
for (SAMRecord chim : aln) {
sb.append(new ChimericAlignment(chim));
sb.append(";");
}
sb.deleteCharAt(sb.length() - 1);
r.setAttribute(SAMTag.SA.name(), sb.toString());
}
}
Set<ChimericAlignment> referencedReads = list.stream().flatMap(r -> ChimericAlignment.getChimericAlignments(r.getStringAttribute(SAMTag.SA.name())).stream()).collect(Collectors.toSet());
// validate SA tags
for (SAMRecord r : list) {
referencedReads.remove(new ChimericAlignment(r));
}
if (!referencedReads.isEmpty()) {
if (!MessageThrottler.Current.shouldSupress(log, "Missing records referred to by SA tags")) {
log.warn(String.format("SA tag of read %s refers to missing alignments %s", list.get(0).getReadName(), referencedReads));
}
}
}
use of htsjdk.samtools.util.Log in project gridss by PapenfussLab.
the class NonReferenceContigAssembler method callContig.
/**
* Calls the contig.
* Note: to call the contig, additional graph nodes may be required to be loaded. No loading checks
* (such as flushing excessively dense intervals) are performed on these additional loaded nodes.
* @param contig to call
* @return SAMRecord containing breakend assembly, null if a valid break-end assembly contig
* could not be created from this contig
*/
private SAMRecord callContig(ArrayDeque<KmerPathSubnode> rawcontig) {
if (rawcontig == null)
return null;
ArrayDeque<KmerPathSubnode> contig = rawcontig;
if (containsKmerRepeat(contig)) {
// recalculate the called contig, this may break the contig at the repeated kmer
MisassemblyFixer fixed = new MisassemblyFixer(contig);
contig = new ArrayDeque<KmerPathSubnode>(fixed.correctMisassignedEvidence(evidenceTracker.support(contig)));
}
if (contig.isEmpty())
return null;
int contigLength = contig.stream().mapToInt(sn -> sn.length()).sum();
int targetAnchorLength = Math.max(Math.min(contigLength, maxExpectedBreakendLength()), maxAnchorLength);
KmerPathNodePath startAnchorPath = new KmerPathNodePath(contig.getFirst(), false, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getFirst().length());
startAnchorPath.greedyTraverse(true, false);
ArrayDeque<KmerPathSubnode> startingAnchor = startAnchorPath.headNode().asSubnodes();
startingAnchor.removeLast();
// make sure we have enough of the graph loaded so that when
// we traverse forward, our anchor sequence will be fully defined
advanceUnderlying(contig.getLast().lastEnd() + targetAnchorLength + minDistanceFromNextPositionForEvidenceToBeFullyLoaded());
KmerPathNodePath endAnchorPath = new KmerPathNodePath(contig.getLast(), true, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getLast().length());
endAnchorPath.greedyTraverse(true, false);
ArrayDeque<KmerPathSubnode> endingAnchor = endAnchorPath.headNode().asSubnodes();
endingAnchor.removeFirst();
List<KmerPathSubnode> fullContig = new ArrayList<KmerPathSubnode>(contig.size() + startingAnchor.size() + endingAnchor.size());
fullContig.addAll(startingAnchor);
fullContig.addAll(contig);
fullContig.addAll(endingAnchor);
byte[] bases = KmerEncodingHelper.baseCalls(fullContig.stream().flatMap(sn -> sn.node().pathKmers().stream()).collect(Collectors.toList()), k);
byte[] quals = DeBruijnGraphBase.kmerWeightsToBaseQuals(k, fullContig.stream().flatMapToInt(sn -> sn.node().pathWeights().stream().mapToInt(Integer::intValue)).toArray());
assert (quals.length == bases.length);
// left aligned anchor position although it shouldn't matter since anchoring should be a single base wide
int startAnchorPosition = startingAnchor.size() == 0 ? 0 : startingAnchor.getLast().lastStart() + k - 1;
int endAnchorPosition = endingAnchor.size() == 0 ? 0 : endingAnchor.getFirst().firstStart();
int startAnchorBaseCount = startingAnchor.size() == 0 ? 0 : startingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
int endAnchorBaseCount = endingAnchor.size() == 0 ? 0 : endingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
int startBasesToTrim = Math.max(0, startAnchorBaseCount - targetAnchorLength);
int endingBasesToTrim = Math.max(0, endAnchorBaseCount - targetAnchorLength);
bases = Arrays.copyOfRange(bases, startBasesToTrim, bases.length - endingBasesToTrim);
quals = Arrays.copyOfRange(quals, startBasesToTrim, quals.length - endingBasesToTrim);
Set<KmerEvidence> evidence = evidenceTracker.untrack(contig);
List<DirectedEvidence> evidenceIds = evidence.stream().map(e -> e.evidence()).collect(Collectors.toList());
SAMRecord assembledContig;
if (startingAnchor.size() == 0 && endingAnchor.size() == 0) {
assert (startBasesToTrim == 0);
assert (endingBasesToTrim == 0);
// unanchored
if (evidence.size() > 0) {
BreakendSummary be = Models.calculateBreakend(aes.getContext().getLinear(), evidence.stream().map(e -> e.evidence().getBreakendSummary()).collect(Collectors.toList()), evidence.stream().map(e -> ScalingHelper.toScaledWeight(e.evidenceQuality())).collect(Collectors.toList()));
assembledContig = AssemblyFactory.createUnanchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, be, evidenceIds, bases, quals);
if (evidence.stream().anyMatch(e -> e.isAnchored())) {
log.debug(String.format("Unanchored assembly %s at %s:%d contains anchored evidence", assembledContig.getReadName(), contigName, contig.getFirst().firstStart()));
}
} else {
// Can't call contig because we don't known the supporting breakend positions
assembledContig = null;
}
} else if (startingAnchor.size() == 0) {
// end anchored
assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Backward, evidenceIds, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
} else if (endingAnchor.size() == 0) {
// start anchored
assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Forward, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, bases, quals);
} else {
if (startAnchorBaseCount + endAnchorBaseCount >= quals.length) {
// no unanchored bases - not an SV assembly
assembledContig = null;
} else {
assembledContig = AssemblyFactory.createAnchoredBreakpoint(aes.getContext(), aes, assemblyNameGenerator, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
}
}
if (contigLength > maxExpectedBreakendLength()) {
log.debug(String.format("Called breakend contig %s at %s:%d-%d of length %d when maximum expected breakend contig length is %d", assembledContig == null ? "" : assembledContig.getReadName(), contigName, rawcontig.getFirst().firstStart(), rawcontig.getLast().lastEnd(), contigLength, maxExpectedBreakendLength()));
}
if (assembledContig != null) {
if (aes.getContext().getConfig().getVisualisation().assemblyGraph) {
try {
PositionalExporter.exportDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
} catch (Exception ex) {
log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
}
}
if (aes.getContext().getConfig().getVisualisation().assemblyGraphFullSize) {
try {
PositionalExporter.exportNodeDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.fullsize." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
} catch (Exception ex) {
log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
}
}
if (aes.getContext().getConfig().getVisualisation().assemblyContigMemoization) {
File file = new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.path.memoization." + contigName + "." + Integer.toString(pathExportCount.incrementAndGet()) + ".csv");
try {
bestContigCaller.exportState(file);
} catch (IOException e) {
log.debug(e, " Unable to export assembly path memoization to ", file, " ", contigName);
}
}
}
stats.contigNodes = contig.size();
stats.truncatedNodes = rawcontig.size() - contig.size();
stats.contigStartPosition = contig.getFirst().firstStart();
stats.startAnchorNodes = startingAnchor.size();
stats.endAnchorNodes = endingAnchor.size();
if (exportTracker != null) {
exportTracker.trackAssembly(bestContigCaller);
}
// remove all evidence contributing to this assembly from the graph
if (evidence.size() > 0) {
removeFromGraph(evidence);
if (Defaults.SANITY_CHECK_MEMOIZATION) {
bestContigCaller.sanityCheck(graphByPosition);
}
} else {
if (!MessageThrottler.Current.shouldSupress(log, "unsupported paths")) {
log.error("Sanity check failure: found path with no support. Attempting to recover by direct node removal ", contigName);
}
for (KmerPathSubnode n : contig) {
removeFromGraph(n.node(), true);
}
}
contigsCalled++;
if (assembledContig != null) {
called.add(assembledContig);
}
return assembledContig;
}
Aggregations