use of au.edu.wehi.idsv.configuration.AssemblyConfiguration in project gridss by PapenfussLab.
the class AssemblyEvidenceSource method shouldFilterAssembly.
/**
 * Determines whether the given assembly record should be discarded.
 *
 * <p>An assembly is filtered when any of the following holds, checked in this order:
 * <ol>
 *   <li>no breakend evidence can be created from it (reference assembly);</li>
 *   <li>its soft clip length in the assembly direction exceeds
 *       {@code maxExpectedBreakendLengthMultiple} times the maximum concordant fragment size
 *       (this case is also logged at debug level);</li>
 *   <li>its support count is below the configured {@code minReads};</li>
 *   <li>it has no soft clip support and its breakend is no longer than the
 *       maximum read pair length recorded in the assembly attributes.</li>
 * </ol>
 *
 * @param asm assembly record to test
 * @return {@code true} if the assembly should be filtered, {@code false} otherwise
 */
public boolean shouldFilterAssembly(SAMRecord asm) {
    AssemblyConfiguration config = getContext().getAssemblyParameters();
    AssemblyAttributes attributes = new AssemblyAttributes(asm);

    // No breakend evidence: this is a reference assembly.
    List<SingleReadEvidence> evidence = SingleReadEvidence.createEvidence(this, 0, asm);
    if (evidence.isEmpty()) {
        return true;
    }

    // Breakend too long relative to the expected fragment size: treated as a misassembly.
    int softClipLength = SAMRecordUtil.getSoftClipLength(asm, attributes.getAssemblyDirection());
    boolean tooLong = softClipLength > config.maxExpectedBreakendLengthMultiple * getMaxConcordantFragmentSize();
    if (tooLong) {
        log.debug(String.format("Filtering %s at %s:%d due to misassembly (breakend %dbp)", asm.getReadName(), asm.getReferenceName(), asm.getAlignmentStart(), softClipLength));
        return true;
    }

    // Not enough supporting reads.
    if (attributes.getAssemblySupportCount() < config.minReads) {
        return true;
    }

    // An unanchored assembly that is not actually any longer than the reads that were
    // assembled together adds nothing; at worst, it is a misassembly.
    boolean hasSoftClipSupport = attributes.getAssemblySupportCountSoftClip() != 0;
    return !hasSoftClipSupport && softClipLength <= attributes.getAssemblyReadPairLengthMax();
}
use of au.edu.wehi.idsv.configuration.AssemblyConfiguration in project gridss by PapenfussLab.
the class AssemblyEvidenceSource method throttled.
/**
 * Wraps an evidence iterator in a {@link DirectedEvidenceDensityThrottlingIterator}
 * configured from the assembly downsampling settings, and registers the wrapper
 * with the processing context under the name
 * {@code AssemblyEvidenceSource.class.getName() + ".throttle"}.
 *
 * @param it evidence to be throttled
 * @return the throttling iterator wrapping {@code it}
 */
private Iterator<DirectedEvidence> throttled(Iterator<DirectedEvidence> it) {
    AssemblyConfiguration assemblyConfig = getContext().getAssemblyParameters();
    DirectedEvidenceDensityThrottlingIterator throttlingIt = new DirectedEvidenceDensityThrottlingIterator(
            throttled,
            getContext().getDictionary(),
            getContext().getLinear(),
            it,
            // window is never smaller than the maximum concordant fragment size
            Math.max(assemblyConfig.downsampling.minimumDensityWindowSize, getMaxConcordantFragmentSize()),
            // acceptance density is expressed as a portion of the target density
            assemblyConfig.downsampling.acceptDensityPortion * assemblyConfig.downsampling.targetEvidenceDensity,
            assemblyConfig.downsampling.targetEvidenceDensity);
    String bufferName = AssemblyEvidenceSource.class.getName() + ".throttle";
    getContext().registerBuffer(bufferName, throttlingIt);
    return throttlingIt;
}
use of au.edu.wehi.idsv.configuration.AssemblyConfiguration in project gridss by PapenfussLab.
the class PositionalAssembler method createAssembler.
/**
 * Creates the contig assembler for the reference sequence of the next evidence
 * record in {@code it}, and stores it in {@code currentAssembler}.
 *
 * <p>The assembler is fed by a chain of iterators built here, in order:
 * support nodes, aggregate nodes, path nodes and, only when
 * {@code errorCorrection.maxBaseMismatchForCollapse > 0}, a collapse step
 * (all paths or leaf/bubble only) followed by path simplification. When
 * {@code Defaults.SANITY_CHECK_DE_BRUIJN} is set, each stage is additionally
 * wrapped in an assertion interceptor from the evidence tracker.
 *
 * <p>If assembly progress visualisation is enabled, a CSV export tracker is
 * attached to the assembler. Failure to set up that tracker is reported as a
 * warning and does not prevent assembly.
 *
 * <p>Side effects: updates {@code currentContig} and {@code currentAssembler}.
 *
 * @return the newly created assembler (the same instance as {@code currentAssembler})
 */
private NonReferenceContigAssembler createAssembler() {
    AssemblyConfiguration ap = context.getAssemblyParameters();
    int maxKmerSupportIntervalWidth = source.getMaxConcordantFragmentSize() - source.getMinConcordantFragmentSize() + 1;
    int maxReadLength = source.getMaxReadLength();
    int k = ap.k;
    int maxEvidenceSupportIntervalWidth = maxKmerSupportIntervalWidth + maxReadLength - k + 2;
    int maxPathLength = ap.positional.maxPathLengthInBases(maxReadLength);
    int maxPathCollapseLength = ap.errorCorrection.maxPathCollapseLengthInBases(maxReadLength);
    int anchorAssemblyLength = ap.anchorLength;

    // The next evidence record determines which reference sequence is assembled.
    int referenceIndex = it.peek().getBreakendSummary().referenceIndex;
    int firstPosition = it.peek().getBreakendSummary().start;
    currentContig = context.getDictionary().getSequence(referenceIndex).getSequenceName();

    // Stage 1: evidence restricted to this reference index -> support nodes -> aggregate nodes.
    ReferenceIndexIterator evidenceIt = new ReferenceIndexIterator(it, referenceIndex);
    EvidenceTracker evidenceTracker = new EvidenceTracker();
    SupportNodeIterator supportIt = new SupportNodeIterator(k, evidenceIt, source.getMaxConcordantFragmentSize(), evidenceTracker, ap.includePairAnchors, ap.pairAnchorMismatchIgnoreEndBases);
    AggregateNodeIterator agIt = new AggregateNodeIterator(supportIt);
    Iterator<KmerNode> knIt = agIt;
    if (Defaults.SANITY_CHECK_DE_BRUIJN) {
        knIt = evidenceTracker.new AggregateNodeAssertionInterceptor(knIt);
    }

    // Stage 2: aggregate nodes -> path nodes.
    PathNodeIterator pathNodeIt = new PathNodeIterator(knIt, maxPathLength, k);
    Iterator<KmerPathNode> pnIt = pathNodeIt;
    if (Defaults.SANITY_CHECK_DE_BRUIJN) {
        pnIt = evidenceTracker.new PathNodeAssertionInterceptor(pnIt, "PathNodeIterator");
    }

    // Stage 3 (optional): error correction by path collapse, then simplification.
    // Both remain null when error correction is disabled; the export tracker below
    // is handed the null references in that case.
    CollapseIterator collapseIt = null;
    PathSimplificationIterator simplifyIt = null;
    if (ap.errorCorrection.maxBaseMismatchForCollapse > 0) {
        if (!ap.errorCorrection.collapseBubblesOnly) {
            log.warn("Collapsing all paths is an exponential time operation. Gridss is likely to hang if your genome contains repetative sequence");
            collapseIt = new PathCollapseIterator(pnIt, k, maxPathCollapseLength, ap.errorCorrection.maxBaseMismatchForCollapse, false, 0);
        } else {
            collapseIt = new LeafBubbleCollapseIterator(pnIt, k, maxPathCollapseLength, ap.errorCorrection.maxBaseMismatchForCollapse);
        }
        pnIt = collapseIt;
        if (Defaults.SANITY_CHECK_DE_BRUIJN) {
            pnIt = evidenceTracker.new PathNodeAssertionInterceptor(pnIt, "PathCollapseIterator");
        }
        simplifyIt = new PathSimplificationIterator(pnIt, maxPathLength, maxKmerSupportIntervalWidth);
        pnIt = simplifyIt;
        if (Defaults.SANITY_CHECK_DE_BRUIJN) {
            pnIt = evidenceTracker.new PathNodeAssertionInterceptor(pnIt, "PathSimplificationIterator");
        }
    }

    currentAssembler = new NonReferenceContigAssembler(pnIt, referenceIndex, maxEvidenceSupportIntervalWidth, anchorAssemblyLength, k, source, assemblyNameGenerator, evidenceTracker, currentContig);

    // Optional progress export: one CSV per contig / first position / direction.
    VisualisationConfiguration vis = context.getConfig().getVisualisation();
    if (vis.assemblyProgress) {
        String filename = String.format("positional-%s_%d-%s.csv", currentContig, firstPosition, direction);
        File file = new File(vis.directory, filename);
        PositionalDeBruijnGraphTracker exportTracker;
        try {
            exportTracker = new PositionalDeBruijnGraphTracker(file, supportIt, agIt, pathNodeIt, collapseIt, simplifyIt, evidenceTracker, currentAssembler);
            exportTracker.writeHeader();
            currentAssembler.setExportTracker(exportTracker);
        } catch (IOException e) {
            // Visualisation is best-effort: assembly continues without the tracker.
            // Warn so that a user who asked for the export can see why it is missing;
            // the exception itself is still logged at debug level as before.
            log.warn("Unable to write assembly progress visualisation to " + file + ": " + e);
            log.debug(e);
        }
    }
    currentAssembler.setTelemetry(getTelemetry());
    return currentAssembler;
}
Aggregations