use of htsjdk.samtools.Cigar in project gatk by broadinstitute.
the class CigarUtilsUnitTest method testContainsNOperator.
@Test(dataProvider = "testData_containsNOperator")
public void testContainsNOperator(final String cigarStrIn, final boolean expected) {
final Cigar cigarIn = TextCigarCodec.decode(cigarStrIn);
final boolean actual = CigarUtils.containsNOperator(cigarIn);
Assert.assertEquals(actual, expected, cigarStrIn);
}
use of htsjdk.samtools.Cigar in project gatk by broadinstitute.
the class CigarUtilsUnitTest method testReadHasNonClippedBases.
@Test(dataProvider = "testData_ReadHasNonClippedBases")
public void testReadHasNonClippedBases(final String cigarStrIn, final boolean expected) {
final Cigar cigarIn = TextCigarCodec.decode(cigarStrIn);
final boolean actual = CigarUtils.hasNonClippedBases(cigarIn);
Assert.assertEquals(actual, expected);
}
use of htsjdk.samtools.Cigar in project gatk-protected by broadinstitute.
the class ReadThreadingAssembler method findBestPaths.
private List<Haplotype> findBestPaths(final Collection<SeqGraph> graphs, final Haplotype refHaplotype, final SimpleInterval refLoc, final SimpleInterval activeRegionWindow, final Map<SeqGraph, AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
final Collection<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
int failedCigars = 0;
for (final SeqGraph graph : graphs) {
final SeqVertex source = graph.getReferenceSourceVertex();
final SeqVertex sink = graph.getReferenceSinkVertex();
Utils.validateArg(source != null && sink != null, () -> "Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph " + graph);
final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph, source, sink);
finders.add(haplotypeFinder);
final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
while (bestHaplotypes.hasNext()) {
final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
final Haplotype h = kBestHaplotype.haplotype();
if (!returnHaplotypes.contains(h)) {
final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), h.getBases());
if (cigar == null) {
// couldn't produce a meaningful alignment of haplotype to reference, fail quietly
failedCigars++;
continue;
} else if (cigar.isEmpty()) {
throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
} else if (pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH) {
// N cigar elements means that a bubble was too divergent from the reference so skip over this path
continue;
} else if (cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength()) {
// SW failure
throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength() + " ref = " + refHaplotype + " path " + new String(h.getBases()));
}
h.setCigar(cigar);
h.setAlignmentStartHapwrtRef(activeRegionStart);
h.setGenomeLocation(activeRegionWindow);
returnHaplotypes.add(h);
assemblyResultSet.add(h, assemblyResultByGraph.get(graph));
if (debug) {
logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
}
}
}
}
// the first returned by any finder.
if (!returnHaplotypes.contains(refHaplotype)) {
double refScore = Double.NaN;
for (final KBestHaplotypeFinder finder : finders) {
final double candidate = finder.score(refHaplotype);
if (Double.isNaN(candidate)) {
continue;
}
refScore = candidate;
break;
}
refHaplotype.setScore(refScore);
returnHaplotypes.add(refHaplotype);
}
if (failedCigars != 0) {
logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.", failedCigars, refLoc.toString()));
}
if (debug) {
if (returnHaplotypes.size() > 1) {
logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
} else {
logger.info("Found only the reference haplotype in the assembly graph.");
}
for (final Haplotype h : returnHaplotypes) {
logger.info(h.toString());
logger.info("> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
}
}
return new ArrayList<>(returnHaplotypes);
}
use of htsjdk.samtools.Cigar in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModel method createReferenceHaplotype.
/**
* Create a reference haplotype for an active region
*
* @param activeRegion the active region
* @param refBases the ref bases
* @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation()
* @return a reference haplotype
*/
public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) {
Utils.nonNull(activeRegion, "null region");
Utils.nonNull(refBases, "null refBases");
Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc");
final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart();
if (alignmentStart < 0) {
throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart);
}
final Haplotype refHaplotype = new Haplotype(refBases, true);
refHaplotype.setAlignmentStartHapwrtRef(alignmentStart);
final Cigar c = new Cigar();
c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
refHaplotype.setCigar(c);
return refHaplotype;
}
use of htsjdk.samtools.Cigar in project gatk by broadinstitute.
the class ReferenceConfidenceModel method createReferenceHaplotype.
/**
* Create a reference haplotype for an active region
*
* @param activeRegion the active region
* @param refBases the ref bases
* @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation()
* @return a reference haplotype
*/
public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) {
Utils.nonNull(activeRegion, "null region");
Utils.nonNull(refBases, "null refBases");
Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc");
final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart();
if (alignmentStart < 0) {
throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart);
}
final Haplotype refHaplotype = new Haplotype(refBases, true);
refHaplotype.setAlignmentStartHapwrtRef(alignmentStart);
final Cigar c = new Cigar();
c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
refHaplotype.setCigar(c);
return refHaplotype;
}
Aggregations