use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class MergerTest method mAssert.
public static void mAssert(String originalSeq, String seq1, String seq2, String expectedSequence, String expectedQuality) {
NucleotideSequence originalSequence = new NucleotideSequence(originalSeq);
NSequenceWithQuality target1 = new NSequenceWithQuality(seq1, lets('A', seq1.length()));
NSequenceWithQuality target2 = new NSequenceWithQuality(seq2, lets('B', seq2.length()));
LinearGapAlignmentScoring<NucleotideSequence> scoring = new LinearGapAlignmentScoring<>(NucleotideSequence.ALPHABET, 4, -5, -9);
Alignment<NucleotideSequence> alignment1 = Aligner.alignLocalLinear(scoring, originalSequence, target1.getSequence());
Alignment<NucleotideSequence> alignment2 = Aligner.alignLocalLinear(scoring, originalSequence, target2.getSequence());
NSequenceWithQuality result = Merger.merge(new Range(0, originalSequence.size()), new Alignment[] { alignment1, alignment2 }, new NSequenceWithQuality[] { target1, target2 });
Assert.assertEquals(expectedSequence, result.getSequence().toString());
Assert.assertEquals(expectedQuality, result.getQuality().toString());
}
use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class PartialAlignmentsAssemblerAlignerTest method basicTest1.
@Test
public void basicTest1() throws Exception {
Well44497b rg = new Well44497b(12312);
VDJCAlignerParameters rnaSeqParams = VDJCParametersPresets.getByName("rna-seq");
PartialAlignmentsAssemblerAligner aligner = new PartialAlignmentsAssemblerAligner(rnaSeqParams);
VDJCLibrary lib = VDJCLibraryRegistry.getDefault().getLibrary("default", "hs");
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes()) if (gene.isFunctional())
aligner.addGene(gene);
TargetBuilder.VDJCGenes genes = new TargetBuilder.VDJCGenes(lib, "TRBV12-3*00", "TRBD1*00", "TRBJ1-3*00", "TRBC2*00");
// | 305
// 250V + 55CDR3 (20V 7N 10D 3N 15J) + 28J + 100C
NucleotideSequence baseSeq = TargetBuilder.generateSequence(genes, "{CDR3Begin(-250)}V*270 NNNNNNN {DBegin(0)}D*10 NNN {CDR3End(-15):FR4End} {CBegin}C*100", rg);
int len = 70;
NucleotideSequence seq1 = baseSeq.getRange(0, len);
NucleotideSequence seq2 = baseSeq.getRange(245, 245 + len);
NucleotideSequence seq3 = baseSeq.getRange(320, 320 + len);
VDJCAlignmentResult<VDJCMultiRead> alignment = aligner.process(MiXCRTestUtils.createMultiRead(seq1, seq2, seq3));
VDJCAlignments al = alignment.alignment;
Assert.assertNotNull(al);
assertInHits(genes.v, al);
assertInHits(genes.d, al);
assertInHits(genes.j, al);
assertInHits(genes.c, al);
VDJCHit bestV = al.getBestHit(GeneType.Variable);
VDJCHit bestD = al.getBestHit(GeneType.Diversity);
VDJCHit bestJ = al.getBestHit(GeneType.Joining);
VDJCHit bestC = al.getBestHit(GeneType.Constant);
Assert.assertNotNull(bestV.getAlignment(0));
Assert.assertNotNull(bestV.getAlignment(1));
Assert.assertNull(bestV.getAlignment(2));
Assert.assertNull(bestD.getAlignment(0));
Assert.assertNotNull(bestD.getAlignment(1));
Assert.assertNull(bestD.getAlignment(2));
Assert.assertNull(bestJ.getAlignment(0));
Assert.assertNotNull(bestJ.getAlignment(1));
Assert.assertNotNull(bestJ.getAlignment(2));
Assert.assertNull(bestC.getAlignment(0));
Assert.assertNull(bestC.getAlignment(1));
Assert.assertNotNull(bestC.getAlignment(2));
}
use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class VDJCAlignerPVFirst method alignVThenJ.
@SuppressWarnings("unchecked")
PAlignmentHelper alignVThenJ(final Target target) {
/*
* Step 1: alignment of V gene
*/
List<AlignmentHit<NucleotideSequence, VDJCGene>> vAl1 = vAligner.align(target.targets[0].getSequence()).getHits(), vAl2 = vAligner.align(target.targets[1].getSequence()).getHits();
/*
* Step 1.5: eliminating conflicting alignments in favor of alignments covering CDR3 edge
*/
boolean vChimera = checkAndEliminateChimera(vAl1, vAl2, GeneType.Variable);
PairedHit[] vHits = extractDoubleHits(vAl1, vAl2);
for (PairedHit vHit : vHits) {
if (vHit.hit1 == null) {
AlignmentHit<NucleotideSequence, VDJCGene> leftHit = vHit.hit0;
// Checking whether alignment touches right read edge (taking to account tolerance)
if (leftHit.getAlignment().getSequence2Range().getTo() < target.targets[0].size() - parameters.getAlignmentBoundaryTolerance())
continue;
final AlignmentScoring<NucleotideSequence> scoring = parameters.getVAlignerParameters().getParameters().getScoring();
if (scoring instanceof AffineGapAlignmentScoring)
// TODO IMPLEMENT
continue;
final NucleotideSequence sequence2 = target.targets[1].getSequence();
final NucleotideSequence sequence1 = leftHit.getAlignment().getSequence1();
final int beginFR3 = leftHit.getRecordPayload().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
if (beginFR3 == -1)
continue;
final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring) scoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, sequence2.size(), false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
continue;
vHit.set(1, new AlignmentHitImpl<NucleotideSequence, VDJCGene>(alignment, leftHit.getRecordPayload()));
vHit.calculateScore();
}
}
vHits = sortAndFilterHits(vHits, parameters.getVAlignerParameters());
/*
* Step 3: alignment of J gene
*/
List<AlignmentHit<NucleotideSequence, VDJCGene>> jAl1 = performJAlignment(target, vHits, 0), jAl2 = performJAlignment(target, vHits, 1);
/*
* Step 3.5: eliminating conflicting alignments in favor of alignments covering CDR3 edge
*/
boolean jChimera = checkAndEliminateChimera(jAl1, jAl2, GeneType.Joining);
PairedHit[] jHits = extractDoubleHits(jAl1, jAl2);
for (PairedHit jHit : jHits) {
if (jHit.hit0 == null) {
AlignmentHit<NucleotideSequence, VDJCGene> rightHit = jHit.hit1;
// Checking whether alignment touches left read edge (taking to account tolerance)
if (rightHit.getAlignment().getSequence2Range().getFrom() > parameters.getAlignmentBoundaryTolerance())
continue;
final AlignmentScoring<NucleotideSequence> scoring = parameters.getJAlignerParameters().getParameters().getScoring();
if (scoring instanceof AffineGapAlignmentScoring)
// TODO IMPLEMENT
continue;
final NucleotideSequence sequence2 = target.targets[0].getSequence();
final NucleotideSequence sequence1 = rightHit.getAlignment().getSequence1();
int begin = 0;
if (vHits.length != 0 && vHits[0].hit0 != null) {
begin = vHits[0].hit0.getAlignment().getSequence2Range().getTo() - parameters.getVJOverlapWindow();
if (begin < 0)
begin = 0;
}
final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) scoring, sequence1, sequence2, 0, sequence1.size(), begin, sequence2.size() - begin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
continue;
jHit.set(0, new AlignmentHitImpl<NucleotideSequence, VDJCGene>(alignment, rightHit.getRecordPayload()));
jHit.calculateScore();
}
}
jHits = sortAndFilterHits(jHits, parameters.getJAlignerParameters());
// Check if parameters allow chimeras
if (!parameters.isAllowChimeras()) {
// Calculate common chains
Chains commonChains = getVJCommonChains(vHits, jHits);
if (!commonChains.isEmpty()) {
// Filtering V genes
int filteredSize = 0;
for (PairedHit hit : vHits) if (hit.getGene().getChains().intersects(commonChains))
++filteredSize;
// Perform filtering (new array allocation) only if needed
if (vHits.length != filteredSize) {
PairedHit[] newHits = new PairedHit[filteredSize];
// Used as pointer
filteredSize = 0;
for (PairedHit hit : vHits) if (hit.getGene().getChains().intersects(commonChains))
newHits[filteredSize++] = hit;
assert newHits.length == filteredSize;
vHits = newHits;
}
// Filtering J genes
filteredSize = 0;
for (PairedHit hit : jHits) if (hit.getGene().getChains().intersects(commonChains))
++filteredSize;
// Perform filtering (new array allocation) only if needed
if (jHits.length != filteredSize) {
PairedHit[] newHits = new PairedHit[filteredSize];
// Used as pointer
filteredSize = 0;
for (PairedHit hit : jHits) if (hit.getGene().getChains().intersects(commonChains))
newHits[filteredSize++] = hit;
assert newHits.length == filteredSize;
jHits = newHits;
}
}
}
return new PAlignmentHelper(target, vHits, jHits, vChimera, jChimera);
}
use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class PartialAlignmentsAssembler method searchOverlaps.
@SuppressWarnings("unchecked")
private OverlapSearchResult searchOverlaps(final VDJCAlignments rightAl, final boolean allowChimeras) {
final Chains jChains = rightAl.getAllChains(GeneType.Joining);
int rightTargetId = getRightPartitionedSequence(rightAl);
if (rightTargetId == -1)
return null;
rightParts.incrementAndGet();
final VDJCPartitionedSequence rightTarget = rightAl.getPartitionedTarget(rightTargetId);
NSequenceWithQuality rightSeqQ = rightTarget.getSequence();
NucleotideSequence rightSeq = rightSeqQ.getSequence();
int stop = rightTarget.getPartitioning().getPosition(ReferencePoint.JBeginTrimmed);
if (stop == -1)
stop = rightTarget.getSequence().size();
else
stop -= kOffset;
// black list of left parts failed due to inconsistent overlapped alignments (failed AMerge)
TLongHashSet blackList = new TLongHashSet();
SEARCH_LEFT_PARTS: while (true) {
int maxOverlap = -1, maxDelta = -1, maxOverlapIndexInList = -1, maxBegin = -1, maxEnd = -1;
List<KMerInfo> maxOverlapList = null;
boolean isMaxOverOverlapped = false;
for (int rFrom = 0; rFrom < stop && rFrom + kValue < rightSeqQ.size(); rFrom++) {
long kMer = kMer(rightSeqQ.getSequence(), rFrom, kValue);
List<KMerInfo> match = kToIndexLeft.get(kMer);
if (match == null)
continue;
out: for (int i = 0; i < match.size(); i++) {
boolean isOverOverlapped = false;
final VDJCAlignments leftAl = match.get(i).getAlignments();
if (blackList.contains(leftAl.getAlignmentsIndex()))
continue;
if (// You shall not merge with yourself
leftAl.getAlignmentsIndex() == rightAl.getAlignmentsIndex() || alreadyMergedIds.contains(leftAl.getAlignmentsIndex()))
continue;
// Checking chains compatibility
if (!allowChimeras && jChains != null && !leftAl.getAllChains(GeneType.Variable).intersects(jChains))
continue;
// Check for the same V
if (leftAl.getBestHit(GeneType.Variable) != null && rightAl.getBestHit(GeneType.Variable) != null && !leftAl.hasCommonGenes(GeneType.Variable, rightAl))
continue;
final NucleotideSequence leftSeq = leftAl.getPartitionedTarget(getLeftPartitionedSequence(leftAl)).getSequence().getSequence();
int lFrom = match.get(i).kMerPositionFrom;
// begin and end in the coordinates of left
int delta, begin = delta = lFrom - rFrom;
if (begin < 0) {
begin = 0;
isOverOverlapped = true;
}
int end = leftSeq.size();
if (end >= rightSeq.size() + delta) {
end = rightSeq.size() + delta;
isOverOverlapped = true;
}
for (int j = begin; j < end; j++) if (leftSeq.codeAt(j) != rightSeq.codeAt(j - delta))
continue out;
int overlap = end - begin;
if (maxOverlap < overlap) {
maxOverlap = overlap;
maxOverlapList = match;
maxOverlapIndexInList = i;
maxDelta = delta;
maxBegin = begin;
maxEnd = end;
isMaxOverOverlapped = isOverOverlapped;
}
}
}
if (maxOverlapList == null)
return null;
if (maxOverlap < minimalAssembleOverlap)
return null;
final KMerInfo left = maxOverlapList.remove(maxOverlapIndexInList);
VDJCAlignments leftAl = left.alignments;
// final long readId = rightAl.getReadId();
ArrayList<AlignedTarget> leftTargets = extractAlignedTargets(leftAl);
ArrayList<AlignedTarget> rightTargets = extractAlignedTargets(rightAl);
AlignedTarget leftCentral = leftTargets.get(left.targetId);
AlignedTarget rightCentral = rightTargets.get(rightTargetId);
AlignedTarget central = targetMerger.merge(leftCentral, rightCentral, maxDelta, SequenceHistory.OverlapType.CDR3Overlap, 0);
// Setting overlap position
central = AlignedTarget.setOverlapRange(central, maxBegin, maxEnd);
final List<AlignedTarget> leftDescriptors = new ArrayList<>(2), rightDescriptors = new ArrayList<>(2);
for (int i = 0; i < left.targetId; ++i) leftDescriptors.add(leftTargets.get(i));
for (int i = left.targetId + 1; i < leftAl.numberOfTargets(); ++i) rightDescriptors.add(leftTargets.get(i));
for (int i = 0; i < rightTargetId; ++i) leftDescriptors.add(rightTargets.get(i));
for (int i = rightTargetId + 1; i < rightAl.numberOfTargets(); ++i) rightDescriptors.add(rightTargets.get(i));
// Merging to VJ junction
List<AlignedTarget>[] allDescriptors = new List[] { leftDescriptors, rightDescriptors };
TargetMerger.TargetMergingResult bestResult = TargetMerger.FAILED_RESULT;
int bestI;
// Trying to merge left and right reads to central one
for (List<AlignedTarget> descriptors : allDescriptors) do {
bestI = -1;
for (int i = 0; i < descriptors.size(); i++) {
TargetMerger.TargetMergingResult result = targetMerger.merge(descriptors.get(i), central);
if (result.failedDueInconsistentAlignments()) {
// Inconsistent alignments -> retry
blackList.add(leftAl.getAlignmentsIndex());
continue SEARCH_LEFT_PARTS;
}
if (bestResult.getScore() < result.getScore()) {
bestResult = result;
bestI = i;
}
}
if (bestI != -1) {
assert bestResult != TargetMerger.FAILED_RESULT;
central = bestResult.getResult();
descriptors.remove(bestI);
}
} while (bestI != -1);
// Merging left+left / right+right
outer: for (int d = 0; d < allDescriptors.length; d++) {
List<AlignedTarget> descriptors = allDescriptors[d];
for (int i = 0; i < descriptors.size(); i++) for (int j = i + 1; j < descriptors.size(); j++) {
TargetMerger.TargetMergingResult result = targetMerger.merge(descriptors.get(i), descriptors.get(j));
if (result.failedDueInconsistentAlignments()) {
// Inconsistent alignments -> retry
blackList.add(leftAl.getAlignmentsIndex());
continue SEARCH_LEFT_PARTS;
}
if (result.isSuccessful()) {
descriptors.set(i, result.getResult());
descriptors.remove(j);
--d;
continue outer;
}
}
}
if (isMaxOverOverlapped)
overoverlapped.incrementAndGet();
// Creating pre-list of resulting targets
List<AlignedTarget> result = new ArrayList<>();
result.addAll(leftDescriptors);
result.add(central);
result.addAll(rightDescriptors);
// Ordering and filtering final targets
return new OverlapSearchResult(maxOverlapList, left, AlignedTarget.orderTargets(result));
}
}
use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class SingleDAligner method align0.
List<PreVDJCHit> align0(NucleotideSequence sequence, Chains chains, int from, int to) {
if (from > to)
throw new IllegalArgumentException("from > to. from = " + from + " to = " + to + " .");
NucleotideSequence key = sequence.getRange(from, to);
try {
List<PreVDJCHit> cachedResult = resultsCache.get(key);
List<PreVDJCHit> result = new ArrayList<>(cachedResult.size());
PreVDJCHit h;
for (PreVDJCHit hit : cachedResult) {
// filter non-possible chains
if (!chains.intersects(sequences.get(hit.id).chains))
continue;
result.add(h = convert(hit, from));
assert sequence.getRange(h.alignment.getSequence2Range()).equals(h.alignment.getRelativeMutations().mutate(sequences.get(h.id).sequence.getRange(h.alignment.getSequence1Range())));
}
cutToScore(result);
return result;
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
}
Aggregations