use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.
the class FullSeqAssembler method buildClone.
/* ================================= Re-align and build final clone ====================================== */
private Clone buildClone(double count, BranchSequences targets) {
Alignment<NucleotideSequence>[] vTopHitAlignments = new Alignment[targets.ranges.length], jTopHitAlignments = new Alignment[targets.ranges.length];
VDJCHit hit = clone.getBestHit(Variable);
if (hit == null)
throw new UnsupportedOperationException("No V hit.");
NucleotideSequence vTopReferenceSequence = hit.getGene().getFeature(hit.getAlignedFeature());
hit = clone.getBestHit(Joining);
if (hit == null)
throw new UnsupportedOperationException("No J hit.");
NucleotideSequence jTopReferenceSequence = hit.getGene().getFeature(hit.getAlignedFeature());
// Excessive optimization
CachedIntArray cachedIntArray = new CachedIntArray();
int assemblingFeatureLength = targets.assemblingFeatureLength;
for (int i = 0; i < targets.ranges.length; i++) {
Range range = targets.ranges[i];
NucleotideSequence sequence = targets.sequences[i].getSequence();
// Asserts
if (range.getFrom() < N_LEFT_DUMMIES + lengthV && range.getTo() >= N_LEFT_DUMMIES + lengthV && i != targets.assemblingFeatureTargetId)
throw new RuntimeException();
if (range.getTo() >= rightAssemblingFeatureBound && range.getFrom() < rightAssemblingFeatureBound && i != targets.assemblingFeatureTargetId)
throw new RuntimeException();
if (range.getTo() < N_LEFT_DUMMIES + lengthV) {
boolean floatingLeftBound = !parameters.alignedRegionsOnly && i == 0 && alignerParameters.getVAlignerParameters().getParameters().isFloatingLeftBound();
// Can be reduced to a single statement
if (range.getFrom() < N_LEFT_DUMMIES)
// This target contain extra non-V nucleotides on the left
vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), 0, range.getTo() - N_LEFT_DUMMIES, 0, sequence.size(), !floatingLeftBound, cachedIntArray);
else if (floatingLeftBound)
vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), range.getFrom() - N_LEFT_DUMMIES, range.length(), 0, sequence.size(), false, cachedIntArray);
else
vTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence, range.getFrom() - N_LEFT_DUMMIES, range.length(), 0, sequence.size());
} else if (i == targets.assemblingFeatureTargetId) {
/*
* V gene
*/
boolean vFloatingLeftBound = !parameters.alignedRegionsOnly && i == 0 && alignerParameters.getVAlignerParameters().getParameters().isFloatingLeftBound();
// Can be reduced to a single statement
if (range.getFrom() < N_LEFT_DUMMIES)
// This target contain extra non-V nucleotides on the left
vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), 0, lengthV, 0, targets.assemblingFeatureOffset, !vFloatingLeftBound, cachedIntArray);
else if (vFloatingLeftBound)
vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), range.getFrom() - N_LEFT_DUMMIES, lengthV - (range.getFrom() - N_LEFT_DUMMIES), 0, targets.assemblingFeatureOffset, false, cachedIntArray);
else
vTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence, range.getFrom() - N_LEFT_DUMMIES, lengthV - (range.getFrom() - N_LEFT_DUMMIES), 0, targets.assemblingFeatureOffset);
/*
* J gene
*/
boolean jFloatingRightBound = !parameters.alignedRegionsOnly && i == targets.ranges.length - 1 && alignerParameters.getJAlignerParameters().getParameters().isFloatingRightBound();
if (range.getTo() >= rightAssemblingFeatureBound + jLength)
// This target contain extra non-J nucleotides on the right
jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset, jLength, targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength), !jFloatingRightBound, cachedIntArray);
else if (jFloatingRightBound)
jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset, range.getTo() - rightAssemblingFeatureBound, targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength), false, cachedIntArray);
else
jTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence, jOffset, range.getTo() - rightAssemblingFeatureBound, targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength));
} else if (range.getFrom() > rightAssemblingFeatureBound) {
boolean floatingRightBound = !parameters.alignedRegionsOnly && i == targets.ranges.length - 1 && alignerParameters.getJAlignerParameters().getParameters().isFloatingRightBound();
if (range.getTo() >= rightAssemblingFeatureBound + jLength)
// This target contain extra non-J nucleotides on the right
jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset + (range.getFrom() - rightAssemblingFeatureBound), jLength - (range.getFrom() - rightAssemblingFeatureBound), 0, sequence.size(), !floatingRightBound, cachedIntArray);
else if (floatingRightBound)
jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset + (range.getFrom() - rightAssemblingFeatureBound), range.length(), 0, sequence.size(), false, cachedIntArray);
else
jTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence, jOffset + (range.getFrom() - rightAssemblingFeatureBound), range.length(), 0, sequence.size());
} else
throw new RuntimeException();
}
NSequenceWithQuality assemblingFeatureSeq = targets.sequences[targets.assemblingFeatureTargetId].getRange(targets.assemblingFeatureOffset, targets.assemblingFeatureOffset + targets.assemblingFeatureLength);
Clone clone = cloneFactory.create(0, count, geneScores, new NSequenceWithQuality[] { assemblingFeatureSeq });
vTopHitAlignments[targets.assemblingFeatureTargetId] = mergeTwoAlignments(vTopHitAlignments[targets.assemblingFeatureTargetId], clone.getBestHit(Variable).getAlignment(0).move(targets.assemblingFeatureOffset));
jTopHitAlignments[targets.assemblingFeatureTargetId] = mergeTwoAlignments(clone.getBestHit(Joining).getAlignment(0).move(targets.assemblingFeatureOffset), jTopHitAlignments[targets.assemblingFeatureTargetId]);
EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
for (GeneType gt : GeneType.VDJC_REFERENCE) hits.put(gt, Arrays.stream(clone.getHits(gt)).map(h -> moveHitTarget(h, targets.assemblingFeatureTargetId, targets.assemblingFeatureOffset, targets.ranges.length)).toArray(VDJCHit[]::new));
VDJCHit[] tmp = hits.get(Variable);
tmp[0] = substituteAlignments(tmp[0], vTopHitAlignments);
tmp = hits.get(Joining);
tmp[0] = substituteAlignments(tmp[0], jTopHitAlignments);
return new Clone(targets.sequences, hits, count, 0);
}
use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.
the class FullSeqAssembler method toPointSequences.
List<PointSequence> toPointSequences(VDJCAlignments alignments, int iTarget) {
//
// N_LEFT_DUMMIES assemblingFeatureLength
// ------|--------------|--------------|------------------------>
// ↓ ↓ ↓
// 0000000vvvvvvvvvvvvvvCDR3CDR3CDR3CDR3jjjjjjjjjjjjjjjjCCCCCCCCC
VDJCPartitionedSequence target = alignments.getPartitionedTarget(iTarget);
NSequenceWithQuality targetSeq = alignments.getTarget(iTarget);
VDJCHit vHit = alignments.getBestHit(Variable);
Alignment<NucleotideSequence> vAlignment = (vHit == null || vHit.getAlignment(iTarget) == null || !Objects.equals(genes.v, vHit.getGene()) || vHit.getAlignment(iTarget).getSequence1Range().getFrom() > lengthV) ? null : vHit.getAlignment(iTarget);
VDJCHit jHit = alignments.getBestHit(Joining);
Alignment<NucleotideSequence> jAlignment = (jHit == null || jHit.getAlignment(iTarget) == null || !Objects.equals(genes.j, jHit.getGene()) || jHit.getAlignment(iTarget).getSequence1Range().getTo() < jOffset) ? null : jHit.getAlignment(iTarget);
List<PointSequence> points = new ArrayList<>();
if (target.getPartitioning().isAvailable(assemblingFeature.getFirstPoint())) {
// This target contains left edge of the assembling feature
int leftStop = target.getPartitioning().getPosition(assemblingFeature.getFirstPoint());
if (hasV) {
if (vAlignment != null)
toPointSequencesByAlignments(points, vAlignment, targetSeq, new Range(parameters.alignedRegionsOnly ? vAlignment.getSequence2Range().getFrom() : 0, leftStop), N_LEFT_DUMMIES);
} else if (!parameters.alignedRegionsOnly)
toPointSequencesNoAlignments(points, targetSeq, new Range(0, leftStop), N_LEFT_DUMMIES - leftStop);
} else if (hasV && vAlignment != null)
// This target ends before beginning (left edge) of the assembling feature
toPointSequencesByAlignments(points, vAlignment, targetSeq, new Range(parameters.alignedRegionsOnly ? vAlignment.getSequence2Range().getFrom() : 0, vAlignment.getSequence2Range().getTo()), N_LEFT_DUMMIES);
if (target.getPartitioning().isAvailable(assemblingFeature.getLastPoint())) {
// This target contains right edge of the assembling feature
int rightStart = target.getPartitioning().getPosition(assemblingFeature.getLastPoint());
if (hasJ) {
if (jAlignment != null)
toPointSequencesByAlignments(points, jAlignment, targetSeq, new Range(rightStart, parameters.alignedRegionsOnly ? jAlignment.getSequence2Range().getTo() : targetSeq.size()), N_LEFT_DUMMIES + lengthV + assemblingFeatureLength - jOffset);
} else
toPointSequencesNoAlignments(points, targetSeq, new Range(rightStart, targetSeq.size()), N_LEFT_DUMMIES + lengthV + assemblingFeatureLength - rightStart);
} else if (hasJ && jAlignment != null)
// This target starts after the end (right edge) of the assembling feature
toPointSequencesByAlignments(points, jAlignment, targetSeq, new Range(jAlignment.getSequence2Range().getFrom(), parameters.alignedRegionsOnly ? jAlignment.getSequence2Range().getTo() : targetSeq.size()), N_LEFT_DUMMIES + lengthV + assemblingFeatureLength - jOffset);
return points;
}
use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.
the class VDJCAlignerWithMergeTest method test1.
@Test
public void test1() throws Exception {
VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
ByteArrayOutputStream bos = new ByteArrayOutputStream();
List<VDJCAlignments> alignemntsList = new ArrayList<>();
int header;
int total = 0;
int leftHit = 0;
try (PairedFastqReader reader = new PairedFastqReader(VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R2.fastq"), true)) {
VDJCAlignerWithMerge aligner = new VDJCAlignerWithMerge(parameters);
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) {
if (parameters.containsRequiredFeature(gene))
aligner.addGene(gene);
}
for (PairedRead read : CUtils.it(reader)) {
++total;
VDJCAlignmentResult<PairedRead> result = aligner.process(read);
if (result.alignment != null) {
alignemntsList.add(result.alignment);
for (VDJCHit hit : result.alignment.getHits(GeneType.Variable)) if (hit.getAlignment(0) != null && hit.getAlignment(1) != null)
++leftHit;
}
}
}
// for (VDJCAlignments alignments : alignemntsList) {
// for (int i = 0; i < alignments.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(alignments, i));
// }
// }
System.out.println(alignemntsList.size());
System.out.println(total);
System.out.println(leftHit);
Assert.assertTrue(alignemntsList.size() > 10);
// System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
//
// try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()), ll)) {
// int i = 0;
// for (VDJCAlignments alignments : CUtils.it(reader))
// Assert.assertEquals(alignemntsList.get(i++), alignments);
// }
}
use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.
the class AlignedTarget method orderTargets.
public static List<AlignedTarget> orderTargets(List<AlignedTarget> targets) {
// Selecting best gene by total score
final EnumMap<GeneType, VDJCGene> bestGenes = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VDJC_REFERENCE) {
TObjectLongMap<VDJCGene> scores = new TObjectLongHashMap<>();
for (AlignedTarget target : targets) {
for (VDJCHit hit : target.getAlignments().getHits(geneType)) {
Alignment<NucleotideSequence> alignment = hit.getAlignment(target.getTargetId());
if (alignment != null)
scores.adjustOrPutValue(hit.getGene(), (long) alignment.getScore(), (long) alignment.getScore());
}
}
VDJCGene bestGene = null;
long bestScore = Long.MIN_VALUE;
TObjectLongIterator<VDJCGene> it = scores.iterator();
while (it.hasNext()) {
it.advance();
if (bestScore < it.value()) {
bestScore = it.value();
bestGene = it.key();
}
}
if (bestGene != null)
bestGenes.put(geneType, bestGene);
}
// Class to facilitate comparison between targets
final class Wrapper implements Comparable<Wrapper> {
final AlignedTarget target;
final EnumMap<GeneType, Alignment<NucleotideSequence>> alignments = new EnumMap<>(GeneType.class);
Wrapper(AlignedTarget target) {
this.target = target;
for (VDJCGene gene : bestGenes.values()) for (VDJCHit hit : target.getAlignments().getHits(gene.getGeneType())) if (hit.getGene() == gene) {
Alignment<NucleotideSequence> alignment = hit.getAlignment(target.targetId);
if (alignment != null) {
alignments.put(gene.getGeneType(), alignment);
break;
}
}
}
GeneType firstAlignedGeneType() {
for (GeneType geneType : GeneType.VDJC_REFERENCE) if (alignments.containsKey(geneType))
return geneType;
return null;
}
@Override
public int compareTo(Wrapper o) {
GeneType thisFirstGene = firstAlignedGeneType();
GeneType otherFirstGene = o.firstAlignedGeneType();
int cmp = Byte.compare(thisFirstGene.getOrder(), otherFirstGene.getOrder());
return cmp != 0 ? cmp : Integer.compare(alignments.get(thisFirstGene).getSequence1Range().getLower(), o.alignments.get(thisFirstGene).getSequence1Range().getLower());
}
}
// Creating wrappers and sorting list
List<Wrapper> wrappers = new ArrayList<>(targets.size());
for (AlignedTarget target : targets) {
Wrapper wrapper = new Wrapper(target);
if (wrapper.firstAlignedGeneType() == null)
continue;
wrappers.add(wrapper);
}
Collections.sort(wrappers);
// Creating result
List<AlignedTarget> result = new ArrayList<>(wrappers.size());
for (Wrapper wrapper : wrappers) result.add(wrapper.target);
return result;
}
use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.
the class ReferencePointCoverageCollector method put.
@Override
public void put(VDJCAlignments alignments) {
totalCount.incrementAndGet();
VDJCHit hit = alignments.getBestHit(refPoint.getGeneType());
int left = -1, right = -1;
for (int i = 0; i < alignments.numberOfTargets(); ++i) {
int position = hit.getPosition(i, refPoint);
if (position == -1)
continue;
Range alignmentRange = hit.getAlignment(i).getSequence2Range();
left = Math.max(position - alignmentRange.getLower(), left);
right = Math.max(alignmentRange.getUpper() - position, right);
}
if (left == -1)
return;
left = Math.min(leftHist.length() - 1, left);
leftHist.incrementAndGet(left);
right = Math.min(rightHist.length() - 1, right);
rightHist.incrementAndGet(right);
}
Aggregations