Search in sources :

Example 1 with VDJCHit

use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.

the class FullSeqAssembler method buildClone.

/* ================================= Re-align and build final clone ====================================== */
private Clone buildClone(double count, BranchSequences targets) {
    Alignment<NucleotideSequence>[] vTopHitAlignments = new Alignment[targets.ranges.length], jTopHitAlignments = new Alignment[targets.ranges.length];
    VDJCHit hit = clone.getBestHit(Variable);
    if (hit == null)
        throw new UnsupportedOperationException("No V hit.");
    NucleotideSequence vTopReferenceSequence = hit.getGene().getFeature(hit.getAlignedFeature());
    hit = clone.getBestHit(Joining);
    if (hit == null)
        throw new UnsupportedOperationException("No J hit.");
    NucleotideSequence jTopReferenceSequence = hit.getGene().getFeature(hit.getAlignedFeature());
    // Excessive optimization
    CachedIntArray cachedIntArray = new CachedIntArray();
    int assemblingFeatureLength = targets.assemblingFeatureLength;
    for (int i = 0; i < targets.ranges.length; i++) {
        Range range = targets.ranges[i];
        NucleotideSequence sequence = targets.sequences[i].getSequence();
        // Asserts
        if (range.getFrom() < N_LEFT_DUMMIES + lengthV && range.getTo() >= N_LEFT_DUMMIES + lengthV && i != targets.assemblingFeatureTargetId)
            throw new RuntimeException();
        if (range.getTo() >= rightAssemblingFeatureBound && range.getFrom() < rightAssemblingFeatureBound && i != targets.assemblingFeatureTargetId)
            throw new RuntimeException();
        if (range.getTo() < N_LEFT_DUMMIES + lengthV) {
            boolean floatingLeftBound = !parameters.alignedRegionsOnly && i == 0 && alignerParameters.getVAlignerParameters().getParameters().isFloatingLeftBound();
            // Can be reduced to a single statement
            if (range.getFrom() < N_LEFT_DUMMIES)
                // This target contain extra non-V nucleotides on the left
                vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), 0, range.getTo() - N_LEFT_DUMMIES, 0, sequence.size(), !floatingLeftBound, cachedIntArray);
            else if (floatingLeftBound)
                vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), range.getFrom() - N_LEFT_DUMMIES, range.length(), 0, sequence.size(), false, cachedIntArray);
            else
                vTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence, range.getFrom() - N_LEFT_DUMMIES, range.length(), 0, sequence.size());
        } else if (i == targets.assemblingFeatureTargetId) {
            /*
                 *  V gene
                 */
            boolean vFloatingLeftBound = !parameters.alignedRegionsOnly && i == 0 && alignerParameters.getVAlignerParameters().getParameters().isFloatingLeftBound();
            // Can be reduced to a single statement
            if (range.getFrom() < N_LEFT_DUMMIES)
                // This target contain extra non-V nucleotides on the left
                vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), 0, lengthV, 0, targets.assemblingFeatureOffset, !vFloatingLeftBound, cachedIntArray);
            else if (vFloatingLeftBound)
                vTopHitAlignments[i] = alignLinearSeq1FromRight(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getVAlignerParameters().getScoring()), vTopReferenceSequence, sequence.getSequence(), range.getFrom() - N_LEFT_DUMMIES, lengthV - (range.getFrom() - N_LEFT_DUMMIES), 0, targets.assemblingFeatureOffset, false, cachedIntArray);
            else
                vTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence, range.getFrom() - N_LEFT_DUMMIES, lengthV - (range.getFrom() - N_LEFT_DUMMIES), 0, targets.assemblingFeatureOffset);
            /*
                 *  J gene
                 */
            boolean jFloatingRightBound = !parameters.alignedRegionsOnly && i == targets.ranges.length - 1 && alignerParameters.getJAlignerParameters().getParameters().isFloatingRightBound();
            if (range.getTo() >= rightAssemblingFeatureBound + jLength)
                // This target contain extra non-J nucleotides on the right
                jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset, jLength, targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength), !jFloatingRightBound, cachedIntArray);
            else if (jFloatingRightBound)
                jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset, range.getTo() - rightAssemblingFeatureBound, targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength), false, cachedIntArray);
            else
                jTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence, jOffset, range.getTo() - rightAssemblingFeatureBound, targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength));
        } else if (range.getFrom() > rightAssemblingFeatureBound) {
            boolean floatingRightBound = !parameters.alignedRegionsOnly && i == targets.ranges.length - 1 && alignerParameters.getJAlignerParameters().getParameters().isFloatingRightBound();
            if (range.getTo() >= rightAssemblingFeatureBound + jLength)
                // This target contain extra non-J nucleotides on the right
                jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset + (range.getFrom() - rightAssemblingFeatureBound), jLength - (range.getFrom() - rightAssemblingFeatureBound), 0, sequence.size(), !floatingRightBound, cachedIntArray);
            else if (floatingRightBound)
                jTopHitAlignments[i] = alignLinearSeq1FromLeft(((LinearGapAlignmentScoring<NucleotideSequence>) alignerParameters.getJAlignerParameters().getScoring()), jTopReferenceSequence, sequence.getSequence(), jOffset + (range.getFrom() - rightAssemblingFeatureBound), range.length(), 0, sequence.size(), false, cachedIntArray);
            else
                jTopHitAlignments[i] = Aligner.alignGlobal(alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence, jOffset + (range.getFrom() - rightAssemblingFeatureBound), range.length(), 0, sequence.size());
        } else
            throw new RuntimeException();
    }
    NSequenceWithQuality assemblingFeatureSeq = targets.sequences[targets.assemblingFeatureTargetId].getRange(targets.assemblingFeatureOffset, targets.assemblingFeatureOffset + targets.assemblingFeatureLength);
    Clone clone = cloneFactory.create(0, count, geneScores, new NSequenceWithQuality[] { assemblingFeatureSeq });
    vTopHitAlignments[targets.assemblingFeatureTargetId] = mergeTwoAlignments(vTopHitAlignments[targets.assemblingFeatureTargetId], clone.getBestHit(Variable).getAlignment(0).move(targets.assemblingFeatureOffset));
    jTopHitAlignments[targets.assemblingFeatureTargetId] = mergeTwoAlignments(clone.getBestHit(Joining).getAlignment(0).move(targets.assemblingFeatureOffset), jTopHitAlignments[targets.assemblingFeatureTargetId]);
    EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
    for (GeneType gt : GeneType.VDJC_REFERENCE) hits.put(gt, Arrays.stream(clone.getHits(gt)).map(h -> moveHitTarget(h, targets.assemblingFeatureTargetId, targets.assemblingFeatureOffset, targets.ranges.length)).toArray(VDJCHit[]::new));
    VDJCHit[] tmp = hits.get(Variable);
    tmp[0] = substituteAlignments(tmp[0], vTopHitAlignments);
    tmp = hits.get(Joining);
    tmp[0] = substituteAlignments(tmp[0], jTopHitAlignments);
    return new Clone(targets.sequences, hits, count, 0);
}
Also used : IntStream(java.util.stream.IntStream) VDJCPartitionedSequence(com.milaboratory.mixcr.basictypes.VDJCPartitionedSequence) TIntObjectHashMap(gnu.trove.map.hash.TIntObjectHashMap) java.util(java.util) TObjectFloatHashMap(gnu.trove.map.hash.TObjectFloatHashMap) Clone(com.milaboratory.mixcr.basictypes.Clone) Supplier(java.util.function.Supplier) TIntIntIterator(gnu.trove.iterator.TIntIntIterator) com.milaboratory.core.alignment(com.milaboratory.core.alignment) VDJCAlignerParameters(com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) MutationsBuilder(com.milaboratory.core.mutations.MutationsBuilder) Constants(gnu.trove.impl.Constants) io.repseq.core(io.repseq.core) Range(com.milaboratory.core.Range) TObjectIntHashMap(gnu.trove.map.hash.TObjectIntHashMap) CUtils(cc.redberry.pipe.CUtils) TIntIntHashMap(gnu.trove.map.hash.TIntIntHashMap) OutputPort(cc.redberry.pipe.OutputPort) Collectors(java.util.stream.Collectors) com.milaboratory.core.sequence(com.milaboratory.core.sequence) TIntHashSet(gnu.trove.set.hash.TIntHashSet) Joining(io.repseq.core.GeneType.Joining) Stream(java.util.stream.Stream) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) VDJCGenes(io.repseq.gen.VDJCGenes) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) Variable(io.repseq.core.GeneType.Variable) Range(com.milaboratory.core.Range) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Clone(com.milaboratory.mixcr.basictypes.Clone)

Example 2 with VDJCHit

use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.

the class FullSeqAssembler method toPointSequences.

List<PointSequence> toPointSequences(VDJCAlignments alignments, int iTarget) {
    // 
    // N_LEFT_DUMMIES     assemblingFeatureLength
    // ------|--------------|--------------|------------------------>
    // ↓              ↓              ↓
    // 0000000vvvvvvvvvvvvvvCDR3CDR3CDR3CDR3jjjjjjjjjjjjjjjjCCCCCCCCC
    VDJCPartitionedSequence target = alignments.getPartitionedTarget(iTarget);
    NSequenceWithQuality targetSeq = alignments.getTarget(iTarget);
    VDJCHit vHit = alignments.getBestHit(Variable);
    Alignment<NucleotideSequence> vAlignment = (vHit == null || vHit.getAlignment(iTarget) == null || !Objects.equals(genes.v, vHit.getGene()) || vHit.getAlignment(iTarget).getSequence1Range().getFrom() > lengthV) ? null : vHit.getAlignment(iTarget);
    VDJCHit jHit = alignments.getBestHit(Joining);
    Alignment<NucleotideSequence> jAlignment = (jHit == null || jHit.getAlignment(iTarget) == null || !Objects.equals(genes.j, jHit.getGene()) || jHit.getAlignment(iTarget).getSequence1Range().getTo() < jOffset) ? null : jHit.getAlignment(iTarget);
    List<PointSequence> points = new ArrayList<>();
    if (target.getPartitioning().isAvailable(assemblingFeature.getFirstPoint())) {
        // This target contains left edge of the assembling feature
        int leftStop = target.getPartitioning().getPosition(assemblingFeature.getFirstPoint());
        if (hasV) {
            if (vAlignment != null)
                toPointSequencesByAlignments(points, vAlignment, targetSeq, new Range(parameters.alignedRegionsOnly ? vAlignment.getSequence2Range().getFrom() : 0, leftStop), N_LEFT_DUMMIES);
        } else if (!parameters.alignedRegionsOnly)
            toPointSequencesNoAlignments(points, targetSeq, new Range(0, leftStop), N_LEFT_DUMMIES - leftStop);
    } else if (hasV && vAlignment != null)
        // This target ends before beginning (left edge) of the assembling feature
        toPointSequencesByAlignments(points, vAlignment, targetSeq, new Range(parameters.alignedRegionsOnly ? vAlignment.getSequence2Range().getFrom() : 0, vAlignment.getSequence2Range().getTo()), N_LEFT_DUMMIES);
    if (target.getPartitioning().isAvailable(assemblingFeature.getLastPoint())) {
        // This target contains right edge of the assembling feature
        int rightStart = target.getPartitioning().getPosition(assemblingFeature.getLastPoint());
        if (hasJ) {
            if (jAlignment != null)
                toPointSequencesByAlignments(points, jAlignment, targetSeq, new Range(rightStart, parameters.alignedRegionsOnly ? jAlignment.getSequence2Range().getTo() : targetSeq.size()), N_LEFT_DUMMIES + lengthV + assemblingFeatureLength - jOffset);
        } else
            toPointSequencesNoAlignments(points, targetSeq, new Range(rightStart, targetSeq.size()), N_LEFT_DUMMIES + lengthV + assemblingFeatureLength - rightStart);
    } else if (hasJ && jAlignment != null)
        // This target starts after the end (right edge) of the assembling feature
        toPointSequencesByAlignments(points, jAlignment, targetSeq, new Range(jAlignment.getSequence2Range().getFrom(), parameters.alignedRegionsOnly ? jAlignment.getSequence2Range().getTo() : targetSeq.size()), N_LEFT_DUMMIES + lengthV + assemblingFeatureLength - jOffset);
    return points;
}
Also used : VDJCPartitionedSequence(com.milaboratory.mixcr.basictypes.VDJCPartitionedSequence) Range(com.milaboratory.core.Range) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 3 with VDJCHit

use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.

the class VDJCAlignerWithMergeTest method test1.

@Test
public void test1() throws Exception {
    VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    List<VDJCAlignments> alignemntsList = new ArrayList<>();
    int header;
    int total = 0;
    int leftHit = 0;
    try (PairedFastqReader reader = new PairedFastqReader(VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R2.fastq"), true)) {
        VDJCAlignerWithMerge aligner = new VDJCAlignerWithMerge(parameters);
        for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) {
            if (parameters.containsRequiredFeature(gene))
                aligner.addGene(gene);
        }
        for (PairedRead read : CUtils.it(reader)) {
            ++total;
            VDJCAlignmentResult<PairedRead> result = aligner.process(read);
            if (result.alignment != null) {
                alignemntsList.add(result.alignment);
                for (VDJCHit hit : result.alignment.getHits(GeneType.Variable)) if (hit.getAlignment(0) != null && hit.getAlignment(1) != null)
                    ++leftHit;
            }
        }
    }
    // for (VDJCAlignments alignments : alignemntsList) {
    // for (int i = 0; i < alignments.numberOfTargets(); i++) {
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(alignments, i));
    // }
    // }
    System.out.println(alignemntsList.size());
    System.out.println(total);
    System.out.println(leftHit);
    Assert.assertTrue(alignemntsList.size() > 10);
// System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
// 
// try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()), ll)) {
// int i = 0;
// for (VDJCAlignments alignments : CUtils.it(reader))
// Assert.assertEquals(alignemntsList.get(i++), alignments);
// }
}
Also used : ArrayList(java.util.ArrayList) VDJCGene(io.repseq.core.VDJCGene) ByteArrayOutputStream(java.io.ByteArrayOutputStream) PairedRead(com.milaboratory.core.io.sequence.PairedRead) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) PairedFastqReader(com.milaboratory.core.io.sequence.fastq.PairedFastqReader) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Test(org.junit.Test)

Example 4 with VDJCHit

use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.

the class AlignedTarget method orderTargets.

public static List<AlignedTarget> orderTargets(List<AlignedTarget> targets) {
    // Selecting best gene by total score
    final EnumMap<GeneType, VDJCGene> bestGenes = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VDJC_REFERENCE) {
        TObjectLongMap<VDJCGene> scores = new TObjectLongHashMap<>();
        for (AlignedTarget target : targets) {
            for (VDJCHit hit : target.getAlignments().getHits(geneType)) {
                Alignment<NucleotideSequence> alignment = hit.getAlignment(target.getTargetId());
                if (alignment != null)
                    scores.adjustOrPutValue(hit.getGene(), (long) alignment.getScore(), (long) alignment.getScore());
            }
        }
        VDJCGene bestGene = null;
        long bestScore = Long.MIN_VALUE;
        TObjectLongIterator<VDJCGene> it = scores.iterator();
        while (it.hasNext()) {
            it.advance();
            if (bestScore < it.value()) {
                bestScore = it.value();
                bestGene = it.key();
            }
        }
        if (bestGene != null)
            bestGenes.put(geneType, bestGene);
    }
    // Class to facilitate comparison between targets
    final class Wrapper implements Comparable<Wrapper> {

        final AlignedTarget target;

        final EnumMap<GeneType, Alignment<NucleotideSequence>> alignments = new EnumMap<>(GeneType.class);

        Wrapper(AlignedTarget target) {
            this.target = target;
            for (VDJCGene gene : bestGenes.values()) for (VDJCHit hit : target.getAlignments().getHits(gene.getGeneType())) if (hit.getGene() == gene) {
                Alignment<NucleotideSequence> alignment = hit.getAlignment(target.targetId);
                if (alignment != null) {
                    alignments.put(gene.getGeneType(), alignment);
                    break;
                }
            }
        }

        GeneType firstAlignedGeneType() {
            for (GeneType geneType : GeneType.VDJC_REFERENCE) if (alignments.containsKey(geneType))
                return geneType;
            return null;
        }

        @Override
        public int compareTo(Wrapper o) {
            GeneType thisFirstGene = firstAlignedGeneType();
            GeneType otherFirstGene = o.firstAlignedGeneType();
            int cmp = Byte.compare(thisFirstGene.getOrder(), otherFirstGene.getOrder());
            return cmp != 0 ? cmp : Integer.compare(alignments.get(thisFirstGene).getSequence1Range().getLower(), o.alignments.get(thisFirstGene).getSequence1Range().getLower());
        }
    }
    // Creating wrappers and sorting list
    List<Wrapper> wrappers = new ArrayList<>(targets.size());
    for (AlignedTarget target : targets) {
        Wrapper wrapper = new Wrapper(target);
        if (wrapper.firstAlignedGeneType() == null)
            continue;
        wrappers.add(wrapper);
    }
    Collections.sort(wrappers);
    // Creating result
    List<AlignedTarget> result = new ArrayList<>(wrappers.size());
    for (Wrapper wrapper : wrappers) result.add(wrapper.target);
    return result;
}
Also used : TObjectLongHashMap(gnu.trove.map.hash.TObjectLongHashMap) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 5 with VDJCHit

use of com.milaboratory.mixcr.basictypes.VDJCHit in project mixcr by milaboratory.

the class ReferencePointCoverageCollector method put.

@Override
public void put(VDJCAlignments alignments) {
    totalCount.incrementAndGet();
    VDJCHit hit = alignments.getBestHit(refPoint.getGeneType());
    int left = -1, right = -1;
    for (int i = 0; i < alignments.numberOfTargets(); ++i) {
        int position = hit.getPosition(i, refPoint);
        if (position == -1)
            continue;
        Range alignmentRange = hit.getAlignment(i).getSequence2Range();
        left = Math.max(position - alignmentRange.getLower(), left);
        right = Math.max(alignmentRange.getUpper() - position, right);
    }
    if (left == -1)
        return;
    left = Math.min(leftHist.length() - 1, left);
    leftHist.incrementAndGet(left);
    right = Math.min(rightHist.length() - 1, right);
    rightHist.incrementAndGet(right);
}
Also used : Range(com.milaboratory.core.Range) ReferencePoint(io.repseq.core.ReferencePoint) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Aggregations

VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)22 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)8 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)8 GeneType (io.repseq.core.GeneType)7 VDJCGene (io.repseq.core.VDJCGene)7 Range (com.milaboratory.core.Range)4 ArrayList (java.util.ArrayList)4 Alignment (com.milaboratory.core.alignment.Alignment)3 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)3 Clone (com.milaboratory.mixcr.basictypes.Clone)3 ReferencePoint (io.repseq.core.ReferencePoint)3 VDJCGeneId (io.repseq.core.VDJCGeneId)3 PairedRead (com.milaboratory.core.io.sequence.PairedRead)2 PairedFastqReader (com.milaboratory.core.io.sequence.fastq.PairedFastqReader)2 VDJCPartitionedSequence (com.milaboratory.mixcr.basictypes.VDJCPartitionedSequence)2 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)2 Chains (io.repseq.core.Chains)2 GeneFeature (io.repseq.core.GeneFeature)2 Test (org.junit.Test)2 CUtils (cc.redberry.pipe.CUtils)1