Search in sources :

Example 46 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class MergerTest method mAssert.

public static void mAssert(String originalSeq, String seq1, String seq2, String expectedSequence, String expectedQuality) {
    NucleotideSequence originalSequence = new NucleotideSequence(originalSeq);
    NSequenceWithQuality target1 = new NSequenceWithQuality(seq1, lets('A', seq1.length()));
    NSequenceWithQuality target2 = new NSequenceWithQuality(seq2, lets('B', seq2.length()));
    LinearGapAlignmentScoring<NucleotideSequence> scoring = new LinearGapAlignmentScoring<>(NucleotideSequence.ALPHABET, 4, -5, -9);
    Alignment<NucleotideSequence> alignment1 = Aligner.alignLocalLinear(scoring, originalSequence, target1.getSequence());
    Alignment<NucleotideSequence> alignment2 = Aligner.alignLocalLinear(scoring, originalSequence, target2.getSequence());
    NSequenceWithQuality result = Merger.merge(new Range(0, originalSequence.size()), new Alignment[] { alignment1, alignment2 }, new NSequenceWithQuality[] { target1, target2 });
    Assert.assertEquals(expectedSequence, result.getSequence().toString());
    Assert.assertEquals(expectedQuality, result.getQuality().toString());
}
Also used : LinearGapAlignmentScoring(com.milaboratory.core.alignment.LinearGapAlignmentScoring) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) Range(com.milaboratory.core.Range)

Example 47 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerAlignerTest method basicTest1.

@Test
public void basicTest1() throws Exception {
    Well44497b rg = new Well44497b(12312);
    VDJCAlignerParameters rnaSeqParams = VDJCParametersPresets.getByName("rna-seq");
    PartialAlignmentsAssemblerAligner aligner = new PartialAlignmentsAssemblerAligner(rnaSeqParams);
    VDJCLibrary lib = VDJCLibraryRegistry.getDefault().getLibrary("default", "hs");
    for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes()) if (gene.isFunctional())
        aligner.addGene(gene);
    TargetBuilder.VDJCGenes genes = new TargetBuilder.VDJCGenes(lib, "TRBV12-3*00", "TRBD1*00", "TRBJ1-3*00", "TRBC2*00");
    // | 305
    // 250V + 55CDR3 (20V 7N 10D 3N 15J) + 28J + 100C
    NucleotideSequence baseSeq = TargetBuilder.generateSequence(genes, "{CDR3Begin(-250)}V*270 NNNNNNN {DBegin(0)}D*10 NNN {CDR3End(-15):FR4End} {CBegin}C*100", rg);
    int len = 70;
    NucleotideSequence seq1 = baseSeq.getRange(0, len);
    NucleotideSequence seq2 = baseSeq.getRange(245, 245 + len);
    NucleotideSequence seq3 = baseSeq.getRange(320, 320 + len);
    VDJCAlignmentResult<VDJCMultiRead> alignment = aligner.process(MiXCRTestUtils.createMultiRead(seq1, seq2, seq3));
    VDJCAlignments al = alignment.alignment;
    Assert.assertNotNull(al);
    assertInHits(genes.v, al);
    assertInHits(genes.d, al);
    assertInHits(genes.j, al);
    assertInHits(genes.c, al);
    VDJCHit bestV = al.getBestHit(GeneType.Variable);
    VDJCHit bestD = al.getBestHit(GeneType.Diversity);
    VDJCHit bestJ = al.getBestHit(GeneType.Joining);
    VDJCHit bestC = al.getBestHit(GeneType.Constant);
    Assert.assertNotNull(bestV.getAlignment(0));
    Assert.assertNotNull(bestV.getAlignment(1));
    Assert.assertNull(bestV.getAlignment(2));
    Assert.assertNull(bestD.getAlignment(0));
    Assert.assertNotNull(bestD.getAlignment(1));
    Assert.assertNull(bestD.getAlignment(2));
    Assert.assertNull(bestJ.getAlignment(0));
    Assert.assertNotNull(bestJ.getAlignment(1));
    Assert.assertNotNull(bestJ.getAlignment(2));
    Assert.assertNull(bestC.getAlignment(0));
    Assert.assertNull(bestC.getAlignment(1));
    Assert.assertNotNull(bestC.getAlignment(2));
}
Also used : VDJCAlignerParameters(com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) Well44497b(org.apache.commons.math3.random.Well44497b) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) TargetBuilder(com.milaboratory.mixcr.tests.TargetBuilder) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Test(org.junit.Test)

Example 48 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class VDJCAlignerPVFirst method alignVThenJ.

@SuppressWarnings("unchecked")
PAlignmentHelper alignVThenJ(final Target target) {
    /*
         * Step 1: alignment of V gene
         */
    List<AlignmentHit<NucleotideSequence, VDJCGene>> vAl1 = vAligner.align(target.targets[0].getSequence()).getHits(), vAl2 = vAligner.align(target.targets[1].getSequence()).getHits();
    /*
         * Step 1.5: eliminating conflicting alignments in favor of alignments covering CDR3 edge
         */
    boolean vChimera = checkAndEliminateChimera(vAl1, vAl2, GeneType.Variable);
    PairedHit[] vHits = extractDoubleHits(vAl1, vAl2);
    for (PairedHit vHit : vHits) {
        if (vHit.hit1 == null) {
            AlignmentHit<NucleotideSequence, VDJCGene> leftHit = vHit.hit0;
            // Checking whether alignment touches right read edge (taking to account tolerance)
            if (leftHit.getAlignment().getSequence2Range().getTo() < target.targets[0].size() - parameters.getAlignmentBoundaryTolerance())
                continue;
            final AlignmentScoring<NucleotideSequence> scoring = parameters.getVAlignerParameters().getParameters().getScoring();
            if (scoring instanceof AffineGapAlignmentScoring)
                // TODO IMPLEMENT
                continue;
            final NucleotideSequence sequence2 = target.targets[1].getSequence();
            final NucleotideSequence sequence1 = leftHit.getAlignment().getSequence1();
            final int beginFR3 = leftHit.getRecordPayload().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
            if (beginFR3 == -1)
                continue;
            final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring) scoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, sequence2.size(), false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
            if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
                continue;
            vHit.set(1, new AlignmentHitImpl<NucleotideSequence, VDJCGene>(alignment, leftHit.getRecordPayload()));
            vHit.calculateScore();
        }
    }
    vHits = sortAndFilterHits(vHits, parameters.getVAlignerParameters());
    /*
         * Step 3: alignment of J gene
         */
    List<AlignmentHit<NucleotideSequence, VDJCGene>> jAl1 = performJAlignment(target, vHits, 0), jAl2 = performJAlignment(target, vHits, 1);
    /*
         * Step 3.5: eliminating conflicting alignments in favor of alignments covering CDR3 edge
         */
    boolean jChimera = checkAndEliminateChimera(jAl1, jAl2, GeneType.Joining);
    PairedHit[] jHits = extractDoubleHits(jAl1, jAl2);
    for (PairedHit jHit : jHits) {
        if (jHit.hit0 == null) {
            AlignmentHit<NucleotideSequence, VDJCGene> rightHit = jHit.hit1;
            // Checking whether alignment touches left read edge (taking to account tolerance)
            if (rightHit.getAlignment().getSequence2Range().getFrom() > parameters.getAlignmentBoundaryTolerance())
                continue;
            final AlignmentScoring<NucleotideSequence> scoring = parameters.getJAlignerParameters().getParameters().getScoring();
            if (scoring instanceof AffineGapAlignmentScoring)
                // TODO IMPLEMENT
                continue;
            final NucleotideSequence sequence2 = target.targets[0].getSequence();
            final NucleotideSequence sequence1 = rightHit.getAlignment().getSequence1();
            int begin = 0;
            if (vHits.length != 0 && vHits[0].hit0 != null) {
                begin = vHits[0].hit0.getAlignment().getSequence2Range().getTo() - parameters.getVJOverlapWindow();
                if (begin < 0)
                    begin = 0;
            }
            final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) scoring, sequence1, sequence2, 0, sequence1.size(), begin, sequence2.size() - begin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
            if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
                continue;
            jHit.set(0, new AlignmentHitImpl<NucleotideSequence, VDJCGene>(alignment, rightHit.getRecordPayload()));
            jHit.calculateScore();
        }
    }
    jHits = sortAndFilterHits(jHits, parameters.getJAlignerParameters());
    // Check if parameters allow chimeras
    if (!parameters.isAllowChimeras()) {
        // Calculate common chains
        Chains commonChains = getVJCommonChains(vHits, jHits);
        if (!commonChains.isEmpty()) {
            // Filtering V genes
            int filteredSize = 0;
            for (PairedHit hit : vHits) if (hit.getGene().getChains().intersects(commonChains))
                ++filteredSize;
            // Perform filtering (new array allocation) only if needed
            if (vHits.length != filteredSize) {
                PairedHit[] newHits = new PairedHit[filteredSize];
                // Used as pointer
                filteredSize = 0;
                for (PairedHit hit : vHits) if (hit.getGene().getChains().intersects(commonChains))
                    newHits[filteredSize++] = hit;
                assert newHits.length == filteredSize;
                vHits = newHits;
            }
            // Filtering J genes
            filteredSize = 0;
            for (PairedHit hit : jHits) if (hit.getGene().getChains().intersects(commonChains))
                ++filteredSize;
            // Perform filtering (new array allocation) only if needed
            if (jHits.length != filteredSize) {
                PairedHit[] newHits = new PairedHit[filteredSize];
                // Used as pointer
                filteredSize = 0;
                for (PairedHit hit : jHits) if (hit.getGene().getChains().intersects(commonChains))
                    newHits[filteredSize++] = hit;
                assert newHits.length == filteredSize;
                jHits = newHits;
            }
        }
    }
    return new PAlignmentHelper(target, vHits, jHits, vChimera, jChimera);
}
Also used : AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 49 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class PartialAlignmentsAssembler method searchOverlaps.

@SuppressWarnings("unchecked")
private OverlapSearchResult searchOverlaps(final VDJCAlignments rightAl, final boolean allowChimeras) {
    final Chains jChains = rightAl.getAllChains(GeneType.Joining);
    int rightTargetId = getRightPartitionedSequence(rightAl);
    if (rightTargetId == -1)
        return null;
    rightParts.incrementAndGet();
    final VDJCPartitionedSequence rightTarget = rightAl.getPartitionedTarget(rightTargetId);
    NSequenceWithQuality rightSeqQ = rightTarget.getSequence();
    NucleotideSequence rightSeq = rightSeqQ.getSequence();
    int stop = rightTarget.getPartitioning().getPosition(ReferencePoint.JBeginTrimmed);
    if (stop == -1)
        stop = rightTarget.getSequence().size();
    else
        stop -= kOffset;
    // black list of left parts failed due to inconsistent overlapped alignments (failed AMerge)
    TLongHashSet blackList = new TLongHashSet();
    SEARCH_LEFT_PARTS: while (true) {
        int maxOverlap = -1, maxDelta = -1, maxOverlapIndexInList = -1, maxBegin = -1, maxEnd = -1;
        List<KMerInfo> maxOverlapList = null;
        boolean isMaxOverOverlapped = false;
        for (int rFrom = 0; rFrom < stop && rFrom + kValue < rightSeqQ.size(); rFrom++) {
            long kMer = kMer(rightSeqQ.getSequence(), rFrom, kValue);
            List<KMerInfo> match = kToIndexLeft.get(kMer);
            if (match == null)
                continue;
            out: for (int i = 0; i < match.size(); i++) {
                boolean isOverOverlapped = false;
                final VDJCAlignments leftAl = match.get(i).getAlignments();
                if (blackList.contains(leftAl.getAlignmentsIndex()))
                    continue;
                if (// You shall not merge with yourself
                leftAl.getAlignmentsIndex() == rightAl.getAlignmentsIndex() || alreadyMergedIds.contains(leftAl.getAlignmentsIndex()))
                    continue;
                // Checking chains compatibility
                if (!allowChimeras && jChains != null && !leftAl.getAllChains(GeneType.Variable).intersects(jChains))
                    continue;
                // Check for the same V
                if (leftAl.getBestHit(GeneType.Variable) != null && rightAl.getBestHit(GeneType.Variable) != null && !leftAl.hasCommonGenes(GeneType.Variable, rightAl))
                    continue;
                final NucleotideSequence leftSeq = leftAl.getPartitionedTarget(getLeftPartitionedSequence(leftAl)).getSequence().getSequence();
                int lFrom = match.get(i).kMerPositionFrom;
                // begin and end in the coordinates of left
                int delta, begin = delta = lFrom - rFrom;
                if (begin < 0) {
                    begin = 0;
                    isOverOverlapped = true;
                }
                int end = leftSeq.size();
                if (end >= rightSeq.size() + delta) {
                    end = rightSeq.size() + delta;
                    isOverOverlapped = true;
                }
                for (int j = begin; j < end; j++) if (leftSeq.codeAt(j) != rightSeq.codeAt(j - delta))
                    continue out;
                int overlap = end - begin;
                if (maxOverlap < overlap) {
                    maxOverlap = overlap;
                    maxOverlapList = match;
                    maxOverlapIndexInList = i;
                    maxDelta = delta;
                    maxBegin = begin;
                    maxEnd = end;
                    isMaxOverOverlapped = isOverOverlapped;
                }
            }
        }
        if (maxOverlapList == null)
            return null;
        if (maxOverlap < minimalAssembleOverlap)
            return null;
        final KMerInfo left = maxOverlapList.remove(maxOverlapIndexInList);
        VDJCAlignments leftAl = left.alignments;
        // final long readId = rightAl.getReadId();
        ArrayList<AlignedTarget> leftTargets = extractAlignedTargets(leftAl);
        ArrayList<AlignedTarget> rightTargets = extractAlignedTargets(rightAl);
        AlignedTarget leftCentral = leftTargets.get(left.targetId);
        AlignedTarget rightCentral = rightTargets.get(rightTargetId);
        AlignedTarget central = targetMerger.merge(leftCentral, rightCentral, maxDelta, SequenceHistory.OverlapType.CDR3Overlap, 0);
        // Setting overlap position
        central = AlignedTarget.setOverlapRange(central, maxBegin, maxEnd);
        final List<AlignedTarget> leftDescriptors = new ArrayList<>(2), rightDescriptors = new ArrayList<>(2);
        for (int i = 0; i < left.targetId; ++i) leftDescriptors.add(leftTargets.get(i));
        for (int i = left.targetId + 1; i < leftAl.numberOfTargets(); ++i) rightDescriptors.add(leftTargets.get(i));
        for (int i = 0; i < rightTargetId; ++i) leftDescriptors.add(rightTargets.get(i));
        for (int i = rightTargetId + 1; i < rightAl.numberOfTargets(); ++i) rightDescriptors.add(rightTargets.get(i));
        // Merging to VJ junction
        List<AlignedTarget>[] allDescriptors = new List[] { leftDescriptors, rightDescriptors };
        TargetMerger.TargetMergingResult bestResult = TargetMerger.FAILED_RESULT;
        int bestI;
        // Trying to merge left and right reads to central one
        for (List<AlignedTarget> descriptors : allDescriptors) do {
            bestI = -1;
            for (int i = 0; i < descriptors.size(); i++) {
                TargetMerger.TargetMergingResult result = targetMerger.merge(descriptors.get(i), central);
                if (result.failedDueInconsistentAlignments()) {
                    // Inconsistent alignments -> retry
                    blackList.add(leftAl.getAlignmentsIndex());
                    continue SEARCH_LEFT_PARTS;
                }
                if (bestResult.getScore() < result.getScore()) {
                    bestResult = result;
                    bestI = i;
                }
            }
            if (bestI != -1) {
                assert bestResult != TargetMerger.FAILED_RESULT;
                central = bestResult.getResult();
                descriptors.remove(bestI);
            }
        } while (bestI != -1);
        // Merging left+left / right+right
        outer: for (int d = 0; d < allDescriptors.length; d++) {
            List<AlignedTarget> descriptors = allDescriptors[d];
            for (int i = 0; i < descriptors.size(); i++) for (int j = i + 1; j < descriptors.size(); j++) {
                TargetMerger.TargetMergingResult result = targetMerger.merge(descriptors.get(i), descriptors.get(j));
                if (result.failedDueInconsistentAlignments()) {
                    // Inconsistent alignments -> retry
                    blackList.add(leftAl.getAlignmentsIndex());
                    continue SEARCH_LEFT_PARTS;
                }
                if (result.isSuccessful()) {
                    descriptors.set(i, result.getResult());
                    descriptors.remove(j);
                    --d;
                    continue outer;
                }
            }
        }
        if (isMaxOverOverlapped)
            overoverlapped.incrementAndGet();
        // Creating pre-list of resulting targets
        List<AlignedTarget> result = new ArrayList<>();
        result.addAll(leftDescriptors);
        result.add(central);
        result.addAll(rightDescriptors);
        // Ordering and filtering final targets
        return new OverlapSearchResult(maxOverlapList, left, AlignedTarget.orderTargets(result));
    }
}
Also used : ArrayList(java.util.ArrayList) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) TLongHashSet(gnu.trove.set.hash.TLongHashSet) ArrayList(java.util.ArrayList) List(java.util.List)

Example 50 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class SingleDAligner method align0.

List<PreVDJCHit> align0(NucleotideSequence sequence, Chains chains, int from, int to) {
    if (from > to)
        throw new IllegalArgumentException("from > to. from = " + from + " to = " + to + " .");
    NucleotideSequence key = sequence.getRange(from, to);
    try {
        List<PreVDJCHit> cachedResult = resultsCache.get(key);
        List<PreVDJCHit> result = new ArrayList<>(cachedResult.size());
        PreVDJCHit h;
        for (PreVDJCHit hit : cachedResult) {
            // filter non-possible chains
            if (!chains.intersects(sequences.get(hit.id).chains))
                continue;
            result.add(h = convert(hit, from));
            assert sequence.getRange(h.alignment.getSequence2Range()).equals(h.alignment.getRelativeMutations().mutate(sequences.get(h.id).sequence.getRange(h.alignment.getSequence1Range())));
        }
        cutToScore(result);
        return result;
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }
}
Also used : NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) ArrayList(java.util.ArrayList) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)81 Test (org.junit.Test)32 Range (com.milaboratory.core.Range)19 VDJCGene (io.repseq.core.VDJCGene)15 GeneType (io.repseq.core.GeneType)14 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)13 GeneFeature (io.repseq.core.GeneFeature)9 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)8 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)8 ReferencePoint (io.repseq.core.ReferencePoint)7 VDJCLibrary (io.repseq.core.VDJCLibrary)7 ArrayList (java.util.ArrayList)7 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)6 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)5 PairedRead (com.milaboratory.core.io.sequence.PairedRead)5 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)5 Path (java.nio.file.Path)5 Alignment (com.milaboratory.core.alignment.Alignment)4 Pattern (java.util.regex.Pattern)4 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)3