Search in sources :

Example 6 with VDJCAlignments

use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.

the class VDJCAlignerSTest method testSerialization1.

@Test
public void testSerialization1() throws Exception {
    VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
    // LociLibrary ll = LociLibraryManager.getDefault().getLibrary("mi");
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    List<VDJCAlignments> alignemntsList = new ArrayList<>();
    int header;
    try (SingleFastqReader reader = new SingleFastqReader(VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), true)) {
        VDJCAlignerS aligner = new VDJCAlignerS(parameters);
        for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) if (parameters.containsRequiredFeature(gene))
            aligner.addGene(gene);
        try (VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(bos)) {
            writer.header(aligner);
            header = bos.size();
            for (SingleRead read : CUtils.it(reader)) {
                VDJCAlignmentResult<SingleRead> result = aligner.process(read);
                if (result.alignment != null) {
                    writer.write(result.alignment);
                    alignemntsList.add(result.alignment);
                }
            }
        }
    }
    Assert.assertTrue(alignemntsList.size() > 10);
    System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
    try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()))) {
        int i = 0;
        for (VDJCAlignments alignments : CUtils.it(reader)) Assert.assertEquals(alignemntsList.get(i++), alignments);
    }
}
Also used : SingleFastqReader(com.milaboratory.core.io.sequence.fastq.SingleFastqReader) VDJCAlignmentsReader(com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader) ArrayList(java.util.ArrayList) VDJCAlignmentsWriter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) VDJCGene(io.repseq.core.VDJCGene) SingleRead(com.milaboratory.core.io.sequence.SingleRead) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) Test(org.junit.Test)

Example 7 with VDJCAlignments

use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.

the class VDJCAlignerWithMergeTest method test1.

@Test
public void test1() throws Exception {
    VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    List<VDJCAlignments> alignemntsList = new ArrayList<>();
    int header;
    int total = 0;
    int leftHit = 0;
    try (PairedFastqReader reader = new PairedFastqReader(VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R2.fastq"), true)) {
        VDJCAlignerWithMerge aligner = new VDJCAlignerWithMerge(parameters);
        for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) {
            if (parameters.containsRequiredFeature(gene))
                aligner.addGene(gene);
        }
        for (PairedRead read : CUtils.it(reader)) {
            ++total;
            VDJCAlignmentResult<PairedRead> result = aligner.process(read);
            if (result.alignment != null) {
                alignemntsList.add(result.alignment);
                for (VDJCHit hit : result.alignment.getHits(GeneType.Variable)) if (hit.getAlignment(0) != null && hit.getAlignment(1) != null)
                    ++leftHit;
            }
        }
    }
    // for (VDJCAlignments alignments : alignemntsList) {
    // for (int i = 0; i < alignments.numberOfTargets(); i++) {
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(alignments, i));
    // }
    // }
    System.out.println(alignemntsList.size());
    System.out.println(total);
    System.out.println(leftHit);
    Assert.assertTrue(alignemntsList.size() > 10);
// System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
// 
// try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()), ll)) {
// int i = 0;
// for (VDJCAlignments alignments : CUtils.it(reader))
// Assert.assertEquals(alignemntsList.get(i++), alignments);
// }
}
Also used : ArrayList(java.util.ArrayList) VDJCGene(io.repseq.core.VDJCGene) ByteArrayOutputStream(java.io.ByteArrayOutputStream) PairedRead(com.milaboratory.core.io.sequence.PairedRead) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) PairedFastqReader(com.milaboratory.core.io.sequence.fastq.PairedFastqReader) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Test(org.junit.Test)

Example 8 with VDJCAlignments

use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerAligner method process0.

@Override
@SuppressWarnings("unchecked")
protected VDJCAlignmentResult<VDJCMultiRead> process0(VDJCMultiRead input) {
    final int nReads = input.numberOfReads();
    EnumMap<GeneType, VDJCHit[]> vdjcHits = new EnumMap<>(GeneType.class);
    NSequenceWithQuality[] targets = new NSequenceWithQuality[nReads];
    Chains currentChains = Chains.ALL;
    // Across all gene types
    int lastAlignedTarget = 0;
    int firstJTarget = -1;
    int lastVTarget = -1;
    for (int g = 0; g < GeneType.VJC_REFERENCE.length; g++) {
        GeneType gt = GeneType.VJC_REFERENCE[g];
        AlignmentHit<NucleotideSequence, VDJCGene>[][] alignmentHits = new AlignmentHit[nReads][];
        Arrays.fill(alignmentHits, new AlignmentHit[0]);
        for (int targetId = lastAlignedTarget; targetId < nReads; targetId++) {
            targets[targetId] = input.getRead(targetId).getData();
            final NucleotideSequence sequence = input.getRead(targetId).getData().getSequence();
            AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> als;
            final BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(gt);
            if (aligner != null) {
                int pointer = 0;
                if (g != 0) {
                    // Not V gene
                    VDJCHit[] vdjcHits1 = vdjcHits.get(GeneType.VJC_REFERENCE[g - 1]);
                    Alignment<NucleotideSequence> alignment;
                    if (vdjcHits1.length != 0 && (alignment = vdjcHits1[0].getAlignment(targetId)) != null)
                        pointer = alignment.getSequence2Range().getTo();
                }
                als = aligner.align(sequence, pointer, sequence.size(), getFilter(gt, currentChains));
                if (als != null && als.hasHits()) {
                    lastAlignedTarget = targetId;
                    if (// V
                    g == 0)
                        lastVTarget = targetId;
                    if (// J
                    g == 1)
                        firstJTarget = targetId;
                    alignmentHits[targetId] = als.getHits().toArray(new AlignmentHit[als.getHits().size()]);
                }
            }
        }
        Chains chains = Chains.EMPTY;
        for (AlignmentHit<NucleotideSequence, VDJCGene>[] alignmentHit0 : alignmentHits) if (alignmentHit0 != null)
            for (AlignmentHit<NucleotideSequence, VDJCGene> hit : alignmentHit0) chains = chains.merge(hit.getRecordPayload().getChains());
        currentChains = currentChains.intersection(chains);
        vdjcHits.put(gt, combine(parameters.getFeatureToAlign(gt), alignmentHits));
    }
    boolean fineVAlignmentPerformed = false, fineJAlignmentPerformed = false;
    // Additional (fine) alignment step for V gene
    VDJCHit[] vHits = vdjcHits.get(GeneType.Variable);
    final AlignmentScoring<NucleotideSequence> vScoring = parameters.getVAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    vHits != null && vHits.length > 0 && !(vScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Joining) != null && vdjcHits.get(GeneType.Joining).length > 0) {
        int minimalVSpace = getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()) / vScoring.getMaximalMatchScore();
        // Assert
        if (firstJTarget == -1)
            throw new AssertionError();
        for (int targetId = 1; targetId <= firstJTarget; targetId++) {
            int vSpace;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId) != null && (vSpace = vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId).getSequence2Range().getFrom()) >= minimalVSpace) {
                for (int vHitIndex = 0; vHitIndex < vHits.length; vHitIndex++) {
                    VDJCHit vHit = vHits[vHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (vHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> leftAlignment = vHit.getAlignment(targetId - 1);
                    if (leftAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = leftAlignment.getSequence1();
                    final int beginFR3 = vHit.getGene().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
                    if (beginFR3 == -1)
                        continue;
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring<NucleotideSequence>) vScoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, vSpace, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
                        continue;
                    fineVAlignmentPerformed = true;
                    vHits[vHitIndex] = vHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(vHits);
    vdjcHits.put(GeneType.Variable, cutRelativeScore(vHits, parameters.getVAlignerParameters().getRelativeMinScore(), parameters.getVAlignerParameters().getParameters().getMaxHits()));
    // Additional (fine) alignment step for J gene
    VDJCHit[] jHits = vdjcHits.get(GeneType.Joining);
    final AlignmentScoring<NucleotideSequence> jScoring = parameters.getJAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    jHits != null && jHits.length > 0 && !(jScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Variable) != null && vdjcHits.get(GeneType.Variable).length > 0) {
        int minimalJSpace = getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()) / jScoring.getMaximalMatchScore();
        // Assert
        if (lastVTarget == -1)
            throw new AssertionError();
        for (int targetId = lastVTarget; targetId < nReads - 1; targetId++) {
            int jSpaceBegin;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId) != null && (sequence2.size() - (jSpaceBegin = vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId).getSequence2Range().getTo())) >= minimalJSpace) {
                for (int jHitIndex = 0; jHitIndex < jHits.length; jHitIndex++) {
                    VDJCHit jHit = jHits[jHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (jHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> rightAlignment = jHit.getAlignment(targetId + 1);
                    if (rightAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = rightAlignment.getSequence1();
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) jScoring, sequence1, sequence2, 0, sequence1.size(), jSpaceBegin, sequence2.size() - jSpaceBegin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
                        continue;
                    fineJAlignmentPerformed = true;
                    jHits[jHitIndex] = jHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(jHits);
    vdjcHits.put(GeneType.Joining, cutRelativeScore(jHits, parameters.getJAlignerParameters().getRelativeMinScore(), parameters.getJAlignerParameters().getParameters().getMaxHits()));
    int dGeneTarget = -1;
    VDJCHit[] vResult = vdjcHits.get(GeneType.Variable);
    VDJCHit[] jResult = vdjcHits.get(GeneType.Joining);
    if (vResult.length != 0 && jResult.length != 0)
        for (int i = 0; i < nReads; i++) if (vResult[0].getAlignment(i) != null && jResult[0].getAlignment(i) != null) {
            dGeneTarget = i;
            break;
        }
    // if (fineVAlignmentPerformed && fineJAlignmentPerformed)
    // System.out.println("sd");
    VDJCHit[] dResult;
    if (dGeneTarget == -1)
        dResult = new VDJCHit[0];
    else {
        final Alignment<NucleotideSequence> vAl = vResult[0].getAlignment(dGeneTarget);
        final Alignment<NucleotideSequence> jAl = jResult[0].getAlignment(dGeneTarget);
        if (vAl == null || jAl == null || singleDAligner == null)
            dResult = new VDJCHit[0];
        else
            dResult = singleDAligner.align(targets[dGeneTarget].getSequence(), getPossibleDLoci(vResult, jResult), vAl.getSequence2Range().getTo(), jAl.getSequence2Range().getFrom(), dGeneTarget, nReads);
    }
    final VDJCAlignments alignment = new VDJCAlignments(vResult, dResult, jResult, cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets, input.getHistory(), input.getOriginalReads());
    return new VDJCAlignmentResult<>(input, alignment);
}
Also used : Chains(io.repseq.core.Chains) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) EnumMap(java.util.EnumMap) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) VDJCAlignmentResult(com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult) ReferencePoint(io.repseq.core.ReferencePoint) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 9 with VDJCAlignments

use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.

the class VDJCAlignerPVFirst method process0.

@Override
protected VDJCAlignmentResult<PairedRead> process0(final PairedRead input) {
    Target[] targets = getTargets(input);
    // Creates helper classes for each PTarget
    PAlignmentHelper[] helpers = createInitialHelpers(targets);
    VDJCAlignmentResult<PairedRead> result = parameters.getAllowPartialAlignments() ? processPartial(input, helpers) : processStrict(input, helpers);
    // if sAligner == null (which means --no-merge option), no merge will be performed
    if (result.alignment != null && sAligner != null) {
        final VDJCAlignments alignment = result.alignment;
        final TargetMerger.TargetMergingResult mergeResult = alignmentsMerger.merge(new AlignedTarget(alignment, 0), new AlignedTarget(alignment, 1), false);
        if (mergeResult.failedDueInconsistentAlignments()) {
            GeneType geneType = mergeResult.getFailedMergedGeneType();
            int removeId = alignment.getBestHit(geneType).getAlignment(0).getScore() > alignment.getBestHit(geneType).getAlignment(1).getScore() ? 1 : 0;
            if (listener != null)
                listener.onTopHitSequenceConflict(input, alignment, geneType);
            return new VDJCAlignmentResult<>(input, alignment.removeBestHitAlignment(geneType, removeId));
        } else if (mergeResult.isSuccessful()) {
            assert mergeResult.isUsingAlignments();
            NSequenceWithQuality alignedTarget = mergeResult.getResult().getTarget();
            SingleRead sRead = new SingleReadImpl(input.getId(), alignedTarget, "");
            VDJCAlignmentResult<SingleRead> sResult = sAligner.process0(sRead);
            if (sResult.alignment == null)
                return result;
            VDJCAlignments sAlignment = sResult.alignment.setHistory(new SequenceHistory[] { new SequenceHistory.Merge(SequenceHistory.OverlapType.AlignmentOverlap, result.alignment.getHistory(0), result.alignment.getHistory(1), mergeResult.getOffset(), mergeResult.getMismatched()) }, new SequenceRead[] { input });
            if (listener != null)
                listener.onSuccessfulAlignmentOverlap(input, sAlignment);
            return new VDJCAlignmentResult<>(input, sAlignment);
        }
    }
    return result;
}
Also used : SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) TargetMerger(com.milaboratory.mixcr.partialassembler.TargetMerger) SequenceHistory(com.milaboratory.mixcr.basictypes.SequenceHistory) PairedRead(com.milaboratory.core.io.sequence.PairedRead) AlignedTarget(com.milaboratory.mixcr.partialassembler.AlignedTarget) Target(com.milaboratory.core.Target) AlignedTarget(com.milaboratory.mixcr.partialassembler.AlignedTarget) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) SingleRead(com.milaboratory.core.io.sequence.SingleRead) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments)

Example 10 with VDJCAlignments

use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.

the class VDJCAlignerS method processStrict.

private VDJCAlignmentResult<SingleRead> processStrict(SingleRead input) {
    Target[] targets = parameters.getReadsLayout().createTargets(input);
    // Algorithm below relies on this fact
    assert targets.length <= 2;
    boolean anyIsFull = false, anyHasJ = false, anyHasV = false;
    KVJResultsForSingle[] results = new KVJResultsForSingle[targets.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = align(targets[i]);
        results[i].performChainFilteringIfNeeded();
        anyIsFull |= results[i].hasKVAndJHits();
        anyHasJ |= results[i].hasKJHits();
        anyHasV |= results[i].hasKVHits();
    }
    if (!anyIsFull) {
        if (!anyHasJ && !anyHasV)
            onFailedAlignment(input, VDJCAlignmentFailCause.NoHits);
        else if (!anyHasJ)
            onFailedAlignment(input, VDJCAlignmentFailCause.NoJHits);
        else
            onFailedAlignment(input, VDJCAlignmentFailCause.NoVHits);
        return new VDJCAlignmentResult<>(input);
    }
    KVJResultsForSingle topResult = null;
    // Calculating best result
    for (KVJResultsForSingle result : results) if (result.hasKVAndJHits())
        topResult = topResult != null ? null : result;
    if (topResult == null) {
        // Finalizing alignment for both results to determine who is the best
        for (KVJResultsForSingle result : results) {
            result.alignDC();
            if (topResult == null || topResult.sumScore() < result.sumScore())
                topResult = result;
        }
    } else
        // Align C and D genes only for best result
        topResult.alignDC();
    // Checking minimal sum score
    if (topResult.sumScore() < parameters.getMinSumScore()) {
        onFailedAlignment(input, VDJCAlignmentFailCause.LowTotalScore);
        return new VDJCAlignmentResult<>(input);
    }
    // Filtering hits basing on minSumScore
    topResult.calculateHits(parameters.getMinSumScore(), parameters.getMaxHits());
    if (topResult.hasVAndJHits()) {
        VDJCAlignments alignment = topResult.toVDJCAlignments(input.getId(), input);
        onSuccessfulAlignment(input, alignment);
        return new VDJCAlignmentResult<>(input, alignment);
    } else {
        onFailedAlignment(input, VDJCAlignmentFailCause.LowTotalScore);
        return new VDJCAlignmentResult<>(input);
    }
}
Also used : Target(com.milaboratory.core.Target) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments)

Aggregations

VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)29 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)11 Test (org.junit.Test)10 SequenceRead (com.milaboratory.core.io.sequence.SequenceRead)8 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)8 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)8 PairedRead (com.milaboratory.core.io.sequence.PairedRead)7 VDJCGene (io.repseq.core.VDJCGene)7 SingleReadImpl (com.milaboratory.core.io.sequence.SingleReadImpl)6 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)6 GeneType (io.repseq.core.GeneType)6 Well44497b (org.apache.commons.math3.random.Well44497b)6 Clone (com.milaboratory.mixcr.basictypes.Clone)5 VDJCAlignmentsReader (com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader)5 VDJCAlignmentsWriter (com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter)5 ArrayList (java.util.ArrayList)5 CUtils (cc.redberry.pipe.CUtils)4 Target (com.milaboratory.core.Target)4 CanReportProgress (com.milaboratory.util.CanReportProgress)4 GeneFeature (io.repseq.core.GeneFeature)4