Search in sources :

Example 11 with NSequenceWithQuality

use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.

the class FullSeqAssemblerTest method test1.

@Test
public void test1() throws Exception {
    int len = 140;
    PairedRead read1 = new PairedRead(new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len)), "R1"), new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len).getReverseComplement()), "R2"));
    PairedRead read2 = new PairedRead(new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len)), "R1"), new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len).getReverseComplement()), "R2"));
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1, read2);
    // [-200, -60]  [-20, 120]
    // [-150, 110]
    // 
    // [-200, -150], [110, 120] = 60
    // [-60, -20] = 40
    params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    params.cloneAssemblerParameters.updateFrom(params.alignerParameters);
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    // for (VDJCAlignments al : align.alignments) {
    // for (int i = 0; i < al.numberOfTargets(); i++)
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
    // System.out.println();
    // }
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
    PointSequence[] r2s = agg.toPointSequences(align.alignments.get(1));
    TIntHashSet p2 = new TIntHashSet(Arrays.stream(r2s).mapToInt(s -> s.point).toArray());
    Assert.assertEquals(261 - masterSeq1WT.cdr3Part, p2.size());
    PointSequence[] r1s = agg.toPointSequences(align.alignments.get(0));
    TIntHashSet p1 = new TIntHashSet(Arrays.stream(r1s).mapToInt(s -> s.point).toArray());
    Assert.assertEquals(281 - masterSeq1WT.cdr3Part, p1.size());
    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments));
    long uniq1 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[0]).filter(c -> c == 0xFFFFFFFF).count();
    long uniq2 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[1]).filter(c -> c == 0xFFFFFFFF).count();
    Assert.assertEquals(40, uniq1);
    Assert.assertEquals(60, uniq2);
    for (Clone clone : new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones()) {
        ActionExportClonesPretty.outputCompact(System.out, clone);
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) TIntHashSet(gnu.trove.set.hash.TIntHashSet) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Clone(com.milaboratory.mixcr.basictypes.Clone) Test(org.junit.Test)

Example 12 with NSequenceWithQuality

use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.

the class FullSeqAssemblerTest method testLargeCloneNoMismatches.

@Test
public void testLargeCloneNoMismatches() throws Exception {
    MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
    NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
    RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
    params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
    Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
    // ActionExportClonesPretty.outputCompact(System.out, largeClone);
    // System.exit(0);
    Well44497b rnd = new Well44497b(1234567889L);
    int nReads = 100_000;
    int readLength = 75, readGap = 150;
    // slice seq randomly
    PairedRead[] slicedReads = new PairedRead[nReads];
    for (int i = 0; i < nReads; ++i) {
        int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
        assert r2from > r1from;
        slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
    }
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
    // params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    // for (VDJCAlignments al : align.alignments) {
    // for (int i = 0; i < al.numberOfTargets(); i++) {
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
    // System.out.println();
    // }
    // System.out.println();
    // System.out.println(" ================================================ ");
    // System.out.println();
    // }
    Assert.assertEquals(1, assemble.cloneSet.size());
    Clone initialClone = assemble.cloneSet.get(0);
    NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
    List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
    alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
        for (int i = 0; i < al.numberOfTargets(); i++) {
            System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
            System.out.println();
        }
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    });
    // System.exit(0);
    System.out.println("=> Agg");
    CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
    List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
    clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
    for (Clone clone : clones) {
        ActionExportClonesPretty.outputCompact(System.out, clone);
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) Well44497b(org.apache.commons.math3.random.Well44497b) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Clone(com.milaboratory.mixcr.basictypes.Clone) Test(org.junit.Test)

Example 13 with NSequenceWithQuality

use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.

the class ClonalSequenceTest method createRandomTestDara.

private TestData createRandomTestDara(int size, RandomGenerator random) {
    MutationsBuilder<NucleotideSequence> builder = new MutationsBuilder<>(NucleotideSequence.ALPHABET);
    NSequenceWithQuality[] c1 = new NSequenceWithQuality[size];
    NSequenceWithQuality[] c2 = new NSequenceWithQuality[size];
    int offset = 0;
    for (int i = 0; i < size; ++i) {
        NucleotideSequence s1 = TestUtil.randomSequence(NucleotideSequence.ALPHABET, random, 5, 15);
        Mutations<NucleotideSequence> muts = MutationsGenerator.generateMutations(s1, MutationModels.getEmpiricalNucleotideMutationModel(), 1, s1.size() - 1);
        NucleotideSequence s2 = muts.mutate(s1);
        c1[i] = new NSequenceWithQuality(s1, SequenceQuality.getUniformQuality((byte) 0, s1.size()));
        c2[i] = new NSequenceWithQuality(s2, SequenceQuality.getUniformQuality((byte) 0, s2.size()));
        builder.append(muts.move(offset));
        offset += s1.size();
    }
    return new TestData(new ClonalSequence(c1), new ClonalSequence(c2), builder.createAndDestroy());
}
Also used : NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) MutationsBuilder(com.milaboratory.core.mutations.MutationsBuilder)

Example 14 with NSequenceWithQuality

use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.

the class ClonalSequenceTest method create.

private ClonalSequence create(String... strings) {
    NSequenceWithQuality[] data = new NSequenceWithQuality[strings.length];
    for (int i = 0; i < strings.length; ++i) {
        NucleotideSequence s = new NucleotideSequence(strings[i]);
        SequenceQuality q = SequenceQuality.getUniformQuality((byte) 0, s.size());
        data[i] = new NSequenceWithQuality(s, q);
    }
    return new ClonalSequence(data);
}
Also used : SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 15 with NSequenceWithQuality

use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.

the class TargetMerger method merge.

@SuppressWarnings("unchecked")
public AlignedTarget merge(AlignedTarget targetLeft, AlignedTarget targetRight, int offset, OverlapType overlapType, int nMismatches) {
    if (offset < 0)
        return merge(targetRight, targetLeft, -offset, overlapType, nMismatches);
    final NSequenceWithQuality mergedTarget = merger.overlap(targetLeft.getTarget(), targetRight.getTarget(), offset);
    EnumMap<GeneType, VDJCHit[]> result = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        final BatchAlignerWithBaseParameters bp = ((KGeneAlignmentParameters) alignerParameters.getGeneAlignerParameters(geneType)).getParameters();
        final VDJCHit[] leftHits = targetLeft.getAlignments().getHits(geneType);
        final VDJCHit[] rightHits = targetRight.getAlignments().getHits(geneType);
        GeneFeature alignedFeature = leftHits.length == 0 ? rightHits.length == 0 ? null : rightHits[0].getAlignedFeature() : leftHits[0].getAlignedFeature();
        Map<VDJCGeneId, HitMappingRecord> map = extractHitsMapping(targetLeft, targetRight, geneType);
        ArrayList<VDJCHit> resultingHits = new ArrayList<>();
        for (Map.Entry<VDJCGeneId, HitMappingRecord> mE : map.entrySet()) {
            final VDJCGene gene = mE.getValue().gene;
            Alignment<NucleotideSequence> mergedAl = merge(bp.getScoring(), extractBandedWidth(bp), mergedTarget.getSequence(), offset, mE.getValue().alignments[0], mE.getValue().alignments[1]);
            resultingHits.add(new VDJCHit(gene, mergedAl, alignedFeature));
        }
        Collections.sort(resultingHits);
        // final float relativeMinScore = extractRelativeMinScore(bp);
        // int threshold = (int) (resultingHits.size() > 0 ? resultingHits.get(0).getScore() * relativeMinScore : 0);
        // for (int i = resultingHits.size() - 1; i > 0; --i)
        // if (resultingHits.get(i).getScore() < threshold)
        // resultingHits.remove(i);
        result.put(geneType, resultingHits.toArray(new VDJCHit[resultingHits.size()]));
    }
    VDJCAlignments alignments = new VDJCAlignments(result, new NSequenceWithQuality[] { mergedTarget }, new SequenceHistory[] { new SequenceHistory.Merge(overlapType, targetLeft.getHistory(), targetRight.getHistory(), offset, nMismatches) }, VDJCAlignments.mergeOriginalReads(targetLeft.getAlignments(), targetRight.getAlignments()));
    AlignedTarget resultTarget = new AlignedTarget(alignments, 0);
    for (BPoint bPoint : BPoint.values()) {
        int leftPoint = targetLeft.getBPoint(bPoint);
        int rightPoint = targetRight.getBPoint(bPoint);
        if (leftPoint != -1 && rightPoint != -1)
            throw new IllegalArgumentException("Same bPoint defined in both input targets.");
        else if (leftPoint != -1)
            resultTarget = resultTarget.setBPoint(bPoint, leftPoint);
        else if (rightPoint != -1)
            resultTarget = resultTarget.setBPoint(bPoint, offset + rightPoint);
    }
    return resultTarget;
}
Also used : GeneFeature(io.repseq.core.GeneFeature) SequenceHistory(com.milaboratory.mixcr.basictypes.SequenceHistory) VDJCGeneId(io.repseq.core.VDJCGeneId) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) BatchAlignerWithBaseParameters(com.milaboratory.core.alignment.batch.BatchAlignerWithBaseParameters) KGeneAlignmentParameters(com.milaboratory.mixcr.vdjaligners.KGeneAlignmentParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Aggregations

NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)21 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)16 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)8 GeneType (io.repseq.core.GeneType)7 SequenceRead (com.milaboratory.core.io.sequence.SequenceRead)6 SingleReadImpl (com.milaboratory.core.io.sequence.SingleReadImpl)6 SequenceQuality (com.milaboratory.core.sequence.SequenceQuality)6 PairedRead (com.milaboratory.core.io.sequence.PairedRead)5 GeneFeature (io.repseq.core.GeneFeature)5 Test (org.junit.Test)5 Clone (com.milaboratory.mixcr.basictypes.Clone)4 RunMiXCR (com.milaboratory.mixcr.util.RunMiXCR)4 ArrayList (java.util.ArrayList)4 CUtils (cc.redberry.pipe.CUtils)3 Alignment (com.milaboratory.core.alignment.Alignment)3 SequenceReaderCloseable (com.milaboratory.core.io.sequence.SequenceReaderCloseable)3 CloneFactory (com.milaboratory.mixcr.assembler.CloneFactory)3 CloneSet (com.milaboratory.mixcr.basictypes.CloneSet)3 VDJCAlignmentsFormatter (com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter)3 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)3