Search in sources :

Example 6 with Clone

use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.

the class FullSeqAssemblerTest method test1.

@Test
public void test1() throws Exception {
    int len = 140;
    PairedRead read1 = new PairedRead(new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len)), "R1"), new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len).getReverseComplement()), "R2"));
    PairedRead read2 = new PairedRead(new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len)), "R1"), new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len).getReverseComplement()), "R2"));
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1, read2);
    // [-200, -60]  [-20, 120]
    // [-150, 110]
    // 
    // [-200, -150], [110, 120] = 60
    // [-60, -20] = 40
    params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    params.cloneAssemblerParameters.updateFrom(params.alignerParameters);
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    // for (VDJCAlignments al : align.alignments) {
    // for (int i = 0; i < al.numberOfTargets(); i++)
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
    // System.out.println();
    // }
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
    PointSequence[] r2s = agg.toPointSequences(align.alignments.get(1));
    TIntHashSet p2 = new TIntHashSet(Arrays.stream(r2s).mapToInt(s -> s.point).toArray());
    Assert.assertEquals(261 - masterSeq1WT.cdr3Part, p2.size());
    PointSequence[] r1s = agg.toPointSequences(align.alignments.get(0));
    TIntHashSet p1 = new TIntHashSet(Arrays.stream(r1s).mapToInt(s -> s.point).toArray());
    Assert.assertEquals(281 - masterSeq1WT.cdr3Part, p1.size());
    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments));
    long uniq1 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[0]).filter(c -> c == 0xFFFFFFFF).count();
    long uniq2 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[1]).filter(c -> c == 0xFFFFFFFF).count();
    Assert.assertEquals(40, uniq1);
    Assert.assertEquals(60, uniq2);
    for (Clone clone : new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones()) {
        ActionExportClonesPretty.outputCompact(System.out, clone);
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) TIntHashSet(gnu.trove.set.hash.TIntHashSet) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Clone(com.milaboratory.mixcr.basictypes.Clone) Test(org.junit.Test)

Example 7 with Clone

use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.

the class FullSeqAssemblerTest method testLargeCloneNoMismatches.

@Test
public void testLargeCloneNoMismatches() throws Exception {
    MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
    NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
    RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
    params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
    Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
    // ActionExportClonesPretty.outputCompact(System.out, largeClone);
    // System.exit(0);
    Well44497b rnd = new Well44497b(1234567889L);
    int nReads = 100_000;
    int readLength = 75, readGap = 150;
    // slice seq randomly
    PairedRead[] slicedReads = new PairedRead[nReads];
    for (int i = 0; i < nReads; ++i) {
        int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
        assert r2from > r1from;
        slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
    }
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
    // params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    // for (VDJCAlignments al : align.alignments) {
    // for (int i = 0; i < al.numberOfTargets(); i++) {
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
    // System.out.println();
    // }
    // System.out.println();
    // System.out.println(" ================================================ ");
    // System.out.println();
    // }
    Assert.assertEquals(1, assemble.cloneSet.size());
    Clone initialClone = assemble.cloneSet.get(0);
    NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
    List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
    alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
        for (int i = 0; i < al.numberOfTargets(); i++) {
            System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
            System.out.println();
        }
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    });
    // System.exit(0);
    System.out.println("=> Agg");
    CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
    List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
    clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
    for (Clone clone : clones) {
        ActionExportClonesPretty.outputCompact(System.out, clone);
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) Well44497b(org.apache.commons.math3.random.Well44497b) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Clone(com.milaboratory.mixcr.basictypes.Clone) Test(org.junit.Test)

Example 8 with Clone

use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.

the class ActionClonesDiff method populate.

private void populate(Map<CKey, CRec> recs, CloneSet cs, int i) {
    for (Clone clone : cs) {
        CKey key = getKey(clone);
        CRec cRec = recs.get(key);
        if (cRec == null)
            recs.put(key, cRec = new CRec());
        if (cRec.clones[i] != null) {
            String error = "";
            char letter = 'X';
            if (!Objects.equals(getBestGene(cRec.clones[i], GeneType.Variable), getBestGene(clone, GeneType.Variable)))
                letter = 'v';
            if (!Objects.equals(getBestGene(cRec.clones[i], GeneType.Joining), getBestGene(clone, GeneType.Joining)))
                letter = 'j';
            if (!Objects.equals(getBestGene(cRec.clones[i], GeneType.Constant), getBestGene(clone, GeneType.Constant)))
                letter = 'c';
            if (letter != 'X')
                error = "Error: clones with the same key present in one of the clonesets. Seems that clones were assembled " + "using -OseparateBy" + Character.toUpperCase(letter) + "=true option, please add -" + letter + " option to this command.";
            throw new ParameterException(error);
        }
        cRec.clones[i] = clone;
    }
}
Also used : ParameterException(com.beust.jcommander.ParameterException) Clone(com.milaboratory.mixcr.basictypes.Clone)

Example 9 with Clone

use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.

the class CloneFactory method create.

public Clone create(int id, double count, EnumMap<GeneType, TObjectFloatHashMap<VDJCGeneId>> geneScores, NSequenceWithQuality[] targets) {
    EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        VJCClonalAlignerParameters vjcParameters = parameters.getVJCParameters(geneType);
        if (vjcParameters == null)
            continue;
        GeneFeature featureToAlign = featuresToAlign.get(geneType);
        TObjectFloatHashMap<VDJCGeneId> gtGeneScores = geneScores.get(geneType);
        if (gtGeneScores == null)
            continue;
        GeneFeature[] intersectingFeatures = new GeneFeature[assemblingFeatures.length];
        for (int i = 0; i < assemblingFeatures.length; ++i) {
            intersectingFeatures[i] = GeneFeature.intersection(featureToAlign, assemblingFeatures[i]);
            if (intersectingFeatures[i] != null)
                switch(geneType) {
                    case Variable:
                        if (!intersectingFeatures[i].getFirstPoint().equals(assemblingFeatures[i].getFirstPoint()))
                            throw new IllegalArgumentException();
                        break;
                    case Joining:
                        if (!intersectingFeatures[i].getLastPoint().equals(assemblingFeatures[i].getLastPoint()))
                            throw new IllegalArgumentException();
                        break;
                }
        }
        VDJCHit[] result = new VDJCHit[gtGeneScores.size()];
        int pointer = 0;
        TObjectFloatIterator<VDJCGeneId> iterator = gtGeneScores.iterator();
        while (iterator.hasNext()) {
            iterator.advance();
            VDJCGene gene = usedGenes.get(iterator.key());
            Alignment<NucleotideSequence>[] alignments = new Alignment[assemblingFeatures.length];
            for (int i = 0; i < assemblingFeatures.length; ++i) {
                if (intersectingFeatures[i] == null)
                    continue;
                NucleotideSequence referenceSequence = gene.getFeature(featureToAlign);
                Range rangeInReference = gene.getPartitioning().getRelativeRange(featureToAlign, intersectingFeatures[i]);
                if (rangeInReference == null || referenceSequence == null)
                    continue;
                Boolean leftSide;
                switch(geneType) {
                    case Variable:
                        leftSide = intersectingFeatures[i].getLastPoint().isTrimmable() ? true : null;
                        break;
                    case Joining:
                        leftSide = intersectingFeatures[i].getFirstPoint().isTrimmable() ? false : null;
                        break;
                    case Constant:
                        leftSide = null;
                        break;
                    default:
                        throw new RuntimeException();
                }
                BandedAlignerParameters<NucleotideSequence> alignmentParameters = vjcParameters.getAlignmentParameters();
                int referenceLength = rangeInReference.length();
                NucleotideSequence target = targets[i].getSequence();
                if (alignmentParameters.getScoring() instanceof LinearGapAlignmentScoring) {
                    if (leftSide == null) {
                        alignments[i] = BandedLinearAligner.align((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    } else if (leftSide) {
                        assert rangeInReference.getFrom() + referenceLength == referenceSequence.size();
                        alignments[i] = BandedLinearAligner.alignSemiLocalLeft((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth(), alignmentParameters.getStopPenalty());
                    } else {
                        assert rangeInReference.getFrom() == 0;
                        // int offset2 = Math.max(0, target.size() - referenceLength);
                        alignments[i] = BandedLinearAligner.alignSemiLocalRight((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth(), alignmentParameters.getStopPenalty());
                    }
                } else {
                    if (leftSide == null) {
                        alignments[i] = BandedAffineAligner.align((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    } else if (leftSide) {
                        assert rangeInReference.getFrom() + referenceLength == referenceSequence.size();
                        alignments[i] = BandedAffineAligner.semiLocalRight((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    } else {
                        assert rangeInReference.getFrom() == 0;
                        // int offset2 = Math.max(0, target.size() - referenceLength);
                        alignments[i] = BandedAffineAligner.semiLocalLeft((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    }
                }
            }
            result[pointer++] = new VDJCHit(gene, alignments, featureToAlign, iterator.value());
        }
        Arrays.sort(result, 0, pointer);
        hits.put(geneType, pointer < result.length ? Arrays.copyOf(result, pointer) : result);
    }
    // D
    NucleotideSequence sequenceToAlign = targets[indexOfAssemblingFeatureWithD].getSequence();
    int from = 0;
    int to = sequenceToAlign.size();
    VDJCHit[] hs = hits.get(GeneType.Variable);
    if (hs != null && hs.length > 0) {
        int p = hs[0].getPartitioningForTarget(indexOfAssemblingFeatureWithD).getPosition(ReferencePoint.VEndTrimmed);
        if (p != -1) {
            if (p < 0)
                p = -2 - p;
            from = p;
        }
    }
    hs = hits.get(GeneType.Joining);
    if (hs != null && hs.length > 0) {
        int p = hs[0].getPartitioningForTarget(indexOfAssemblingFeatureWithD).getPosition(ReferencePoint.JBeginTrimmed);
        if (p != -1) {
            if (p < 0)
                p = -2 - p;
            to = p;
        }
    }
    if (from < to)
        hits.put(GeneType.Diversity, dAligner.align(sequenceToAlign, VDJCAligner.getPossibleDLoci(hits.get(GeneType.Variable), hits.get(GeneType.Joining)), from, to, indexOfAssemblingFeatureWithD, assemblingFeatures.length));
    else
        hits.put(GeneType.Diversity, new VDJCHit[0]);
    return new Clone(targets, hits, count, id);
}
Also used : Range(com.milaboratory.core.Range) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Clone(com.milaboratory.mixcr.basictypes.Clone)

Aggregations

Clone (com.milaboratory.mixcr.basictypes.Clone)9 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)5 CloneSet (com.milaboratory.mixcr.basictypes.CloneSet)5 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)5 CUtils (cc.redberry.pipe.CUtils)4 SequenceRead (com.milaboratory.core.io.sequence.SequenceRead)4 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)4 CloneFactory (com.milaboratory.mixcr.assembler.CloneFactory)4 TIntHashSet (gnu.trove.set.hash.TIntHashSet)4 GeneFeature (io.repseq.core.GeneFeature)4 GeneType (io.repseq.core.GeneType)4 java.util (java.util)4 Test (org.junit.Test)4 PairedRead (com.milaboratory.core.io.sequence.PairedRead)3 SequenceReaderCloseable (com.milaboratory.core.io.sequence.SequenceReaderCloseable)3 SingleReadImpl (com.milaboratory.core.io.sequence.SingleReadImpl)3 SequenceQuality (com.milaboratory.core.sequence.SequenceQuality)3 VDJCAlignmentsFormatter (com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter)3 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)3 ActionExportClonesPretty (com.milaboratory.mixcr.cli.ActionExportClonesPretty)3