Search in sources:

Example 1 with CloneSet

use of com.milaboratory.mixcr.basictypes.CloneSet in project mixcr by milaboratory.

From the class ActionExportClones, method go0.

/**
 * Exports clones from the input file into the output file.
 *
 * <p>The clone set is read with {@code CloneSetIO.readClns}, passed through the
 * filter supplied by the parameters, and then written through an
 * {@code InfoWriter} configured with the exporters from the parameters.
 *
 * <p>The number of clones handed to {@code ExportClones} is capped by the smaller of:
 * <ul>
 *   <li>the limit returned by {@code parameters.getLimit()}, and</li>
 *   <li>the index of the first clone whose fraction is below
 *       {@code parameters.minFraction} or whose count is below
 *       {@code parameters.minCount} (if any such clone exists).</li>
 * </ul>
 *
 * @throws Exception if reading the input or writing the output fails
 */
@Override
public void go0() throws Exception {
    CloneExportParameters parameters = (CloneExportParameters) this.parameters;
    try (InputStream inputStream = IOUtil.createIS(parameters.getInputFile());
        InfoWriter<Clone> writer = new InfoWriter<>(parameters.getOutputFile())) {
        CloneSet set = CloneSetIO.readClns(inputStream, VDJCLibraryRegistry.getDefault());
        set = CloneSet.transform(set, parameters.getFilter());
        // Raw cast kept from the original; the declared element type of
        // parameters.exporters is not visible from here.
        writer.attachInfoProviders((List) parameters.exporters);
        writer.ensureHeader();
        long limit = parameters.getLimit();
        // Stop at the first clone that fails either threshold.
        // NOTE(review): this only acts as a cut-off if the set is ordered by
        // decreasing abundance - confirm against CloneSet.
        for (int i = 0; i < set.size(); i++) {
            Clone clone = set.get(i);
            if (clone.getFraction() < parameters.minFraction || clone.getCount() < parameters.minCount) {
                // Never raise the requested limit: previously "limit = i"
                // overrode a smaller user-supplied limit.
                // NOTE(review): assumes getLimit() returns a large value
                // (e.g. Long.MAX_VALUE) when no limit was requested - confirm.
                limit = Math.min(limit, i);
                break;
            }
        }
        ExportClones exportClones = new ExportClones(set, writer, limit);
        SmartProgressReporter.startProgressReport(exportClones, System.err);
        exportClones.run();
    }
}
Also used : InfoWriter(com.milaboratory.mixcr.export.InfoWriter) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) InputStream(java.io.InputStream) Clone(com.milaboratory.mixcr.basictypes.Clone)

Example 2 with CloneSet

use of com.milaboratory.mixcr.basictypes.CloneSet in project mixcr by milaboratory.

From the class CloneAssemblerRunnerTest, method runFullPipeline.

/**
 * Test helper that aligns the given FASTQ resources, assembles clones from the
 * alignments and returns the resulting clone set.
 *
 * <p>One file name selects the single-end aligner/reader, otherwise the first
 * two names are read as a pair. Along the way the helper serializes the clone
 * set to a byte array, reads it back, checks both copies with
 * {@code assertCSEquals}, and prints every read-to-clone mapping.
 *
 * @param fastqFiles class-path resource names of the FASTQ input (one or two)
 * @return the clone set produced by the assembler runner
 */
private static CloneSet runFullPipeline(String... fastqFiles) throws IOException, InterruptedException {
    // --- alignment stage ---
    VDJCAlignerParameters alignerParameters = VDJCParametersPresets.getByName("default");
    final boolean singleEnd = fastqFiles.length == 1;
    VDJCAligner aligner;
    if (singleEnd) {
        aligner = new VDJCAlignerS(alignerParameters);
    } else {
        aligner = new VDJCAlignerWithMerge(alignerParameters);
    }
    for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("mi", "hs").getGenes(Chains.IGH)) {
        if (alignerParameters.containsRequiredFeature(gene)) {
            aligner.addGene(gene);
        }
    }

    final ClassLoader resources = CloneAssemblerRunnerTest.class.getClassLoader();
    SequenceReader reader;
    if (singleEnd) {
        reader = new SingleFastqReader(resources.getResourceAsStream(fastqFiles[0]), true);
    } else {
        reader = new PairedFastqReader(
                resources.getResourceAsStream(fastqFiles[0]),
                resources.getResourceAsStream(fastqFiles[1]),
                true);
    }

    // Alignments are kept in memory rather than written to disk.
    ByteArrayOutputStream alignmentBytes = new ByteArrayOutputStream();
    try (VDJCAlignmentsWriter alignmentsWriter = new VDJCAlignmentsWriter(alignmentBytes)) {
        alignmentsWriter.header(aligner);
        for (Object read : CUtils.it(reader)) {
            VDJCAlignmentResult result = (VDJCAlignmentResult) aligner.process((SequenceRead) read);
            if (result.alignment == null) {
                continue;
            }
            alignmentsWriter.write(result.alignment);
        }
    }
    AlignmentsProvider alignmentsProvider = AlignmentsProvider.Util.createProvider(
            alignmentBytes.toByteArray(), VDJCLibraryRegistry.getDefault());

    // --- assembly stage ---
    LinearGapAlignmentScoring<NucleotideSequence> scoring =
            new LinearGapAlignmentScoring<>(NucleotideSequence.ALPHABET, 5, -9, -12);
    VJCClonalAlignerParameters firstClonalAligner = new VJCClonalAlignerParameters(0.8f, scoring, 5);
    VJCClonalAlignerParameters secondClonalAligner = new VJCClonalAlignerParameters(0.8f, scoring, 5);
    DAlignerParameters dAligner = new DAlignerParameters(GeneFeature.DRegion, 0.85f, 30.0f, 3, scoring);
    CloneFactoryParameters factoryParameters =
            new CloneFactoryParameters(firstClonalAligner, secondClonalAligner, null, dAligner);
    GeneFeature[] assemblingFeatures = new GeneFeature[] { GeneFeature.CDR3 };
    CloneClusteringParameters clustering = new CloneClusteringParameters(
            2, 1, TreeSearchParameters.ONE_MISMATCH, new RelativeConcentrationFilter(1.0E-6));
    CloneAssemblerParameters assemblerParameters = new CloneAssemblerParameters(
            assemblingFeatures, 12, QualityAggregationType.Average, clustering, factoryParameters,
            true, true, false, 0.4, true, (byte) 20, .8, "2 of 6", (byte) 15);
    System.out.println(GlobalObjectMappers.toOneLine(assemblerParameters));

    CloneAssembler cloneAssembler =
            new CloneAssembler(assemblerParameters, true, aligner.getUsedGenes(), alignerParameters);
    CloneAssemblerRunner assemblerRunner = new CloneAssemblerRunner(alignmentsProvider, cloneAssembler, 2);
    SmartProgressReporter.startProgressReport(assemblerRunner);
    assemblerRunner.run();
    CloneSet cloneSet = assemblerRunner.getCloneSet(null);

    // --- serialization round trip ---
    ByteArrayOutputStream cloneBytes = new ByteArrayOutputStream();
    CloneSetIO.write(cloneSet, cloneBytes);
    CloneSet restored = CloneSetIO.readClns(new ByteArrayInputStream(cloneBytes.toByteArray()));
    assertCSEquals(cloneSet, restored);

    // Drain and print the read-to-clone mappings until the port is exhausted.
    OutputPortCloseable<ReadToCloneMapping> mappings = assemblerRunner.assembler.getAssembledReadsPort();
    for (ReadToCloneMapping mapping = mappings.take(); mapping != null; mapping = mappings.take()) {
        System.out.println(mapping);
    }
    return cloneSet;
}
Also used : SingleFastqReader(com.milaboratory.core.io.sequence.fastq.SingleFastqReader) VDJCAlignmentsWriter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) LinearGapAlignmentScoring(com.milaboratory.core.alignment.LinearGapAlignmentScoring) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) SequenceReader(com.milaboratory.core.io.sequence.SequenceReader) PairedFastqReader(com.milaboratory.core.io.sequence.fastq.PairedFastqReader)

Example 3 with CloneSet

use of com.milaboratory.mixcr.basictypes.CloneSet in project mixcr by milaboratory.

From the class CloneAssemblerRunnerTest, method test1.

/**
 * Runs the full pipeline on the paired IGH sample and dumps every resulting
 * clone to standard output: the clone itself, its V and J hits, its CDR3, and
 * the three CDR3 sub-features on one line. Makes no assertions of its own and
 * is disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void test1() throws Exception {
    CloneSet assembled = runFullPipeline("sequences/sample_IGH_R1.fastq", "sequences/sample_IGH_R2.fastq");
    System.out.println("\n\n");
    for (Clone c : assembled) {
        System.out.println(c);
        System.out.println(Arrays.toString(c.getHits(GeneType.Variable)));
        System.out.println(Arrays.toString(c.getHits(GeneType.Joining)));
        System.out.println(c.getFeature(GeneFeature.CDR3));
        // Fetch the three parts in the same order they are printed.
        Object vPart = c.getFeature(GeneFeature.VCDR3Part);
        Object junction = c.getFeature(GeneFeature.VJJunction);
        Object jPart = c.getFeature(GeneFeature.JCDR3Part);
        System.out.println(vPart + " " + junction + " " + jPart);
    }
}
Also used : CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) Clone(com.milaboratory.mixcr.basictypes.Clone) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 4 with CloneSet

use of com.milaboratory.mixcr.basictypes.CloneSet in project mixcr by milaboratory.

From the class FullSeqAssemblerTest, method testRandom1.

/**
 * Builds a shuffled set of synthetic paired-end reads from four master
 * sequences, runs alignment and assembly on them, and then runs
 * {@code FullSeqAssembler} variant calling on the single assembled clone.
 *
 * <p>The only assertion is that assembly yields exactly one clone; the
 * variants produced by {@code callVariants} are sorted by count (descending)
 * and printed, not asserted.
 *
 * <p>The read data depends on the exact sequence of calls made on the seeded
 * generator ({@code rand} directly and through {@code rdg}), so the order of
 * those calls must not be changed.
 */
@Test
public void testRandom1() throws Exception {
    // Source sequences and how many iterations of read generation each one gets.
    // NOTE(review): the trailing "V:"/"J:" comments look like they were displaced by
    // reformatting and presumably describe the NEXT element's mutation - confirm.
    CloneFraction[] clones = { new CloneFraction(750, masterSeq1WT), // V: S346:G->T
    new CloneFraction(1000, masterSeq1VSub1), // J: D55:A
    new CloneFraction(1000, masterSeq1VDel1JDel1), // J: D62:C
    new CloneFraction(500, masterSeq1VDel1JDelVSub2) };
    // Fixed seed so that the generated reads are reproducible.
    Well19937c rand = new Well19937c();
    rand.setSeed(12345);
    RandomDataGenerator rdg = new RandomDataGenerator(rand);
    List<SequenceRead> readsOrig = new ArrayList<>();
    int readLength = 100;
    int id = -1;
    // Two paired reads are generated per iteration, each with its own id.
    for (CloneFraction clone : clones) {
        for (int i = 0; i < clone.count; i++) {
            // Left read with CDR3
            ++id;
            readsOrig.add(new PairedRead(new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(-rand.nextInt(readLength - clone.seq.cdr3Part), readLength)), "R1_" + id), new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3End(rdg.nextInt(-clone.seq.cdr3Part / 2, clone.seq.jPart), readLength).getReverseComplement()), "R2_" + id)));
            // Second pair: here it is R2 (reverse-complemented) that is taken
            // with the same offset formula as R1 of the first pair.
            ++id;
            readsOrig.add(new PairedRead(new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(rdg.nextInt(-clone.seq.vPart, clone.seq.cdr3Part / 2 - readLength), readLength)), "R1_" + id), new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(-rand.nextInt(readLength - clone.seq.cdr3Part), readLength)).getReverseComplement(), "R2_" + id)));
        }
    }
    // readsOrig = Arrays.asList(setReadId(0, readsOrig.get(12)), setReadId(1, readsOrig.get(13)));
    // Shuffle the reads with a permutation drawn from the same seeded generator.
    int[] perm = rdg.nextPermutation(readsOrig.size(), readsOrig.size());
    List<SequenceRead> reads = new ArrayList<>();
    for (int i = 0; i < readsOrig.size(); i++) reads.add(readsOrig.get(perm[i]));
    // In-memory reader that hands out the shuffled reads one by one.
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(new SequenceReaderCloseable<SequenceRead>() {

        // Index of the next read to return; also reported as the number of reads taken so far.
        int counter = 0;

        @Override
        public void close() {
        }

        @Override
        public long getNumberOfReads() {
            return counter;
        }

        @Override
        public synchronized SequenceRead take() {
            // null signals that all reads have been handed out.
            if (counter == reads.size())
                return null;
            return reads.get(counter++);
        }
    }, true);
    // Start from the "rna-seq" preset, keep original reads, and align V genes
    // against GeneFeature.VTranscriptWithP.
    params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    params.alignerParameters.setVAlignmentParameters(params.alignerParameters.getVAlignerParameters().setGeneFeatureToAlign(GeneFeature.VTranscriptWithP));
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    // // TODO exception for translation
    // for (VDJCAlignments al : align.alignments) {
    // for (int i = 0; i < al.numberOfTargets(); i++) {
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
    // System.out.println();
    // }
    // System.out.println();
    // System.out.println(" ================================================ ");
    // System.out.println();
    // }
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    // All four inputs are expected to assemble into a single clone at this stage.
    Assert.assertEquals(1, assemble.cloneSet.size());
    CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
    // Only alignments that have a CDR3 feature are fed into the raw-data calculation.
    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments.stream().filter(a -> a.getFeature(GeneFeature.CDR3) != null).collect(Collectors.toList())));
    List<Clone> clns = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
    // Largest clones first.
    clns.sort(Comparator.comparingDouble(Clone::getCount).reversed());
    System.out.println("# Clones: " + clns.size());
    // Renumber the clones in sorted order and print a summary of each one.
    id = 0;
    for (Clone clone : clns) {
        clone = clone.setId(id++);
        System.out.println(clone.numberOfTargets());
        System.out.println(clone.getCount());
        System.out.println(clone.getFraction());
        System.out.println(clone.getBestHit(GeneType.Variable).getAlignment(0).getAbsoluteMutations());
        System.out.println(clone.getBestHit(GeneType.Joining).getAlignment(0).getAbsoluteMutations());
        System.out.println();
    // ActionExportClonesPretty.outputCompact(System.out, clone);
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) 
SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Test(org.junit.Test)

Example 5 with CloneSet

use of com.milaboratory.mixcr.basictypes.CloneSet in project mixcr by milaboratory.

From the class FullSeqAssemblerTest, method test1.

/**
 * Aligns and assembles two hand-placed paired reads taken from
 * {@code masterSeq1WT}, then checks how {@code FullSeqAssembler} maps them to
 * points and which positions each read leaves uncovered.
 *
 * <p>Read offsets relative to the CDR3 begin (each mate is 140 long):
 * pair 0 uses -20 (R1) and -200 (R2); pair 1 uses -30 (R1) and -150 (R2).
 * The assertions expect 40 raw-data rows whose column 0 equals
 * {@code 0xFFFFFFFF} and 60 rows whose column 1 does.
 * NOTE(review): {@code 0xFFFFFFFF} presumably marks "position not covered by
 * this read" - confirm against FullSeqAssembler.
 */
@Test
public void test1() throws Exception {
    final int readLength = 140;

    SingleReadImpl firstR1 = new SingleReadImpl(0,
            new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, readLength)), "R1");
    SingleReadImpl firstR2 = new SingleReadImpl(0,
            new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, readLength).getReverseComplement()), "R2");
    PairedRead firstPair = new PairedRead(firstR1, firstR2);

    SingleReadImpl secondR1 = new SingleReadImpl(1,
            new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, readLength)), "R1");
    SingleReadImpl secondR2 = new SingleReadImpl(1,
            new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, readLength).getReverseComplement()), "R2");
    PairedRead secondPair = new PairedRead(secondR1, secondR2);

    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(firstPair, secondPair);
    params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    params.cloneAssemblerParameters.updateFrom(params.alignerParameters);

    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);

    CloneFactory cloneFactory = new CloneFactory(
            align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(),
            align.parameters.cloneAssemblerParameters.getAssemblingFeatures(),
            align.usedGenes,
            align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(
            cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);

    // Distinct points touched by the second alignment, then by the first.
    PointSequence[] secondPoints = agg.toPointSequences(align.alignments.get(1));
    int[] secondPointIds = Arrays.stream(secondPoints).mapToInt(ps -> ps.point).toArray();
    TIntHashSet secondDistinct = new TIntHashSet(secondPointIds);
    Assert.assertEquals(261 - masterSeq1WT.cdr3Part, secondDistinct.size());

    PointSequence[] firstPoints = agg.toPointSequences(align.alignments.get(0));
    int[] firstPointIds = Arrays.stream(firstPoints).mapToInt(ps -> ps.point).toArray();
    TIntHashSet firstDistinct = new TIntHashSet(firstPointIds);
    Assert.assertEquals(281 - masterSeq1WT.cdr3Part, firstDistinct.size());

    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments));

    // Count rows whose entry for read 0 / read 1 is the all-ones marker.
    final int marker = 0xFFFFFFFF;
    long markedInFirst = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false)
            .mapToInt(row -> row[0])
            .filter(value -> value == marker)
            .count();
    long markedInSecond = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false)
            .mapToInt(row -> row[1])
            .filter(value -> value == marker)
            .count();
    Assert.assertEquals(40, markedInFirst);
    Assert.assertEquals(60, markedInSecond);

    // Print every called variant in compact form, separated by a ruler line.
    CloneSet variants = new CloneSet(Arrays.asList(agg.callVariants(prep)));
    for (Clone variant : variants.getClones()) {
        ActionExportClonesPretty.outputCompact(System.out, variant);
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) TIntHashSet(gnu.trove.set.hash.TIntHashSet) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Clone(com.milaboratory.mixcr.basictypes.Clone) Test(org.junit.Test)

Aggregations

CloneSet (com.milaboratory.mixcr.basictypes.CloneSet): 10, Clone (com.milaboratory.mixcr.basictypes.Clone): 5, SequenceRead (com.milaboratory.core.io.sequence.SequenceRead): 4, NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence): 4, Test (org.junit.Test): 4, CUtils (cc.redberry.pipe.CUtils): 3, PairedRead (com.milaboratory.core.io.sequence.PairedRead): 3, SequenceReaderCloseable (com.milaboratory.core.io.sequence.SequenceReaderCloseable): 3, SingleReadImpl (com.milaboratory.core.io.sequence.SingleReadImpl): 3, NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality): 3, SequenceQuality (com.milaboratory.core.sequence.SequenceQuality): 3, CloneFactory (com.milaboratory.mixcr.assembler.CloneFactory): 3, VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments): 3, VDJCAlignmentsFormatter (com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter): 3, ActionExportClonesPretty (com.milaboratory.mixcr.cli.ActionExportClonesPretty): 3, Main (com.milaboratory.mixcr.cli.Main): 3, RunMiXCR (com.milaboratory.mixcr.util.RunMiXCR): 3, VDJCParametersPresets (com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets): 3, TIntHashSet (gnu.trove.set.hash.TIntHashSet): 3, GeneFeature (io.repseq.core.GeneFeature): 3