Search in sources :

Example 26 with VDJCGene

use of io.repseq.core.VDJCGene in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerTest method createTestData.

public static InputTestData createTestData(long seed) throws Exception {
    EnumMap<GeneType, String> geneNames = new EnumMap<GeneType, String>(GeneType.class) {

        {
            put(Variable, "TRBV20-1*00");
            put(Diversity, "TRBD2*00");
            put(Joining, "TRBJ2-6*00");
            put(Constant, "TRBC2*00");
        }
    };
    // config
    RandomGenerator rnd = RandomUtil.getThreadLocalRandom();
    rnd.setSeed(seed);
    final VDJCAlignerParameters defaultFeatures = VDJCParametersPresets.getByName("default");
    defaultFeatures.getVAlignerParameters().setGeneFeatureToAlign(VRegion);
    defaultFeatures.getDAlignerParameters().setGeneFeatureToAlign(DRegion);
    defaultFeatures.getJAlignerParameters().setGeneFeatureToAlign(JRegion);
    // used alleles
    EnumMap<GeneType, VDJCGene> genes = new EnumMap<>(GeneType.class);
    // germline parts of sequences
    EnumMap<GeneType, NucleotideSequence> germlineRegions = gtMap();
    // left, right cut of germline
    EnumMap<GeneType, int[]> germlineCuts = gtMap();
    // begin, end positions in assembled sequence
    EnumMap<GeneType, int[]> refPositions = gtMap();
    // single assembled sequence
    SequenceBuilder<NucleotideSequence> referenceBuilder = NucleotideSequence.ALPHABET.createBuilder();
    NucleotideSequence VDJunction = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 3, 10);
    NucleotideSequence DJJunction = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 3, 10);
    for (GeneType gt : GeneType.VDJC_REFERENCE) {
        VDJCGene gene = VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").get(geneNames.get(gt));
        NucleotideSequence seq = gene.getFeature(defaultFeatures.getFeatureToAlign(gt));
        int[] cuts = null;
        switch(gt) {
            case Variable:
                cuts = new int[] { 0, rnd.nextInt(gene.getFeature(GermlineVCDR3Part).size() - 5) };
                break;
            case Diversity:
                cuts = new int[] { rnd.nextInt(seq.size() / 3), rnd.nextInt(seq.size() / 3) };
                break;
            case Joining:
                cuts = new int[] { rnd.nextInt(gene.getFeature(GermlineJCDR3Part).size() - 5), 0 };
                break;
            case Constant:
                cuts = new int[] { 0, rnd.nextInt(seq.size() / 2) };
                break;
        }
        NucleotideSequence gSeq = seq.getRange(cuts[0], seq.size() - cuts[1]);
        int[] positions = new int[2];
        positions[0] = referenceBuilder.size();
        referenceBuilder.append(gSeq);
        positions[1] = referenceBuilder.size();
        if (gt == Variable)
            referenceBuilder.append(VDJunction);
        if (gt == Diversity)
            referenceBuilder.append(DJJunction);
        genes.put(gt, gene);
        germlineCuts.put(gt, cuts);
        germlineRegions.put(gt, gSeq);
        refPositions.put(gt, positions);
    }
    NucleotideSequence VJJunction = NucleotideSequence.ALPHABET.createBuilder().append(VDJunction).append(germlineRegions.get(Diversity)).append(DJJunction).createAndDestroy();
    NucleotideSequence reference = referenceBuilder.createAndDestroy();
    return new InputTestData(genes, germlineRegions, germlineCuts, refPositions, VDJunction, DJJunction, reference, VJJunction);
}
Also used : VDJCAlignerParameters(com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) EnumMap(java.util.EnumMap)

Example 27 with VDJCGene

use of io.repseq.core.VDJCGene in project mixcr by milaboratory.

the class VDJCAlignerPVFirstTest method test1.

@Test
public void test1() throws Exception {
    VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    List<VDJCAlignments> alignemntsList = new ArrayList<>();
    int header;
    int total = 0;
    int leftHit = 0;
    try (PairedFastqReader reader = new PairedFastqReader(VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R2.fastq"), true)) {
        VDJCAlignerPVFirst aligner = new VDJCAlignerPVFirst(parameters);
        for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) {
            if (parameters.containsRequiredFeature(gene))
                aligner.addGene(gene);
        }
        for (PairedRead read : CUtils.it(reader)) {
            ++total;
            VDJCAlignmentResult<PairedRead> result = aligner.process(read);
            if (result.alignment != null) {
                alignemntsList.add(result.alignment);
                for (VDJCHit hit : result.alignment.getHits(GeneType.Variable)) if (hit.getAlignment(0) != null && hit.getAlignment(1) != null)
                    ++leftHit;
            }
        }
    }
    System.out.println(alignemntsList.size());
    System.out.println(total);
    System.out.println(leftHit);
    Assert.assertTrue(alignemntsList.size() > 10);
    int k = 10;
    for (VDJCAlignments alignments : alignemntsList) {
        for (int target = 0; target < alignments.numberOfTargets(); target++) {
            MultiAlignmentHelper helperBig = VDJCAlignmentsFormatter.getTargetAsMultiAlignment(alignments, target);
            if (helperBig == null)
                continue;
            for (MultiAlignmentHelper helper : helperBig.split(80)) {
                System.out.println(helper);
                System.out.println();
                if (--k < 0)
                    return;
            }
        }
    }
// System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
// 
// try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()), ll)) {
// int i = 0;
// for (VDJCAlignments alignments : CUtils.it(reader))
// Assert.assertEquals(alignemntsList.get(i++), alignments);
// }
}
Also used : MultiAlignmentHelper(com.milaboratory.core.alignment.MultiAlignmentHelper) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) PairedRead(com.milaboratory.core.io.sequence.PairedRead) VDJCGene(io.repseq.core.VDJCGene) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) PairedFastqReader(com.milaboratory.core.io.sequence.fastq.PairedFastqReader) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Test(org.junit.Test)

Aggregations

VDJCGene (io.repseq.core.VDJCGene)27 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)15 ArrayList (java.util.ArrayList)8 Test (org.junit.Test)8 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)7 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)7 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)6 GeneFeature (io.repseq.core.GeneFeature)6 VDJCLibrary (io.repseq.core.VDJCLibrary)6 GeneType (io.repseq.core.GeneType)5 VDJCLibraryRegistry (io.repseq.core.VDJCLibraryRegistry)5 SingleRead (com.milaboratory.core.io.sequence.SingleRead)3 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)3 ReferencePoint (io.repseq.core.ReferencePoint)3 VDJCGeneId (io.repseq.core.VDJCGeneId)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 Pattern (java.util.regex.Pattern)3 ParallelProcessor (cc.redberry.pipe.blocks.ParallelProcessor)2 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)2 PairedRead (com.milaboratory.core.io.sequence.PairedRead)2