Search in sources :

Example 6 with VDJCLibrary

use of io.repseq.core.VDJCLibrary in project repseqio by repseqio.

the class DebugAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    reg.registerLibraries(params.getInput());
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
    GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
    GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
    GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
    for (VDJCLibrary lib : reg.getLoadedLibraries()) {
        for (VDJCGene gene : lib.getGenes()) {
            if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                continue;
            // First generate list of warning messages
            List<String> warnings = new ArrayList<>();
            if (gene.isFunctional() || params.getCheckAll()) {
                NucleotideSequence l3;
                switch(gene.getGeneType()) {
                    case Variable:
                        // Flag AA residues flanking CDR3
                        l3 = gene.getFeature(cdr3FirstTriplet);
                        if (l3 == null)
                            warnings.add("Unable to find CDR3 start");
                        else if (l3.size() != 3)
                            warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
                        else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
                            warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
                        // Flag suspicious exon borders
                        // https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
                        NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
                        if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
                            warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
                        NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
                        if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
                            warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
                        ReferencePoints partitioning = gene.getPartitioning();
                        if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
                            // Iterating over all reading-frame bound anchor points of V gene
                            for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
                                if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
                                    continue;
                                // And checking that they are in the same frame as Start (L1Begin)
                                int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
                                if (// Point is defined
                                relativePosition >= 0 && relativePosition % 3 != 0)
                                    warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
                            }
                        }
                        break;
                    case Joining:
                        // Flag AA residues flanking CDR3
                        l3 = gene.getFeature(cdr3LastTriplet);
                        if (l3 == null)
                            warnings.add("Unable to find CDR3 end");
                        else if (l3.size() != 3)
                            warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
                        else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
                            warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
                        break;
                }
                for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
                    AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
                    if (aaSequence != null) {
                        // Flag if contains stop codon
                        if (aaSequence.numberOfStops() > 0)
                            warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
                    }
                }
            }
            if (params.getProblemOnly() && warnings.isEmpty())
                continue;
            System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
            if (!warnings.isEmpty()) {
                System.out.println();
                System.out.println("WARNINGS: ");
                for (String warning : warnings) {
                    System.out.println(warning);
                }
                System.out.println();
            }
            for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
                System.out.println();
                System.out.println(GeneFeature.encode(geneFeature));
                NucleotideSequence nSequence = gene.getFeature(geneFeature);
                AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
                System.out.print("N:   ");
                if (nSequence == null)
                    System.out.println("Not Available");
                else
                    System.out.println(nSequence);
                if (GeneFeature.getFrameReference(geneFeature) != null) {
                    System.out.print("AA:  ");
                    if (aaSequence == null)
                        System.out.println("Not Available");
                    else
                        System.out.println(aaSequence);
                }
            }
            System.out.println("=========");
            System.out.println();
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) GeneFeature(io.repseq.core.GeneFeature) ArrayList(java.util.ArrayList) ReferencePoints(io.repseq.core.ReferencePoints) ReferencePoint(io.repseq.core.ReferencePoint) ReferencePoint(io.repseq.core.ReferencePoint) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 7 with VDJCLibrary

use of io.repseq.core.VDJCLibrary in project repseqio by repseqio.

the class FastaAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    GeneFeature geneFeature = params.getGeneFeature();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    if (!"default".equals(params.getInput()))
        reg.registerLibraries(params.getInput());
    else
        reg.loadAllLibraries("default");
    Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    Long taxonFilter = params.taxonId;
    if (taxonFilter == null && params.species != null)
        taxonFilter = reg.resolveSpecies(params.species);
    try (FastaWriter<NucleotideSequence> writer = CLIUtils.createSingleFastaWriter(params.getOutput())) {
        for (VDJCLibrary lib : reg.getLoadedLibraries()) {
            if (taxonFilter != null && taxonFilter != lib.getTaxonId())
                continue;
            for (VDJCGene gene : lib.getGenes()) {
                if (chainPattern != null) {
                    boolean y = false;
                    for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
                        break;
                    if (!y)
                        continue;
                }
                if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                    continue;
                NucleotideSequence featureSequence = gene.getFeature(geneFeature);
                if (featureSequence == null)
                    continue;
                writer.write(gene.getName() + "|" + (gene.isFunctional() ? "F" : "P") + "|taxonId=" + gene.getParentLibrary().getTaxonId(), featureSequence);
            }
        }
    }
}
Also used : GeneFeature(io.repseq.core.GeneFeature) Pattern(java.util.regex.Pattern) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 8 with VDJCLibrary

use of io.repseq.core.VDJCLibrary in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerAlignerTest method basicTest1.

@Test
public void basicTest1() throws Exception {
    Well44497b rg = new Well44497b(12312);
    VDJCAlignerParameters rnaSeqParams = VDJCParametersPresets.getByName("rna-seq");
    PartialAlignmentsAssemblerAligner aligner = new PartialAlignmentsAssemblerAligner(rnaSeqParams);
    VDJCLibrary lib = VDJCLibraryRegistry.getDefault().getLibrary("default", "hs");
    for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes()) if (gene.isFunctional())
        aligner.addGene(gene);
    TargetBuilder.VDJCGenes genes = new TargetBuilder.VDJCGenes(lib, "TRBV12-3*00", "TRBD1*00", "TRBJ1-3*00", "TRBC2*00");
    // | 305
    // 250V + 55CDR3 (20V 7N 10D 3N 15J) + 28J + 100C
    NucleotideSequence baseSeq = TargetBuilder.generateSequence(genes, "{CDR3Begin(-250)}V*270 NNNNNNN {DBegin(0)}D*10 NNN {CDR3End(-15):FR4End} {CBegin}C*100", rg);
    int len = 70;
    NucleotideSequence seq1 = baseSeq.getRange(0, len);
    NucleotideSequence seq2 = baseSeq.getRange(245, 245 + len);
    NucleotideSequence seq3 = baseSeq.getRange(320, 320 + len);
    VDJCAlignmentResult<VDJCMultiRead> alignment = aligner.process(MiXCRTestUtils.createMultiRead(seq1, seq2, seq3));
    VDJCAlignments al = alignment.alignment;
    Assert.assertNotNull(al);
    assertInHits(genes.v, al);
    assertInHits(genes.d, al);
    assertInHits(genes.j, al);
    assertInHits(genes.c, al);
    VDJCHit bestV = al.getBestHit(GeneType.Variable);
    VDJCHit bestD = al.getBestHit(GeneType.Diversity);
    VDJCHit bestJ = al.getBestHit(GeneType.Joining);
    VDJCHit bestC = al.getBestHit(GeneType.Constant);
    Assert.assertNotNull(bestV.getAlignment(0));
    Assert.assertNotNull(bestV.getAlignment(1));
    Assert.assertNull(bestV.getAlignment(2));
    Assert.assertNull(bestD.getAlignment(0));
    Assert.assertNotNull(bestD.getAlignment(1));
    Assert.assertNull(bestD.getAlignment(2));
    Assert.assertNull(bestJ.getAlignment(0));
    Assert.assertNotNull(bestJ.getAlignment(1));
    Assert.assertNotNull(bestJ.getAlignment(2));
    Assert.assertNull(bestC.getAlignment(0));
    Assert.assertNull(bestC.getAlignment(1));
    Assert.assertNotNull(bestC.getAlignment(2));
}
Also used : VDJCAlignerParameters(com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) Well44497b(org.apache.commons.math3.random.Well44497b) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) TargetBuilder(com.milaboratory.mixcr.tests.TargetBuilder) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Test(org.junit.Test)

Example 9 with VDJCLibrary

use of io.repseq.core.VDJCLibrary in project repseqio by repseqio.

the class CompileAction method compile.

public static void compile(Path source, Path destination, int surroundings) throws IOException {
    VDJCLibraryRegistry.resetDefaultRegistry();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    reg.registerLibraries(source, "lib");
    List<VDJCLibraryData> result = new ArrayList<>();
    for (VDJCLibrary lib : reg.getLoadedLibraries()) {
        VDJCDataUtils.FragmentsBuilder fragmentsBuilder = new VDJCDataUtils.FragmentsBuilder();
        for (KnownSequenceFragmentData fragment : lib.getData().getSequenceFragments()) fragmentsBuilder.addRegion(fragment);
        for (VDJCGene gene : lib.getGenes()) {
            if (!gene.getData().getBaseSequence().isPureOriginalSequence())
                throw new IllegalArgumentException("Don't support mutated sequences yet.");
            URI uri = gene.getData().getBaseSequence().getOrigin();
            Range region = gene.getPartitioning().getContainingRegion();
            region = region.expand(surroundings);
            NucleotideSequence seq;
            try {
                seq = gene.getSequenceProvider().getRegion(region);
            } catch (SequenceProviderIndexOutOfBoundsException e) {
                region = e.getAvailableRange();
                if (region == null)
                    throw new IllegalArgumentException("Wrong anchor points for " + gene.getName() + " ?");
                seq = gene.getSequenceProvider().getRegion(region);
            }
            fragmentsBuilder.addRegion(uri, region, seq);
        }
        result.add(new VDJCLibraryData(lib.getTaxonId(), lib.getData().getSpeciesNames(), lib.getData().getGenes(), lib.getData().getMeta(), fragmentsBuilder.getFragments()));
    }
    VDJCDataUtils.writeToFile(result, destination, true);
    log.info("{} compiled successfully.", source);
}
Also used : VDJCDataUtils(io.repseq.dto.VDJCDataUtils) ArrayList(java.util.ArrayList) Range(com.milaboratory.core.Range) URI(java.net.URI) KnownSequenceFragmentData(io.repseq.dto.KnownSequenceFragmentData) SequenceProviderIndexOutOfBoundsException(com.milaboratory.core.sequence.provider.SequenceProviderIndexOutOfBoundsException) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 10 with VDJCLibrary

use of io.repseq.core.VDJCLibrary in project repseqio by repseqio.

the class BasicGCloneModel method create.

@Override
public GCloneGenerator create(RandomGenerator random, VDJCLibraryRegistry registry) {
    VDJCLibrary library = registry.getLibrary(vdjcLibrary);
    final IndependentRealGenerator abundanceGenerator = abundanceModel.create(random);
    final Map<String, GGeneGenerator> geneGenerators = new HashMap<>();
    for (Map.Entry<String, GGeneModel> e : geneModels.entrySet()) geneGenerators.put(e.getKey(), e.getValue().create(random, library));
    return new GCloneGenerator() {

        @Override
        public GClone sample() {
            double abundance = abundanceGenerator.generate();
            Map<String, GGene> genes = new HashMap<>();
            for (Map.Entry<String, GGeneGenerator> e : geneGenerators.entrySet()) genes.put(e.getKey(), e.getValue().generate());
            return new GClone(abundance, genes);
        }
    };
}
Also used : HashMap(java.util.HashMap) GGene(io.repseq.gen.GGene) VDJCLibrary(io.repseq.core.VDJCLibrary) Map(java.util.Map) HashMap(java.util.HashMap) GClone(io.repseq.gen.GClone)

Aggregations

VDJCLibrary (io.repseq.core.VDJCLibrary)11 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)7 VDJCGene (io.repseq.core.VDJCGene)6 VDJCLibraryRegistry (io.repseq.core.VDJCLibraryRegistry)5 GeneFeature (io.repseq.core.GeneFeature)3 ArrayList (java.util.ArrayList)3 Pattern (java.util.regex.Pattern)3 Test (org.junit.Test)3 GClone (io.repseq.gen.GClone)2 GGene (io.repseq.gen.GGene)2 Well44497b (org.apache.commons.math3.random.Well44497b)2 TypeReference (com.fasterxml.jackson.core.type.TypeReference)1 ObjectWriter (com.fasterxml.jackson.databind.ObjectWriter)1 JCommanderBasedMain (com.milaboratory.cli.JCommanderBasedMain)1 Range (com.milaboratory.core.Range)1 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)1 SequenceProviderIndexOutOfBoundsException (com.milaboratory.core.sequence.provider.SequenceProviderIndexOutOfBoundsException)1 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)1 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)1 TargetBuilder (com.milaboratory.mixcr.tests.TargetBuilder)1