Search in sources :

Example 21 with GeneFeature

use of io.repseq.core.GeneFeature in project repseqio by repseqio.

the class TsvAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    if (!"default".equals(params.getInput()))
        reg.registerLibraries(params.getInput());
    else
        reg.loadAllLibraries("default");
    Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    Long taxonFilter = params.taxonId;
    if (taxonFilter == null && params.species != null)
        taxonFilter = reg.resolveSpecies(params.species);
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(params.getOutputStream(), StandardCharsets.UTF_8))) {
        writer.write("Gene\tChains\tFeature\tStart\tStop\tSource\tSequence\n");
        for (VDJCLibrary lib : reg.getLoadedLibraries()) {
            if (taxonFilter != null && taxonFilter != lib.getTaxonId())
                continue;
            for (VDJCGene gene : lib.getGenes()) {
                if (chainPattern != null) {
                    boolean y = false;
                    for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
                        break;
                    if (!y)
                        continue;
                }
                if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                    continue;
                for (GeneFeatureWithOriginalName feature : params.features) {
                    GeneFeature geneFeature = feature.feature;
                    NucleotideSequence featureSequence = gene.getFeature(geneFeature);
                    if (featureSequence == null)
                        continue;
                    // Don't output start and end positions for composite gene features
                    Long start = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getFirstPoint());
                    Long end = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getLastPoint());
                    NucleotideSequence nSequence = gene.getFeature(geneFeature);
                    List<String> tokens = Arrays.asList(gene.getGeneName(), gene.getChains().toString(), feature.originalName, // (so essentially 1-based inclusive). Report both as 1-based.
                    (start == null ? "" : params.isOneBased() ? String.valueOf(start + 1) : String.valueOf(start)), (end == null ? "" : String.valueOf(end)), gene.getData().getBaseSequence().getOrigin().toString(), nSequence.toString());
                    String delim = "";
                    for (String t : tokens) {
                        writer.write(delim);
                        writer.write(t);
                        delim = "\t";
                    }
                    writer.write('\n');
                }
            }
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) GeneFeature(io.repseq.core.GeneFeature) GeneFeatureWithOriginalName(io.repseq.cli.CLIUtils.GeneFeatureWithOriginalName) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 22 with GeneFeature

use of io.repseq.core.GeneFeature in project repseqio by repseqio.

the class DebugAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    reg.registerLibraries(params.getInput());
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
    GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
    GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
    GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
    for (VDJCLibrary lib : reg.getLoadedLibraries()) {
        for (VDJCGene gene : lib.getGenes()) {
            if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                continue;
            // First generate list of warning messages
            List<String> warnings = new ArrayList<>();
            if (gene.isFunctional() || params.getCheckAll()) {
                NucleotideSequence l3;
                switch(gene.getGeneType()) {
                    case Variable:
                        // Flag AA residues flanking CDR3
                        l3 = gene.getFeature(cdr3FirstTriplet);
                        if (l3 == null)
                            warnings.add("Unable to find CDR3 start");
                        else if (l3.size() != 3)
                            warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
                        else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
                            warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
                        // Flag suspicious exon borders
                        // https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
                        NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
                        if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
                            warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
                        NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
                        if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
                            warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
                        ReferencePoints partitioning = gene.getPartitioning();
                        if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
                            // Iterating over all reading-frame bound anchor points of V gene
                            for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
                                if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
                                    continue;
                                // And checking that they are in the same frame as Start (L1Begin)
                                int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
                                if (// Point is defined
                                relativePosition >= 0 && relativePosition % 3 != 0)
                                    warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
                            }
                        }
                        break;
                    case Joining:
                        // Flag AA residues flanking CDR3
                        l3 = gene.getFeature(cdr3LastTriplet);
                        if (l3 == null)
                            warnings.add("Unable to find CDR3 end");
                        else if (l3.size() != 3)
                            warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
                        else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
                            warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
                        break;
                }
                for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
                    AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
                    if (aaSequence != null) {
                        // Flag if contains stop codon
                        if (aaSequence.numberOfStops() > 0)
                            warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
                    }
                }
            }
            if (params.getProblemOnly() && warnings.isEmpty())
                continue;
            System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
            if (!warnings.isEmpty()) {
                System.out.println();
                System.out.println("WARNINGS: ");
                for (String warning : warnings) {
                    System.out.println(warning);
                }
                System.out.println();
            }
            for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
                System.out.println();
                System.out.println(GeneFeature.encode(geneFeature));
                NucleotideSequence nSequence = gene.getFeature(geneFeature);
                AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
                System.out.print("N:   ");
                if (nSequence == null)
                    System.out.println("Not Available");
                else
                    System.out.println(nSequence);
                if (GeneFeature.getFrameReference(geneFeature) != null) {
                    System.out.print("AA:  ");
                    if (aaSequence == null)
                        System.out.println("Not Available");
                    else
                        System.out.println(aaSequence);
                }
            }
            System.out.println("=========");
            System.out.println();
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) GeneFeature(io.repseq.core.GeneFeature) ArrayList(java.util.ArrayList) ReferencePoints(io.repseq.core.ReferencePoints) ReferencePoint(io.repseq.core.ReferencePoint) ReferencePoint(io.repseq.core.ReferencePoint) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 23 with GeneFeature

use of io.repseq.core.GeneFeature in project repseqio by repseqio.

the class FastaAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    GeneFeature geneFeature = params.getGeneFeature();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    if (!"default".equals(params.getInput()))
        reg.registerLibraries(params.getInput());
    else
        reg.loadAllLibraries("default");
    Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    Long taxonFilter = params.taxonId;
    if (taxonFilter == null && params.species != null)
        taxonFilter = reg.resolveSpecies(params.species);
    try (FastaWriter<NucleotideSequence> writer = CLIUtils.createSingleFastaWriter(params.getOutput())) {
        for (VDJCLibrary lib : reg.getLoadedLibraries()) {
            if (taxonFilter != null && taxonFilter != lib.getTaxonId())
                continue;
            for (VDJCGene gene : lib.getGenes()) {
                if (chainPattern != null) {
                    boolean y = false;
                    for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
                        break;
                    if (!y)
                        continue;
                }
                if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                    continue;
                NucleotideSequence featureSequence = gene.getFeature(geneFeature);
                if (featureSequence == null)
                    continue;
                writer.write(gene.getName() + "|" + (gene.isFunctional() ? "F" : "P") + "|taxonId=" + gene.getParentLibrary().getTaxonId(), featureSequence);
            }
        }
    }
}
Also used : GeneFeature(io.repseq.core.GeneFeature) Pattern(java.util.regex.Pattern) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 24 with GeneFeature

use of io.repseq.core.GeneFeature in project mixcr by milaboratory.

the class FullSeqAssemblerTest method testLargeCloneNoMismatches.

@Test
public void testLargeCloneNoMismatches() throws Exception {
    MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
    NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
    RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
    params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
    Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
    // ActionExportClonesPretty.outputCompact(System.out, largeClone);
    // System.exit(0);
    Well44497b rnd = new Well44497b(1234567889L);
    int nReads = 100_000;
    int readLength = 75, readGap = 150;
    // slice seq randomly
    PairedRead[] slicedReads = new PairedRead[nReads];
    for (int i = 0; i < nReads; ++i) {
        int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
        assert r2from > r1from;
        slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
    }
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
    // params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
    params.alignerParameters.setSaveOriginalReads(true);
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    // for (VDJCAlignments al : align.alignments) {
    // for (int i = 0; i < al.numberOfTargets(); i++) {
    // System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
    // System.out.println();
    // }
    // System.out.println();
    // System.out.println(" ================================================ ");
    // System.out.println();
    // }
    Assert.assertEquals(1, assemble.cloneSet.size());
    Clone initialClone = assemble.cloneSet.get(0);
    NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
    List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
    alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
        for (int i = 0; i < al.numberOfTargets(); i++) {
            System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
            System.out.println();
        }
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    });
    // System.exit(0);
    System.out.println("=> Agg");
    CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
    FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
    FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
    List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
    clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
    for (Clone clone : clones) {
        ActionExportClonesPretty.outputCompact(System.out, clone);
        System.out.println();
        System.out.println(" ================================================ ");
        System.out.println();
    }
}
Also used : java.util(java.util) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) Well44497b(org.apache.commons.math3.random.Well44497b) SequenceQuality(com.milaboratory.core.sequence.SequenceQuality) Clone(com.milaboratory.mixcr.basictypes.Clone) GeneFeature(io.repseq.core.GeneFeature) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Main(com.milaboratory.mixcr.cli.Main) StreamSupport(java.util.stream.StreamSupport) PairedRead(com.milaboratory.core.io.sequence.PairedRead) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCAlignmentsFormatter(com.milaboratory.mixcr.basictypes.VDJCAlignmentsFormatter) CUtils(cc.redberry.pipe.CUtils) Test(org.junit.Test) Collectors(java.util.stream.Collectors) TIntHashSet(gnu.trove.set.hash.TIntHashSet) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) Well19937c(org.apache.commons.math3.random.Well19937c) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) GeneType(io.repseq.core.GeneType) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ActionExportClonesPretty(com.milaboratory.mixcr.cli.ActionExportClonesPretty) VDJCParametersPresets(com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets) Assert(org.junit.Assert) SequenceReaderCloseable(com.milaboratory.core.io.sequence.SequenceReaderCloseable) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) SingleReadImpl(com.milaboratory.core.io.sequence.SingleReadImpl) PairedRead(com.milaboratory.core.io.sequence.PairedRead) CloneSet(com.milaboratory.mixcr.basictypes.CloneSet) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) Well44497b(org.apache.commons.math3.random.Well44497b) RunMiXCR(com.milaboratory.mixcr.util.RunMiXCR) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) CloneFactory(com.milaboratory.mixcr.assembler.CloneFactory) Clone(com.milaboratory.mixcr.basictypes.Clone) Test(org.junit.Test)

Example 25 with GeneFeature

use of io.repseq.core.GeneFeature in project mixcr by milaboratory.

the class TargetMerger method merge.

@SuppressWarnings("unchecked")
public AlignedTarget merge(AlignedTarget targetLeft, AlignedTarget targetRight, int offset, OverlapType overlapType, int nMismatches) {
    if (offset < 0)
        return merge(targetRight, targetLeft, -offset, overlapType, nMismatches);
    final NSequenceWithQuality mergedTarget = merger.overlap(targetLeft.getTarget(), targetRight.getTarget(), offset);
    EnumMap<GeneType, VDJCHit[]> result = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        final BatchAlignerWithBaseParameters bp = ((KGeneAlignmentParameters) alignerParameters.getGeneAlignerParameters(geneType)).getParameters();
        final VDJCHit[] leftHits = targetLeft.getAlignments().getHits(geneType);
        final VDJCHit[] rightHits = targetRight.getAlignments().getHits(geneType);
        GeneFeature alignedFeature = leftHits.length == 0 ? rightHits.length == 0 ? null : rightHits[0].getAlignedFeature() : leftHits[0].getAlignedFeature();
        Map<VDJCGeneId, HitMappingRecord> map = extractHitsMapping(targetLeft, targetRight, geneType);
        ArrayList<VDJCHit> resultingHits = new ArrayList<>();
        for (Map.Entry<VDJCGeneId, HitMappingRecord> mE : map.entrySet()) {
            final VDJCGene gene = mE.getValue().gene;
            Alignment<NucleotideSequence> mergedAl = merge(bp.getScoring(), extractBandedWidth(bp), mergedTarget.getSequence(), offset, mE.getValue().alignments[0], mE.getValue().alignments[1]);
            resultingHits.add(new VDJCHit(gene, mergedAl, alignedFeature));
        }
        Collections.sort(resultingHits);
        // final float relativeMinScore = extractRelativeMinScore(bp);
        // int threshold = (int) (resultingHits.size() > 0 ? resultingHits.get(0).getScore() * relativeMinScore : 0);
        // for (int i = resultingHits.size() - 1; i > 0; --i)
        // if (resultingHits.get(i).getScore() < threshold)
        // resultingHits.remove(i);
        result.put(geneType, resultingHits.toArray(new VDJCHit[resultingHits.size()]));
    }
    VDJCAlignments alignments = new VDJCAlignments(result, new NSequenceWithQuality[] { mergedTarget }, new SequenceHistory[] { new SequenceHistory.Merge(overlapType, targetLeft.getHistory(), targetRight.getHistory(), offset, nMismatches) }, VDJCAlignments.mergeOriginalReads(targetLeft.getAlignments(), targetRight.getAlignments()));
    AlignedTarget resultTarget = new AlignedTarget(alignments, 0);
    for (BPoint bPoint : BPoint.values()) {
        int leftPoint = targetLeft.getBPoint(bPoint);
        int rightPoint = targetRight.getBPoint(bPoint);
        if (leftPoint != -1 && rightPoint != -1)
            throw new IllegalArgumentException("Same bPoint defined in both input targets.");
        else if (leftPoint != -1)
            resultTarget = resultTarget.setBPoint(bPoint, leftPoint);
        else if (rightPoint != -1)
            resultTarget = resultTarget.setBPoint(bPoint, offset + rightPoint);
    }
    return resultTarget;
}
Also used : GeneFeature(io.repseq.core.GeneFeature) SequenceHistory(com.milaboratory.mixcr.basictypes.SequenceHistory) VDJCGeneId(io.repseq.core.VDJCGeneId) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) BatchAlignerWithBaseParameters(com.milaboratory.core.alignment.batch.BatchAlignerWithBaseParameters) KGeneAlignmentParameters(com.milaboratory.mixcr.vdjaligners.KGeneAlignmentParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Aggregations

GeneFeature (io.repseq.core.GeneFeature)41 Test (org.junit.Test)23 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)10 GeneType (io.repseq.core.GeneType)9 VDJCGene (io.repseq.core.VDJCGene)6 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)3 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)3 ReferencePoint (io.repseq.core.ReferencePoint)3 VDJCLibrary (io.repseq.core.VDJCLibrary)3 VDJCLibraryRegistry (io.repseq.core.VDJCLibraryRegistry)3 Pattern (java.util.regex.Pattern)3 SequenceRead (com.milaboratory.core.io.sequence.SequenceRead)2 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)2 Clone (com.milaboratory.mixcr.basictypes.Clone)2 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)2 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)2 IntArrayList (com.milaboratory.util.IntArrayList)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Well19937c (org.apache.commons.math3.random.Well19937c)2