Search in sources:

Example 1 with VDJCLibraryRegistry

use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.

Class Main, method main.

/**
 * Command-line entry point.
 *
 * <p>Installs a handler that exits with status 0 when the {@code PIPE} signal is received,
 * initialises the default sequence resolver with a cache directory under the user's home
 * (skipped when the {@code localOnly} system property is set), registers all CLI actions and
 * a version-info callback, and finally hands the arguments over to the command-line helper.
 *
 * @param args raw command-line arguments, passed through unchanged
 * @throws Exception whatever the underlying helper or actions throw
 */
public static void main(String[] args) throws Exception {
    // Terminate quietly on PIPE instead of failing
    SignalHandler exitOnPipe = new SignalHandler() {

        @Override
        public void handle(Signal signal) {
            System.exit(0);
        }
    };
    Signal.handle(new Signal("PIPE"), exitOnPipe);

    // The resolver (with its on-disk cache) is only set up when "localOnly" is not defined
    boolean localOnly = System.getProperty("localOnly") != null;
    if (!localOnly) {
        SequenceResolvers.initDefaultResolver(
                Paths.get(System.getProperty("user.home"), ".repseqio", "cache"));
    }

    // Setting up main helper with every available action
    JCommanderBasedMain main = new JCommanderBasedMain("repseqio",
            new ListAction(),
            new FilterAction(),
            new MergeAction(),
            new CompileAction(),
            new GenerateClonesAction(),
            new NormalizeCloneAbundancesAction(),
            new ExportCloneSequencesAction(),
            new FastaAction(),
            new TsvAction(),
            new InferAnchorPointsAction(),
            new DebugAction(),
            new FormatAction(),
            new StatAction(),
            new FromFastaAction(),
            new FromPaddedFastaAction());

    // Prints tool / MiLib versions followed by the ids of the libraries loaded as "default"
    Runnable versionPrinter = new Runnable() {

        @Override
        public void run() {
            VersionInfo milib = VersionInfo.getVersionInfoForArtifact("milib");
            VersionInfo repseqio = VersionInfo.getVersionInfoForArtifact("repseqio");

            StringBuilder out = new StringBuilder();

            out.append("RepSeq.IO.CLI v").append(repseqio.getVersion());
            out.append(" (built ").append(repseqio.getTimestamp());
            out.append("; rev=").append(repseqio.getRevision());
            out.append("; branch=").append(repseqio.getBranch());
            out.append("; host=").append(repseqio.getHost());
            out.append(")").append("\n");

            out.append("MiLib v").append(milib.getVersion());
            out.append(" (rev=").append(milib.getRevision());
            out.append("; branch=").append(milib.getBranch());
            out.append(")").append("\n");

            out.append("Built-in libraries:\n");
            VDJCLibraryRegistry reg = VDJCLibraryRegistry.createDefaultRegistry();
            reg.loadAllLibraries("default");
            for (VDJCLibrary lib : reg.getLoadedLibraries()) {
                out.append(lib.getLibraryId()).append("\n");
            }

            System.out.print(out.toString());
        }
    };
    main.setVersionInfoCallback(versionPrinter);

    main.main(args);
}
Also used : Signal(sun.misc.Signal) Path(java.nio.file.Path) VersionInfo(com.milaboratory.util.VersionInfo) JCommanderBasedMain(com.milaboratory.cli.JCommanderBasedMain) SignalHandler(sun.misc.SignalHandler) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 2 with VDJCLibraryRegistry

use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.

Class TsvAction, method go.

/**
 * Writes a tab-separated table of gene features to the configured output stream.
 *
 * <p>Libraries are taken from the default registry: either the input given in the parameters is
 * registered, or, when the input is the literal {@code "default"}, all libraries named
 * {@code "default"} are loaded. Genes can be restricted by taxon id (given directly or resolved
 * from a species name), by a regular expression over chain names, and by a regular expression
 * over the gene name. One row is written per gene and requested feature for which a sequence is
 * available.
 *
 * @param helper action helper (not used by this method)
 * @throws Exception on any library-loading or I/O failure
 */
@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    if (!"default".equals(params.getInput()))
        reg.registerLibraries(params.getInput());
    else
        reg.loadAllLibraries("default");

    Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);

    // An explicit taxon id wins; otherwise fall back to resolving the species name, if any
    Long taxonFilter = params.taxonId;
    if (taxonFilter == null && params.species != null)
        taxonFilter = reg.resolveSpecies(params.species);

    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(params.getOutputStream(), StandardCharsets.UTF_8))) {
        writer.write("Gene\tChains\tFeature\tStart\tStop\tSource\tSequence\n");
        for (VDJCLibrary lib : reg.getLoadedLibraries()) {
            // Compare by value; unboxing explicitly rules out an accidental reference comparison
            if (taxonFilter != null && taxonFilter.longValue() != lib.getTaxonId())
                continue;
            for (VDJCGene gene : lib.getGenes()) {
                if (chainPattern != null) {
                    // Keep the gene if at least one of its chains matches the pattern
                    boolean chainMatched = false;
                    for (String chain : gene.getChains()) {
                        if (chainPattern.matcher(chain).matches()) {
                            chainMatched = true;
                            break;
                        }
                    }
                    if (!chainMatched)
                        continue;
                }
                if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                    continue;
                for (GeneFeatureWithOriginalName feature : params.features) {
                    GeneFeature geneFeature = feature.feature;
                    // Looked up once: used both for the availability check and for the output column
                    NucleotideSequence featureSequence = gene.getFeature(geneFeature);
                    if (featureSequence == null)
                        continue;

                    // Don't output start and end positions for composite gene features
                    boolean composite = geneFeature.isComposite();
                    Long start = composite ? null : gene.getData().getAnchorPoints().get(geneFeature.getFirstPoint());
                    Long end = composite ? null : gene.getData().getAnchorPoints().get(geneFeature.getLastPoint());

                    // In one-based mode only the start is shifted by one; the end is written as is.
                    // NOTE(review): this is consistent with anchor points being 0-based with an
                    // exclusive end (the end is then already the 1-based inclusive end) - confirm.
                    String startText = "";
                    if (start != null)
                        startText = String.valueOf(params.isOneBased() ? start + 1 : start);
                    String endText = end == null ? "" : String.valueOf(end);

                    List<String> tokens = Arrays.asList(
                            gene.getGeneName(),
                            gene.getChains().toString(),
                            feature.originalName,
                            startText,
                            endText,
                            gene.getData().getBaseSequence().getOrigin().toString(),
                            featureSequence.toString());

                    String delim = "";
                    for (String t : tokens) {
                        writer.write(delim);
                        writer.write(t);
                        delim = "\t";
                    }
                    writer.write('\n');
                }
            }
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) GeneFeature(io.repseq.core.GeneFeature) GeneFeatureWithOriginalName(io.repseq.cli.CLIUtils.GeneFeatureWithOriginalName) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 3 with VDJCLibraryRegistry

use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.

Class DebugAction, method go.

/**
 * Prints a debug report for every gene of the registered libraries to standard output.
 *
 * <p>For each gene (optionally restricted by a regular expression over the gene name) a list of
 * warnings is collected, but only for functional genes or when "check all" is requested.
 * When "problem only" is requested, genes without warnings are skipped. For every reported gene
 * a header line, the warnings (if any) and the nucleotide / amino acid sequences of the gene
 * features configured for its gene type are printed.
 *
 * @param helper action helper (not used by this method)
 * @throws Exception on any library-loading failure
 */
@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    reg.registerLibraries(params.getInput());
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);

    // Short features used by the checks; built once and shared across all genes
    GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
    GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
    GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
    GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);

    for (VDJCLibrary lib : reg.getLoadedLibraries()) {
        for (VDJCGene gene : lib.getGenes()) {
            if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
                continue;

            // First generate list of warning messages
            List<String> warnings = new ArrayList<>();
            if (gene.isFunctional() || params.getCheckAll()) {
                switch (gene.getGeneType()) {
                    case Variable:
                        checkVariableGene(gene, cdr3FirstTriplet, vIntronDonor, vIntronAcceptor, warnings);
                        break;
                    case Joining:
                        checkJoiningGene(gene, cdr3LastTriplet, warnings);
                        break;
                    default:
                        // No gene-type specific checks for other gene types
                        break;
                }
                checkStopCodons(gene, warnings);
            }

            if (params.getProblemOnly() && warnings.isEmpty())
                continue;

            System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
            if (!warnings.isEmpty()) {
                System.out.println();
                System.out.println("WARNINGS: ");
                for (String warning : warnings) {
                    System.out.println(warning);
                }
                System.out.println();
            }

            printFeatures(gene);

            System.out.println("=========");
            System.out.println();
        }
    }
}

/** Adds warnings specific to Variable genes: CDR3 start residue, V intron borders, reading frame of anchor points. */
private void checkVariableGene(VDJCGene gene, GeneFeature cdr3FirstTriplet, GeneFeature vIntronDonor,
                               GeneFeature vIntronAcceptor, List<String> warnings) {
    // Flag AA residues flanking CDR3
    NucleotideSequence l3 = gene.getFeature(cdr3FirstTriplet);
    if (l3 == null)
        warnings.add("Unable to find CDR3 start");
    else if (l3.size() != 3)
        warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
    else {
        // Translate once; the result is needed for both the check and the message
        AminoAcidSequence translated = AminoAcidSequence.translate(l3);
        if (translated.codeAt(0) != AminoAcidAlphabet.C)
            warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + translated.toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
    }

    // Flag suspicious exon borders
    // https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
    // Note: besides GT, a GC donor is also accepted without a warning
    NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
    if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
        warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
    NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
    if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
        warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());

    ReferencePoints partitioning = gene.getPartitioning();
    if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
        // Iterating over all reading-frame bound anchor points of V gene
        for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
            if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
                continue;
            // And checking that they are in the same frame as Start (L1Begin)
            int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
            // A negative position is treated as "point is not defined" and skipped
            if (relativePosition >= 0 && relativePosition % 3 != 0)
                warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
        }
    }
}

/** Adds warnings specific to Joining genes: the last CDR3 triplet must translate to W or F. */
private void checkJoiningGene(VDJCGene gene, GeneFeature cdr3LastTriplet, List<String> warnings) {
    // Flag AA residues flanking CDR3
    NucleotideSequence l3 = gene.getFeature(cdr3LastTriplet);
    if (l3 == null)
        warnings.add("Unable to find CDR3 end");
    else if (l3.size() != 3)
        warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
    else {
        // Translate once; the result is needed for both the check and the message
        AminoAcidSequence translated = AminoAcidSequence.translate(l3);
        if (translated.codeAt(0) != AminoAcidAlphabet.W && translated.codeAt(0) != AminoAcidAlphabet.F)
            warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + translated.toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
    }
}

/** Adds a warning for every amino-acid feature of the gene's type whose translation contains a stop codon. */
private void checkStopCodons(VDJCGene gene, List<String> warnings) {
    for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
        AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
        // Flag if contains stop codon
        if (aaSequence != null && aaSequence.numberOfStops() > 0)
            warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
    }
}

/** Prints nucleotide and, where a frame reference exists, amino acid sequences of the features configured for the gene's type. */
private void printFeatures(VDJCGene gene) {
    for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
        System.out.println();
        System.out.println(GeneFeature.encode(geneFeature));
        NucleotideSequence nSequence = gene.getFeature(geneFeature);
        AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
        System.out.print("N:   ");
        if (nSequence == null)
            System.out.println("Not Available");
        else
            System.out.println(nSequence);
        // The AA line is only printed for features that have a frame reference
        if (GeneFeature.getFrameReference(geneFeature) != null) {
            System.out.print("AA:  ");
            if (aaSequence == null)
                System.out.println("Not Available");
            else
                System.out.println(aaSequence);
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) GeneFeature(io.repseq.core.GeneFeature) ArrayList(java.util.ArrayList) ReferencePoints(io.repseq.core.ReferencePoints) ReferencePoint(io.repseq.core.ReferencePoint) ReferencePoint(io.repseq.core.ReferencePoint) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 4 with VDJCLibraryRegistry

use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.

Class FastaAction, method go.

/**
 * Exports the sequence of one gene feature for all matching genes as FASTA records.
 *
 * <p>Libraries come from the default registry: the literal input {@code "default"} loads all
 * libraries named {@code "default"}, any other input is registered as given. Genes may be
 * filtered by taxon id (explicit or resolved from a species name), by a regular expression over
 * chain names and by a regular expression over the gene name. Genes for which the requested
 * feature is not available are skipped.
 *
 * @param helper action helper (not used by this method)
 * @throws Exception on any library-loading or output failure
 */
@Override
public void go(ActionHelper helper) throws Exception {
    GeneFeature geneFeature = params.getGeneFeature();

    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    if ("default".equals(params.getInput())) {
        reg.loadAllLibraries("default");
    } else {
        reg.registerLibraries(params.getInput());
    }

    // Optional regular-expression filters
    Pattern chainPattern = null;
    if (params.chain != null) {
        chainPattern = Pattern.compile(params.chain);
    }
    Pattern namePattern = null;
    if (params.name != null) {
        namePattern = Pattern.compile(params.name);
    }

    // Explicit taxon id first, species name as a fallback
    Long taxonFilter = params.taxonId;
    if (taxonFilter == null && params.species != null) {
        taxonFilter = reg.resolveSpecies(params.species);
    }

    try (FastaWriter<NucleotideSequence> writer = CLIUtils.createSingleFastaWriter(params.getOutput())) {
        for (VDJCLibrary lib : reg.getLoadedLibraries()) {
            if (taxonFilter != null && taxonFilter != lib.getTaxonId()) {
                continue;
            }
            for (VDJCGene gene : lib.getGenes()) {
                if (chainPattern != null) {
                    // At least one chain of the gene has to match
                    boolean chainMatched = false;
                    for (String chain : gene.getChains()) {
                        if (chainPattern.matcher(chain).matches()) {
                            chainMatched = true;
                            break;
                        }
                    }
                    if (!chainMatched) {
                        continue;
                    }
                }

                boolean nameAccepted = namePattern == null || namePattern.matcher(gene.getName()).matches();
                if (!nameAccepted) {
                    continue;
                }

                NucleotideSequence sequence = gene.getFeature(geneFeature);
                if (sequence != null) {
                    // Record header: <name>|<F or P>|taxonId=<taxon id of the parent library>
                    StringBuilder header = new StringBuilder();
                    header.append(gene.getName()).append("|");
                    header.append(gene.isFunctional() ? "F" : "P");
                    header.append("|taxonId=").append(gene.getParentLibrary().getTaxonId());
                    writer.write(header.toString(), sequence);
                }
            }
        }
    }
}
Also used : GeneFeature(io.repseq.core.GeneFeature) Pattern(java.util.regex.Pattern) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 5 with VDJCLibraryRegistry

use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.

Class CompileAction, method compile.

/**
 * Compiles the libraries found in {@code source} into a single file at {@code destination}.
 *
 * <p>The default registry is reset, the source is registered under the name {@code "lib"}, and
 * for every loaded library the already known sequence fragments plus, for every gene, the region
 * containing the gene (expanded by {@code surroundings}) are collected. If the expanded region is
 * not fully available from the sequence provider, the available range reported by the provider
 * is used instead.
 *
 * @param source       library source to compile
 * @param destination  file the compiled libraries are written to
 * @param surroundings amount by which each gene's containing region is expanded
 * @throws IOException              propagated from library loading or writing
 * @throws IllegalArgumentException if a gene is based on a non-original (mutated) sequence, or
 *                                  if no sequence range is available for a gene
 */
public static void compile(Path source, Path destination, int surroundings) throws IOException {
    // Start from a clean default registry so only the given source is compiled
    VDJCLibraryRegistry.resetDefaultRegistry();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    reg.registerLibraries(source, "lib");
    List<VDJCLibraryData> result = new ArrayList<>();
    for (VDJCLibrary lib : reg.getLoadedLibraries()) {
        VDJCDataUtils.FragmentsBuilder fragmentsBuilder = new VDJCDataUtils.FragmentsBuilder();
        // Keep the fragments the library already carries
        for (KnownSequenceFragmentData fragment : lib.getData().getSequenceFragments()) {
            fragmentsBuilder.addRegion(fragment);
        }
        for (VDJCGene gene : lib.getGenes()) {
            if (!gene.getData().getBaseSequence().isPureOriginalSequence())
                throw new IllegalArgumentException("Don't support mutated sequences yet.");
            URI uri = gene.getData().getBaseSequence().getOrigin();
            Range region = gene.getPartitioning().getContainingRegion();
            region = region.expand(surroundings);
            NucleotideSequence seq;
            try {
                seq = gene.getSequenceProvider().getRegion(region);
            } catch (SequenceProviderIndexOutOfBoundsException e) {
                // Expanded region is out of bounds: fall back to the range the provider can serve
                region = e.getAvailableRange();
                if (region == null)
                    // Chain the original exception so the root cause is not lost
                    throw new IllegalArgumentException("Wrong anchor points for " + gene.getName() + " ?", e);
                seq = gene.getSequenceProvider().getRegion(region);
            }
            fragmentsBuilder.addRegion(uri, region, seq);
        }
        result.add(new VDJCLibraryData(lib.getTaxonId(), lib.getData().getSpeciesNames(), lib.getData().getGenes(), lib.getData().getMeta(), fragmentsBuilder.getFragments()));
    }
    VDJCDataUtils.writeToFile(result, destination, true);
    log.info("{} compiled successfully.", source);
}
Also used : VDJCDataUtils(io.repseq.dto.VDJCDataUtils) ArrayList(java.util.ArrayList) Range(com.milaboratory.core.Range) URI(java.net.URI) KnownSequenceFragmentData(io.repseq.dto.KnownSequenceFragmentData) SequenceProviderIndexOutOfBoundsException(com.milaboratory.core.sequence.provider.SequenceProviderIndexOutOfBoundsException) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Aggregations

VDJCLibrary (io.repseq.core.VDJCLibrary)5 VDJCLibraryRegistry (io.repseq.core.VDJCLibraryRegistry)5 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)4 VDJCGene (io.repseq.core.VDJCGene)4 GeneFeature (io.repseq.core.GeneFeature)3 Pattern (java.util.regex.Pattern)3 ArrayList (java.util.ArrayList)2 JCommanderBasedMain (com.milaboratory.cli.JCommanderBasedMain)1 Range (com.milaboratory.core.Range)1 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)1 SequenceProviderIndexOutOfBoundsException (com.milaboratory.core.sequence.provider.SequenceProviderIndexOutOfBoundsException)1 VersionInfo (com.milaboratory.util.VersionInfo)1 GeneFeatureWithOriginalName (io.repseq.cli.CLIUtils.GeneFeatureWithOriginalName)1 ReferencePoint (io.repseq.core.ReferencePoint)1 ReferencePoints (io.repseq.core.ReferencePoints)1 KnownSequenceFragmentData (io.repseq.dto.KnownSequenceFragmentData)1 VDJCDataUtils (io.repseq.dto.VDJCDataUtils)1 VDJCLibraryData (io.repseq.dto.VDJCLibraryData)1 URI (java.net.URI)1 Path (java.nio.file.Path)1