Search in sources :

Example 6 with AminoAcidSequence

use of com.milaboratory.core.sequence.AminoAcidSequence in project repseqio by repseqio.

the class DebugAction method getAminoAcidSequence.

private static AminoAcidSequence getAminoAcidSequence(VDJCGene gene, GeneFeature geneFeature, NucleotideSequence nSequence) {
    ReferencePoints partitioning = gene.getPartitioning();
    ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
    AminoAcidSequence aaSequence;
    if (frameReference != null) {
        int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
        aaSequence = nSequence == null || relativePosition < 0 ? null : AminoAcidSequence.translate(nSequence, withIncompleteCodon(relativePosition));
    } else
        aaSequence = null;
    return aaSequence;
}
Also used : ReferencePoint(io.repseq.core.ReferencePoint) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) ReferencePoints(io.repseq.core.ReferencePoints) ReferencePoint(io.repseq.core.ReferencePoint)

Example 7 with AminoAcidSequence

use of com.milaboratory.core.sequence.AminoAcidSequence in project repseqio by repseqio.

the class FromFastaActionAbstract method go.

@Override
public void go(ActionHelper helper) throws Exception {
    Pattern functionalityRegexp = params.getFunctionalityRegexp();
    GeneType geneType = params.getGeneType();
    Map<String, VDJCGeneData> genes = new HashMap<>();
    Path libraryPath = Paths.get(params.getOutputJSON()).toAbsolutePath();
    // Parsing -P or --gene-feature parameters
    Map<ReferencePoint, Integer> points = new HashMap<>();
    if (params.geneFeature != null) {
        GeneFeature gf = params.getGeneFeature();
        points.put(gf.getFirstPoint(), 0);
        points.put(gf.getLastPoint(), -1);
    } else
        for (Map.Entry<String, String> p : params.points.entrySet()) {
            ReferencePoint anchorPoint = ReferencePoint.getPointByName(p.getKey());
            if (anchorPoint == null)
                throw new IllegalArgumentException("Unknown anchor point: " + p.getKey());
            int position = Integer.decode(p.getValue());
            points.put(anchorPoint, position);
        }
    // Check
    for (Map.Entry<ReferencePoint, Integer> entry : points.entrySet()) if (entry.getKey().getGeneType() != null && entry.getKey().getGeneType() != geneType)
        throw new IllegalArgumentException("Incompatible anchor point and gene type: " + entry.getKey() + " / " + geneType);
    try (FastaReader reader = new FastaReader<>(params.getInput(), null);
        SequenceStorage storage = params.doEmbedSequences() ? new EmbeddedWriter() : params.getOutputFasta() == null ? new ExistingFileWriter(libraryPath, Paths.get(params.getInput()).toAbsolutePath()) : new FastaSequenceStorage(libraryPath, Paths.get(params.getOutputFasta()).toAbsolutePath())) {
        for (FastaReader.RawFastaRecord record : CUtils.it((OutputPortCloseable<FastaReader.RawFastaRecord>) reader.asRawRecordsPort())) {
            StringWithMapping swm = StringWithMapping.removeSymbol(record.sequence, params.getPaddingCharacter());
            NucleotideSequence seq = new NucleotideSequence(swm.getModifiedString());
            if (seq.containsWildcards()) {
                System.out.println("Sequence dropped because contain wildcards: " + record.description);
                continue;
            }
            String[] fields = record.description.split("\\|");
            String geneName = fields[params.nameIndex];
            boolean functionality = true;
            if (params.functionalityIndex != null)
                functionality = functionalityRegexp.matcher(fields[params.functionalityIndex]).matches();
            SortedMap<ReferencePoint, Long> anchorPoints = new TreeMap<>();
            for (Map.Entry<String, String> p : params.patterns.entrySet()) {
                ReferencePoint anchorPoint = ReferencePoint.getPointByName(p.getKey());
                if (anchorPoint == null)
                    throw new IllegalArgumentException("Unknown anchor point: " + p.getKey());
                if (anchorPoint.getGeneType() != null && anchorPoint.getGeneType() != geneType)
                    throw new IllegalArgumentException("Incompatible anchor point and gene type: " + anchorPoint + " / " + geneType);
                Pattern pattern = Pattern.compile(p.getValue());
                int position = -1;
                for (boolean stops : new boolean[] { false, true }) for (int f = 0; f < 3; f++) {
                    if (position != -1)
                        continue;
                    TranslationParameters tp = TranslationParameters.withoutIncompleteCodon(f);
                    AminoAcidSequence aa = AminoAcidSequence.translate(seq, tp);
                    if (!stops && aa.containStops())
                        continue;
                    String str = aa.toString();
                    Matcher matcher = pattern.matcher(str);
                    if (matcher.find()) {
                        int aaPosition = matcher.start(1);
                        position = AminoAcidSequence.convertAAPositionToNt(aaPosition, seq.size(), tp);
                    }
                }
                if (position == -1)
                    continue;
                anchorPoints.put(anchorPoint, (long) position);
            }
            for (Map.Entry<ReferencePoint, Integer> p : points.entrySet()) {
                // AA patterns have priority over positional anchor points
                if (anchorPoints.containsKey(p.getKey()))
                    continue;
                // Converting position using
                int position = swm.convertPosition(p.getValue());
                // Can't be converted (e.g. position of padding symbol) skipping
                if (position == -1)
                    continue;
                anchorPoints.put(p.getKey(), (long) position);
            }
            if (genes.containsKey(geneName)) {
                if (params.getIgnoreDuplicates()) {
                    System.out.println("Ignored: Duplicate records for " + geneName);
                    continue;
                } else
                    throw new IllegalArgumentException("Duplicate records for " + geneName);
            }
            BaseSequence baseSequence = storage.storeSequence(seq, geneName, record.description);
            VDJCGeneData gene = new VDJCGeneData(baseSequence, geneName, geneType, functionality, new Chains(params.chain), new TreeMap<String, SortedSet<String>>(), anchorPoints).addMetaValue(KnownVDJCGeneMetaFields.COMMENTS, record.description);
            genes.put(geneName, gene);
        }
        VDJCLibraryData library = new VDJCLibraryData(params.taxonId, params.speciesNames, new ArrayList<>(genes.values()), new TreeMap<String, SortedSet<String>>(), storage.getBase()).addMetaValue(KnownVDJCLibraryMetaFields.COMMENTS, "Imported from: " + params.getInput());
        VDJCDataUtils.writeToFile(new VDJCLibraryData[] { library }, params.getOutputJSON(), false);
    }
}
Also used : FastaReader(com.milaboratory.core.io.sequence.fasta.FastaReader) Matcher(java.util.regex.Matcher) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) Path(java.nio.file.Path) Pattern(java.util.regex.Pattern) StringWithMapping(io.repseq.util.StringWithMapping) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 8 with AminoAcidSequence

use of com.milaboratory.core.sequence.AminoAcidSequence in project mixcr by milaboratory.

the class BackwardCompatibilityTests method assertGoodCLNS.

public static void assertGoodCLNS(String resource, int size, int good, double sumCount) throws IOException {
    CloneSet cloneSet = CloneSetIO.read(BackwardCompatibilityTests.class.getResource(resource).getFile());
    Assert.assertEquals(size, cloneSet.size());
    int countGood = 0;
    for (Clone clone : cloneSet.getClones()) {
        sumCount -= clone.getCount();
        AminoAcidSequence aaCDR3 = AminoAcidSequence.translateFromCenter(clone.getFeature(GeneFeature.CDR3).getSequence());
        if (aaCDR3.codeAt(0) == AminoAcidAlphabet.C && aaCDR3.codeAt(aaCDR3.size() - 1) == AminoAcidAlphabet.F) {
            ++countGood;
        }
    }
    Assert.assertEquals(0, sumCount, 0.01);
    Assert.assertEquals(good, countGood);
}
Also used : AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence)

Aggregations

AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)8 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)5 ReferencePoint (io.repseq.core.ReferencePoint)4 TranslationParameters (com.milaboratory.core.sequence.TranslationParameters)3 Pattern (java.util.regex.Pattern)3 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)2 GeneFeature (io.repseq.core.GeneFeature)2 ReferencePoints (io.repseq.core.ReferencePoints)2 ArrayList (java.util.ArrayList)2 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 Alignment (com.milaboratory.core.alignment.Alignment)1 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)1 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)1 SimpleBatchAligner (com.milaboratory.core.alignment.batch.SimpleBatchAligner)1 SimpleBatchAlignerParameters (com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters)1 SequenceRead (com.milaboratory.core.io.sequence.SequenceRead)1 FastaReader (com.milaboratory.core.io.sequence.fasta.FastaReader)1 Mutations (com.milaboratory.core.mutations.Mutations)1 Clone (com.milaboratory.mixcr.basictypes.Clone)1 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)1