Search in sources :

Example 1 with Mutations

use of com.milaboratory.core.mutations.Mutations in project mixcr by milaboratory.

The class FieldExtractors, method getFields.

/**
 * Returns the array of all known export field descriptors, building it on the first call.
 *
 * <p>The array is created once, stored in the static {@code descriptors} field and returned
 * as-is on every subsequent call; the method is {@code synchronized} so the lazy
 * initialisation happens only once.
 *
 * <p>Fields are registered in the following order: per-gene-type hit/gene/family/score/alignment
 * fields, gene-feature based fields (sequence, quality, amino acids, mutations), reference
 * point positions, alignment-only fields (read ids, sequences, descriptions, history),
 * clone-only fields (id, count, fraction, sequences) and identity / chain fields.
 *
 * @return the cached array of field descriptors (the same array instance on every call)
 */
public static synchronized Field[] getFields() {
    if (descriptors == null) {
        List<Field> descriptorsList = new ArrayList<>();
        // Number of targets
        descriptorsList.add(new PL_O("-targets", "Export number of targets", "Number of targets", "numberOfTargets") {

            @Override
            protected String extract(VDJCObject object) {
                return Integer.toString(object.numberOfTargets());
            }
        });
        // Best hits
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hit", "Export best " + l + " hit", "Best " + l + " hit", "best" + l + "Hit") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return bestHit.getGene().getName();
                }
            });
        }
        // Best gene
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Gene", "Export best " + l + " hit gene name (e.g. TRBV12-3 for TRBV12-3*00)", "Best " + l + " gene", "best" + l + "Gene") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return bestHit.getGene().getGeneName();
                }
            });
        }
        // Best family
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Family", "Export best " + l + " hit family name (e.g. TRBV12 for TRBV12-3*00)", "Best " + l + " family", "best" + l + "Family") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return bestHit.getGene().getFamilyName();
                }
            });
        }
        // Best hit score
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitScore", "Export score for best " + l + " hit", "Best " + l + " hit score", "best" + l + "HitScore") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return String.valueOf(bestHit.getScore());
                }
            });
        }
        // All hits, formatted as name(score) and comma-separated
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitsWithScore", "Export all " + l + " hits with score", "All " + l + " hits", "all" + l + "HitsWithScore") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits.length == 0)
                        return "";
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        sb.append(hits[i].getGene().getName()).append("(").append(SCORE_FORMAT.format(hits[i].getScore())).append(")");
                        if (i == hits.length - 1)
                            break;
                        sb.append(",");
                    }
                    return sb.toString();
                }
            });
        }
        // All hits without score
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hits", "Export all " + l + " hits", "All " + l + " Hits", "all" + l + "Hits") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits.length == 0)
                        return "";
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        sb.append(hits[i].getGene().getName());
                        if (i == hits.length - 1)
                            break;
                        sb.append(",");
                    }
                    return sb.toString();
                }
            });
        }
        // All gene names
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Genes", "Export all " + l + " gene names (e.g. TRBV12-3 for TRBV12-3*00)", "All " + l + " genes", "all" + l + "Genes", type) {

                @Override
                String extractStringForHit(VDJCHit hit) {
                    return hit.getGene().getGeneName();
                }
            });
        }
        // All families
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            // Help text typo fixed: "anmes" -> "names"
            descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Families", "Export all " + l + " gene family names (e.g. TRBV12 for TRBV12-3*00)", "All " + l + " families", "all" + l + "Families", type) {

                @Override
                String extractStringForHit(VDJCHit hit) {
                    return hit.getGene().getFamilyName();
                }
            });
        }
        // Best alignment: one entry per target, comma-separated
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignment", "Export best " + l + " alignment", "Best " + l + " alignment", "best" + l + "Alignment") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        Alignment<NucleotideSequence> alignment = bestHit.getAlignment(i);
                        if (alignment == null)
                            sb.append(NULL);
                        else
                            sb.append(alignment.toCompactString());
                        if (i == object.numberOfTargets() - 1)
                            break;
                        sb.append(",");
                    }
                    return sb.toString();
                }
            });
        }
        // All alignments: targets are separated by ',' and hits by ';'
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignments", "Export all " + l + " alignments", "All " + l + " alignments", "all" + l + "Alignments") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits.length == 0)
                        return "";
                    StringBuilder sb = new StringBuilder();
                    for (int j = 0; ; ++j) {
                        for (int i = 0; ; i++) {
                            Alignment<NucleotideSequence> alignment = hits[j].getAlignment(i);
                            if (alignment == null)
                                sb.append(NULL);
                            else
                                sb.append(alignment.toCompactString());
                            if (i == object.numberOfTargets() - 1)
                                break;
                            sb.append(',');
                        }
                        if (j == hits.length - 1)
                            break;
                        sb.append(';');
                    }
                    return sb.toString();
                }
            });
        }
        // Gene feature: nucleotide sequence
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-nFeature", "Export nucleotide sequence of specified gene feature", "N. Seq. ", "nSeq") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return seq.getSequence().toString();
            }
        });
        // Gene feature: quality string
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-qFeature", "Export quality string of specified gene feature", "Qual. ", "qual") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return seq.getQuality().toString();
            }
        });
        // Gene feature: amino acid translation
        descriptorsList.add(new FeatureExtractors.WithHeader("-aaFeature", "Export amino acid sequence of specified gene feature", 1, new String[] { "AA. Seq. " }, new String[] { "aaSeq" }) {

            @Override
            protected String extractValue(VDJCObject object, GeneFeature[] parameters) {
                GeneFeature geneFeature = parameters[parameters.length - 1];
                NSequenceWithQuality feature = object.getFeature(geneFeature);
                if (feature == null)
                    return NULL;
                int targetId = object.getTargetContainingFeature(geneFeature);
                // When no single target contains the feature (-1), fall back to translating from the left
                TranslationParameters tr = targetId == -1 ? TranslationParameters.FromLeftWithIncompleteCodon : object.getPartitionedTarget(targetId).getPartitioning().getTranslationParameters(geneFeature);
                if (tr == null)
                    return NULL;
                return AminoAcidSequence.translate(feature.getSequence(), tr).toString();
            }
        });
        // descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromLeft", "Export amino acid sequence of " +
        // "specified gene feature starting from the leftmost nucleotide (differs from -aaFeature only for " +
        // "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
        // @Override
        // public String convert(NSequenceWithQuality seq) {
        // return AminoAcidSequence.translate(seq.getSequence(), FromLeftWithoutIncompleteCodon).toString();
        // }
        // });
        // 
        // descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromRight", "Export amino acid sequence of " +
        // "specified gene feature starting from the rightmost nucleotide (differs from -aaFeature only for " +
        // "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
        // @Override
        // public String convert(NSequenceWithQuality seq) {
        // return AminoAcidSequence.translate(seq.getSequence(), FromRightWithoutIncompleteCodon).toString();
        // }
        // });
        // Gene feature: minimal quality
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-minFeatureQuality", "Export minimal quality of specified gene feature", "Min. qual. ", "minQual") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return "" + seq.getQuality().minValue();
            }
        });
        // Gene feature: mean quality
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-avrgFeatureQuality", "Export average quality of specified gene feature", "Mean. qual. ", "meanQual") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return "" + seq.getQuality().meanValue();
            }
        });
        // Gene feature: length
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-lengthOf", "Exports length of specified gene feature.", "Length of ", "lengthOf") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return "" + seq.size();
            }
        });
        // Nucleotide mutations (one-argument and "relative to another feature" variants)
        descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutations", "Extract nucleotide mutations for specific gene feature; relative to germline sequence.", 1, new String[] { "N. Mutations in " }, new String[] { "nMutations" }) {

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                return mutations.encode(",");
            }
        });
        descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutationsRelative", "Extract nucleotide mutations for specific gene feature relative to another feature.", 2, new String[] { "N. Mutations in ", " relative to " }, new String[] { "nMutationsIn", "Relative" }) {

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                return mutations.encode(",");
            }
        });
        // Amino acid mutations; "-" is emitted when no translation is available
        final class AAMutations extends FeatureExtractors.MutationsExtractor {

            AAMutations(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
                super(command, description, nArgs, hPrefix, sPrefix);
            }

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                if (tr == null)
                    return "-";
                Mutations<AminoAcidSequence> aaMuts = MutationsUtil.nt2aa(seq1, mutations, tr);
                if (aaMuts == null)
                    return "-";
                return aaMuts.encode(",");
            }
        }
        descriptorsList.add(new AAMutations("-aaMutations", "Extract amino acid mutations for specific gene feature", 1, new String[] { "AA. Mutations in " }, new String[] { "aaMutations" }));
        descriptorsList.add(new AAMutations("-aaMutationsRelative", "Extract amino acid mutations for specific gene feature relative to another feature.", 2, new String[] { "AA. Mutations in ", " relative to " }, new String[] { "aaMutationsIn", "Relative" }));
        // Detailed nucleotide -> amino acid mutation descriptors; "-" when unavailable
        final class MutationsDetailed extends FeatureExtractors.MutationsExtractor {

            MutationsDetailed(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
                super(command, description, nArgs, hPrefix, sPrefix);
            }

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                if (tr == null)
                    return "-";
                // Named differently from the static 'descriptors' cache field to avoid shadowing it
                MutationsUtil.MutationNt2AADescriptor[] detailed = MutationsUtil.nt2aaDetailed(seq1, mutations, tr, 10);
                if (detailed == null)
                    return "-";
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < detailed.length; i++) {
                    sb.append(detailed[i]);
                    if (i == detailed.length - 1)
                        break;
                    sb.append(",");
                }
                return sb.toString();
            }
        }
        String detailedMutationsFormat = "Format <nt_mutation>:<aa_mutation_individual>:<aa_mutation_cumulative>, where <aa_mutation_individual> is an expected amino acid " + "mutation given no other mutations have occurred, and <aa_mutation_cumulative> amino acid mutation is the observed amino acid " + "mutation combining effect from all other. WARNING: format may change in following versions.";
        descriptorsList.add(new MutationsDetailed("-mutationsDetailed", "Detailed list of nucleotide and corresponding amino acid mutations. " + detailedMutationsFormat, 1, new String[] { "Detailed mutations in " }, new String[] { "mutationsDetailedIn" }));
        descriptorsList.add(new MutationsDetailed("-mutationsDetailedRelative", "Detailed list of nucleotide and corresponding amino acid mutations written, positions relative to specified gene feature. " + detailedMutationsFormat, 2, new String[] { "Detailed mutations in ", " relative to " }, new String[] { "mutationsDetailedIn", "Relative" }));
        // Reference point positions
        descriptorsList.add(new ExtractReferencePointPosition());
        descriptorsList.add(new ExtractDefaultReferencePointsPositions());
        // Deprecated single read id; prints a warning when the extractor is created
        descriptorsList.add(new PL_A("-readId", "Export id of read corresponding to alignment", "Read id", "readId") {

            @Override
            protected String extract(VDJCAlignments object) {
                return "" + object.getMinReadId();
            }

            @Override
            public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
                System.out.println("WARNING: -readId is deprecated. Use -readIds");
                return super.create(outputMode, args);
            }
        });
        // All read ids, comma-separated
        descriptorsList.add(new PL_A("-readIds", "Export id of read corresponding to alignment", "Read id", "readId") {

            @Override
            protected String extract(VDJCAlignments object) {
                long[] readIds = object.getReadIds();
                // Guard: the loop below reads element 0 unconditionally and would throw on an empty array
                if (readIds.length == 0)
                    return "";
                StringBuilder sb = new StringBuilder();
                for (int i = 0; ; i++) {
                    sb.append(readIds[i]);
                    if (i == readIds.length - 1)
                        return sb.toString();
                    sb.append(",");
                }
            }
        });
        descriptorsList.add(new ExtractSequence(VDJCAlignments.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Read(s) sequence", "readSequence"));
        descriptorsList.add(new ExtractSequenceQuality(VDJCAlignments.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Read(s) sequence qualities", "readQuality"));
        // Clone-only fields
        descriptorsList.add(new PL_C("-cloneId", "Unique clone identifier", "Clone ID", "cloneId") {

            @Override
            protected String extract(Clone object) {
                return "" + object.getId();
            }
        });
        descriptorsList.add(new PL_C("-count", "Export clone count", "Clone count", "cloneCount") {

            @Override
            protected String extract(Clone object) {
                return "" + object.getCount();
            }
        });
        descriptorsList.add(new PL_C("-fraction", "Export clone fraction", "Clone fraction", "cloneFraction") {

            @Override
            protected String extract(Clone object) {
                return "" + object.getFraction();
            }
        });
        descriptorsList.add(new ExtractSequence(Clone.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Clonal sequence(s)", "clonalSequence"));
        descriptorsList.add(new ExtractSequenceQuality(Clone.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Clonal sequence quality(s)", "clonalSequenceQuality"));
        // Deprecated: description of the first read of the first original read record
        descriptorsList.add(new PL_A("-descrR1", "Export description line from initial .fasta or .fastq file " + "of the first read (only available if --save-description was used in align command)", "Description R1", "descrR1") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
                return reads.get(0).getRead(0).getDescription();
            }

            @Override
            public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
                System.out.println("WARNING: -descrR1 is deprecated. Use -descrsR1");
                return super.create(outputMode, args);
            }
        });
        // Deprecated: description of the second read of the first original read record
        descriptorsList.add(new PL_A("-descrR2", "Export description line from initial .fasta or .fastq file " + "of the second read (only available if --save-description was used in align command)", "Description R2", "descrR2") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                // Message fixed to name this option (-descrR2) instead of -descrR1
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrR2\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR2\' in exportAlignments");
                SequenceRead read = reads.get(0);
                if (read.numberOfReads() < 2)
                    throw new IllegalArgumentException("Error for option \'-descrR2\':\n" + "No description available for second read: your input data was single-end");
                return read.getRead(1).getDescription();
            }

            @Override
            public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
                System.out.println("WARNING: -descrR2 is deprecated. Use -descrsR2");
                return super.create(outputMode, args);
            }
        });
        // Descriptions of the first reads of all original read records, comma-separated
        descriptorsList.add(new PL_A("-descrsR1", "Export description lines from initial .fasta or .fastq file " + "of the first reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R1", "descrsR1") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                // Message fixed to name this option (-descrsR1) instead of -descrR1
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrsR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrsR1\' in exportAlignments");
                // Guard: the loop below reads element 0 unconditionally and would throw on an empty list
                if (reads.isEmpty())
                    return "";
                StringBuilder sb = new StringBuilder();
                for (int i = 0; ; i++) {
                    sb.append(reads.get(i).getRead(0).getDescription());
                    if (i == reads.size() - 1)
                        return sb.toString();
                    sb.append(",");
                }
            }
        });
        // Descriptions of the second reads of all original read records, comma-separated
        descriptorsList.add(new PL_A("-descrsR2", "Export description lines from initial .fasta or .fastq file " + "of the second reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R2", "descrsR2") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                // Message fixed to name this option (-descrsR2) instead of -descrR1
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrsR2\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrsR2\' in exportAlignments");
                // Guard: the loop below reads element 0 unconditionally and would throw on an empty list
                if (reads.isEmpty())
                    return "";
                StringBuilder sb = new StringBuilder();
                for (int i = 0; ; i++) {
                    SequenceRead read = reads.get(i);
                    if (read.numberOfReads() < 2)
                        throw new IllegalArgumentException("Error for option \'-descrsR2\':\n" + "No description available for second read: your input data was single-end");
                    sb.append(read.getRead(1).getDescription());
                    if (i == reads.size() - 1)
                        return sb.toString();
                    sb.append(",");
                }
            }
        });
        // Read history serialised to a single line; serialisation failures are rethrown unchecked
        descriptorsList.add(new PL_A("-readHistory", "Export read history", "Read history", "readHistory") {

            @Override
            protected String extract(VDJCAlignments object) {
                try {
                    return GlobalObjectMappers.toOneLine(object.getHistory());
                } catch (JsonProcessingException ex) {
                    throw new RuntimeException(ex);
                }
            }
        });
        // Identity of every hit, comma-separated
        for (final GeneType type : GeneType.values()) {
            String c = Character.toLowerCase(type.getLetter()) + "IdentityPercents";
            descriptorsList.add(new PL_O("-" + c, type.getLetter() + " alignment identity percents", type.getLetter() + " alignment identity percents", c) {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    // An empty array is treated like a missing one; the loop below reads
                    // element 0 unconditionally and would otherwise throw
                    if (hits == null || hits.length == 0)
                        return NULL;
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        sb.append(hits[i].getIdentity());
                        if (i == hits.length - 1)
                            return sb.toString();
                        sb.append(",");
                    }
                }
            });
        }
        // Identity of the best hit
        for (final GeneType type : GeneType.values()) {
            String c = Character.toLowerCase(type.getLetter()) + "BestIdentityPercent";
            // Space added after the gene letter (was e.g. "Vbest ..."), matching the "IdentityPercents" fields above
            descriptorsList.add(new PL_O("-" + c, type.getLetter() + " best alignment identity percent", type.getLetter() + " best alignment identity percent", c) {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit hit = object.getBestHit(type);
                    if (hit == null)
                        return NULL;
                    return Float.toString(hit.getIdentity());
                }
            });
        }
        // Chains
        descriptorsList.add(new PL_O("-chains", "Chains", "Chains", "Chains") {

            @Override
            protected String extract(VDJCObject object) {
                return object.commonChains().toString();
            }
        });
        descriptorsList.add(new PL_O("-topChains", "Top chains", "Top chains", "topChains") {

            @Override
            protected String extract(VDJCObject object) {
                return object.commonTopChains().toString();
            }
        });
        descriptors = descriptorsList.toArray(new Field[descriptorsList.size()]);
    }
    return descriptors;
}
Also used : GeneFeature(io.repseq.core.GeneFeature) ArrayList(java.util.ArrayList) VDJCObject(com.milaboratory.mixcr.basictypes.VDJCObject) Alignment(com.milaboratory.core.alignment.Alignment) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ArrayList(java.util.ArrayList) List(java.util.List) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Clone(com.milaboratory.mixcr.basictypes.Clone) Mutations(com.milaboratory.core.mutations.Mutations) ReferencePoint(io.repseq.core.ReferencePoint) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 2 with Mutations

use of com.milaboratory.core.mutations.Mutations in project mixcr by milaboratory.

The class ClonalSequenceTest, method testCompatiblePair.

/**
 * Checks that {@code c1.isCompatible(c2, mutations)} holds for 100 randomly generated
 * indel-only mutation sets whose net length change turns the length of {@code c1}'s
 * concatenated sequence into the length of {@code c2}'s.
 *
 * <p>Each iteration draws {@code k} in {1, 2} and builds {@code k} "extra" insertions and
 * deletions on top of the indels needed to compensate the length difference, so that
 * {@code insertions - deletions == c2size - c1size}. Insertion positions are re-drawn until
 * they do not coincide with a deletion position.
 *
 * @param c1     clonal sequence the mutations are applied to
 * @param c2     clonal sequence expected to be compatible with the mutated {@code c1}
 * @param random source of randomness for mutation counts and positions
 */
private static void testCompatiblePair(ClonalSequence c1, ClonalSequence c2, RandomGenerator random) {
    int c1size = c1.getConcatenated().size();
    int c2size = c2.getConcatenated().size();
    int delta = c1size - c2size;
    for (int t = 0; t < 100; ++t) {
        int k = 1 + random.nextInt(2);
        int deletions, insertions;
        if (delta > 0) {
            // c1 > c2
            insertions = k;
            deletions = delta + k;
        } else {
            // c1 <= c2
            deletions = k;
            insertions = -delta + k;
        }
        // Both counts are positive in either branch, so no Math.abs is needed
        int[] muts = new int[deletions + insertions];
        int c = 0;
        BitArray usedDels = new BitArray(c1size);
        for (int i = 0; i < deletions; ++i) {
            int p = random.nextInt(c1size);
            usedDels.set(p);
            muts[c++] = Mutation.createDeletion(p, 0);
        }
        for (int i = 0; i < insertions; ++i) {
            int p;
            // Re-draw until the insertion does not land on a deleted position
            do {
                p = random.nextInt(c1size);
            } while (usedDels.get(p));
            muts[c++] = Mutation.createInsertion(p, 0);
        }
        Arrays.sort(muts);
        Mutations<NucleotideSequence> mutations = new Mutations<>(NucleotideSequence.ALPHABET, muts);
        // Uses the test framework's Assert rather than the Java 'assert' keyword,
        // so the check also runs when JVM assertions are disabled
        Assert.assertEquals(c2size, mutations.getLengthDelta() + c1size);
        Assert.assertTrue(c1.isCompatible(c2, mutations));
    }
}
Also used : Mutations(com.milaboratory.core.mutations.Mutations) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) BitArray(com.milaboratory.util.BitArray)

Example 3 with Mutations

use of com.milaboratory.core.mutations.Mutations in project mixcr by milaboratory.

The class Merger, method merge.

/**
 * Merges several alignments against the same reference sequence into a single sequence with
 * quality covering {@code range} of the reference.
 *
 * <p>For every reference position in the range, the alignment with the highest point quality
 * (as returned by {@code getQuality}) is chosen as the "winner". The mutations of the winner
 * at that position are taken into the result, and the resulting quality is the sum of point
 * qualities of all alignments that carry exactly the same mutations at that position, capped
 * at {@code MAX_QUALITY}.
 *
 * @param range      range of the reference sequence (sequence1 of the alignments) to build
 * @param alignments alignments to merge; all must reference the same sequence1 instance
 * @param targets    target sequences with quality, {@code targets[i]} corresponding to
 *                   {@code alignments[i]}
 * @return the merged sequence with quality, or {@code null} if no winner could be selected
 *         for some position of the range
 * @throws IllegalArgumentException if array lengths differ or alignments reference
 *                                  different sequence1 instances
 */
public static NSequenceWithQuality merge(Range range, Alignment<NucleotideSequence>[] alignments, NSequenceWithQuality[] targets) {
    // Checking arguments
    if (alignments.length != targets.length)
        throw new IllegalArgumentException();
    // Extracting reference sequence
    NucleotideSequence sequence = alignments[0].getSequence1();
    // NOTE(review): identity comparison; presumably all alignments are expected to share
    // the very same reference object - confirm against callers
    for (int i = 1; i < alignments.length; i++)
        if (sequence != alignments[i].getSequence1())
            throw new IllegalArgumentException("Different reference sequences.");
    int position = range.getFrom();
    // Per-alignment index of the next mutation to consider
    int[] mPointers = new int[alignments.length];
    for (int i = 0; i < alignments.length; i++)
        mPointers[i] = firstIndexAfter(alignments[i].getAbsoluteMutations(), position);
    // Aggregator of quality information
    SequenceQualityBuilder qualityBuilder = new SequenceQualityBuilder().ensureCapacity(range.length());
    // Aggregator of mutations
    MutationsBuilder<NucleotideSequence> mutationsBuilder = new MutationsBuilder<>(NucleotideSequence.ALPHABET);
    do {
        int winnerIndex = -1;
        byte bestQuality = -1;
        int winnerMPointer = -1, winnerMLength = -1;
        // First pass: select the alignment with the best quality at this position
        for (int i = 0; i < alignments.length; i++) {
            Alignment<NucleotideSequence> al = alignments[i];
            if (!al.getSequence1Range().contains(position))
                continue;
            // Current mutation index
            int mPointer = mPointers[i];
            Mutations<NucleotideSequence> mutations = al.getAbsoluteMutations();
            // Number of mutations with the same position
            int length;
            if (mPointer < mutations.size() && mutations.getPositionByIndex(mPointer) == position) {
                length = 1;
                while (mPointer + length < mutations.size() && mutations.getPositionByIndex(mPointer + length) == position)
                    ++length;
            } else
                length = 0;
            byte pointQuality = getQuality(position, mPointer, length, al, targets[i].getQuality());
            if (bestQuality < pointQuality) {
                winnerIndex = i;
                winnerMPointer = mPointer;
                winnerMLength = length;
                bestQuality = pointQuality;
            }
        }
        // No winner for this position. This check must precede the alignments[winnerIndex]
        // access below; previously it came after it, so index -1 raised
        // ArrayIndexOutOfBoundsException and this return was never reached.
        // todo ??
        if (winnerIndex == -1)
            return null;
        Mutations<NucleotideSequence> winnerMutations = alignments[winnerIndex].getAbsoluteMutations();
        // Accumulated as int: a byte accumulator could wrap around to a negative value
        // before being capped by MAX_QUALITY
        int sumQuality = 0;
        // Second pass to calculate score
        OUT: for (int i = 0; i < alignments.length; i++) {
            Alignment<NucleotideSequence> al = alignments[i];
            if (!al.getSequence1Range().contains(position))
                continue;
            // Current mutation index
            int mPointer = mPointers[i];
            Mutations<NucleotideSequence> mutations = al.getAbsoluteMutations();
            // Number of mutations with the same position
            int length;
            if (mPointer < mutations.size() && mutations.getPositionByIndex(mPointer) == position) {
                length = 1;
                while (mPointer + length < mutations.size() && mutations.getPositionByIndex(mPointer + length) == position)
                    ++length;
            } else
                length = 0;
            // Advancing pointer
            mPointers[i] += length;
            // Only alignments agreeing with the winner contribute to the quality
            if (length != winnerMLength)
                continue;
            for (int k = 0; k < length; ++k)
                if (mutations.getMutation(mPointer + k) != winnerMutations.getMutation(winnerMPointer + k))
                    continue OUT;
            sumQuality += getQuality(position, mPointer, length, al, targets[i].getQuality());
        }
        qualityBuilder.append((byte) Math.min(sumQuality, MAX_QUALITY));
        if (winnerMLength != 0)
            mutationsBuilder.append(winnerMutations, winnerMPointer, winnerMLength);
    } while (++position < range.getTo());
    Mutations<NucleotideSequence> mutations = mutationsBuilder.createAndDestroy();
    NSequenceWithQuality toMutate = new NSequenceWithQuality(sequence.getRange(range), qualityBuilder.createAndDestroy());
    // Collected mutations are in absolute reference coordinates; shift them to the extracted range
    return MutationsUtil.mutate(toMutate, mutations.move(-range.getFrom()));
}
Also used : Mutations(com.milaboratory.core.mutations.Mutations) MutationsBuilder(com.milaboratory.core.mutations.MutationsBuilder) Alignment(com.milaboratory.core.alignment.Alignment) SequenceQualityBuilder(com.milaboratory.core.sequence.SequenceQualityBuilder) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality)

Aggregations

Mutations (com.milaboratory.core.mutations.Mutations)3 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)3 Alignment (com.milaboratory.core.alignment.Alignment)2 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)2 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 SequenceRead (com.milaboratory.core.io.sequence.SequenceRead)1 MutationsBuilder (com.milaboratory.core.mutations.MutationsBuilder)1 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)1 SequenceQualityBuilder (com.milaboratory.core.sequence.SequenceQualityBuilder)1 TranslationParameters (com.milaboratory.core.sequence.TranslationParameters)1 Clone (com.milaboratory.mixcr.basictypes.Clone)1 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)1 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)1 VDJCObject (com.milaboratory.mixcr.basictypes.VDJCObject)1 BitArray (com.milaboratory.util.BitArray)1 GeneFeature (io.repseq.core.GeneFeature)1 GeneType (io.repseq.core.GeneType)1 ReferencePoint (io.repseq.core.ReferencePoint)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1