Search in sources :

Example 71 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class IOUtil method registerGeneReferences.

public static void registerGeneReferences(PrimitivO output, List<VDJCGene> genes, HasFeatureToAlign featuresToAlign) {
    // Putting genes references and feature sequences to be serialized/deserialized as references
    for (VDJCGene gene : genes) {
        output.putKnownReference(gene);
        // Also put sequences of certain gene features of genes as known references if required
        if (featuresToAlign != null) {
            GeneFeature featureToAlign = featuresToAlign.getFeatureToAlign(gene.getGeneType());
            if (featureToAlign == null)
                continue;
            NucleotideSequence featureSequence = gene.getFeature(featureToAlign);
            if (featureSequence == null)
                continue;
            output.putKnownReference(gene.getFeature(featuresToAlign.getFeatureToAlign(gene.getGeneType())));
        }
    }
}
Also used : GeneFeature(io.repseq.core.GeneFeature) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene)

Example 72 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class Merger method merge.

public static NSequenceWithQuality merge(Range range, Alignment<NucleotideSequence>[] alignments, NSequenceWithQuality[] targets) {
    // Checking arguments
    if (alignments.length != targets.length)
        throw new IllegalArgumentException();
    // Extracting reference sequence
    NucleotideSequence sequence = alignments[0].getSequence1();
    for (int i = 1; i < alignments.length; i++) if (sequence != alignments[i].getSequence1())
        throw new IllegalArgumentException("Different reference sequences.");
    int position = range.getFrom();
    int localPosition = 0;
    int[] mPointers = new int[alignments.length];
    for (int i = 0; i < alignments.length; i++) mPointers[i] = firstIndexAfter(alignments[i].getAbsoluteMutations(), position);
    // Aggregator of quality information
    SequenceQualityBuilder qualityBuilder = new SequenceQualityBuilder().ensureCapacity(range.length());
    // Aggregator of mutations
    MutationsBuilder<NucleotideSequence> mutationsBuilder = new MutationsBuilder<>(NucleotideSequence.ALPHABET);
    do {
        int winnerIndex = -1;
        byte bestQuality = -1;
        int winnerMPointer = -1, winnerMLength = -1;
        for (int i = 0; i < alignments.length; i++) {
            Alignment<NucleotideSequence> al = alignments[i];
            if (!al.getSequence1Range().contains(position))
                continue;
            // Current mutation index
            int mPointer = mPointers[i];
            Mutations<NucleotideSequence> mutations = al.getAbsoluteMutations();
            // Number of mutations with the same position
            int length;
            if (mPointer < mutations.size() && mutations.getPositionByIndex(mPointer) == position) {
                length = 1;
                while (mPointer + length < mutations.size() && mutations.getPositionByIndex(mPointer + length) == position) ++length;
            } else
                length = 0;
            byte pointQuality = getQuality(position, mPointer, length, al, targets[i].getQuality());
            if (bestQuality < pointQuality) {
                winnerIndex = i;
                winnerMPointer = mPointer;
                winnerMLength = length;
                bestQuality = pointQuality;
            }
        }
        Mutations<NucleotideSequence> winnerMutations = alignments[winnerIndex].getAbsoluteMutations();
        byte sumQuality = 0;
        // Second pass to calculate score
        OUT: for (int i = 0; i < alignments.length; i++) {
            Alignment<NucleotideSequence> al = alignments[i];
            if (!al.getSequence1Range().contains(position))
                continue;
            // Current mutation index
            int mPointer = mPointers[i];
            Mutations<NucleotideSequence> mutations = al.getAbsoluteMutations();
            // Number of mutations with the same position
            int length;
            if (mPointer < mutations.size() && mutations.getPositionByIndex(mPointer) == position) {
                length = 1;
                while (mPointer + length < mutations.size() && mutations.getPositionByIndex(mPointer + length) == position) ++length;
            } else
                length = 0;
            // Advancing pointer
            mPointers[i] += length;
            if (length != winnerMLength)
                continue;
            for (int k = 0; k < length; ++k) if (mutations.getMutation(mPointer + k) != winnerMutations.getMutation(winnerMPointer + k))
                continue OUT;
            byte pointQuality = getQuality(position, mPointer, length, al, targets[i].getQuality());
            sumQuality += pointQuality;
        }
        // todo ??
        if (winnerIndex == -1)
            return null;
        sumQuality = (byte) Math.min(sumQuality, MAX_QUALITY);
        qualityBuilder.append(sumQuality);
        if (winnerMLength != 0)
            mutationsBuilder.append(winnerMutations, winnerMPointer, winnerMLength);
        ++localPosition;
    } while (++position < range.getTo());
    Mutations<NucleotideSequence> mutations = mutationsBuilder.createAndDestroy();
    NSequenceWithQuality toMutate = new NSequenceWithQuality(sequence.getRange(range), qualityBuilder.createAndDestroy());
    return MutationsUtil.mutate(toMutate, mutations.move(-range.getFrom()));
}
Also used : Mutations(com.milaboratory.core.mutations.Mutations) MutationsBuilder(com.milaboratory.core.mutations.MutationsBuilder) Alignment(com.milaboratory.core.alignment.Alignment) SequenceQualityBuilder(com.milaboratory.core.sequence.SequenceQualityBuilder) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality)

Example 73 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class TargetPartitioning method getPosition.

@Override
public int getPosition(ReferencePoint referencePoint) {
    VDJCHit hit = hits.get(referencePoint.getGeneType());
    if (hit == null)
        return -1;
    int position;
    if (referencePoint.isAttachedToAlignmentBound()) {
        int positionInSeq1;
        Alignment<NucleotideSequence> alignment = hit.getAlignment(targetIndex);
        if (alignment == null)
            return -1;
        int positionOfActivationPoint = -2;
        if (referencePoint.getActivationPoint() != null)
            positionOfActivationPoint = hit.getGene().getPartitioning().getRelativePosition(hit.getAlignedFeature(), referencePoint.getActivationPoint());
        if (referencePoint.isAttachedToLeftAlignmentBound()) {
            positionInSeq1 = alignment.getSequence1Range().getFrom();
            if (positionOfActivationPoint != -2 && (positionOfActivationPoint == -1 || positionInSeq1 > positionOfActivationPoint))
                return -1;
        } else {
            positionInSeq1 = alignment.getSequence1Range().getTo();
            if (positionOfActivationPoint != -2 && (positionOfActivationPoint == -1 || positionInSeq1 < positionOfActivationPoint))
                return -1;
        }
        positionInSeq1 += referencePoint.getOffset();
        position = alignment.convertToSeq2Position(positionInSeq1);
    } else
        position = hit.getPosition(targetIndex, referencePoint);
    if (position == -1)
        return -1;
    if (position < 0)
        return -2 - position;
    return position;
}
Also used : NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) ReferencePoint(io.repseq.core.ReferencePoint)

Example 74 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class CloneFactory method create.

public Clone create(int id, double count, EnumMap<GeneType, TObjectFloatHashMap<VDJCGeneId>> geneScores, NSequenceWithQuality[] targets) {
    EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        VJCClonalAlignerParameters vjcParameters = parameters.getVJCParameters(geneType);
        if (vjcParameters == null)
            continue;
        GeneFeature featureToAlign = featuresToAlign.get(geneType);
        TObjectFloatHashMap<VDJCGeneId> gtGeneScores = geneScores.get(geneType);
        if (gtGeneScores == null)
            continue;
        GeneFeature[] intersectingFeatures = new GeneFeature[assemblingFeatures.length];
        for (int i = 0; i < assemblingFeatures.length; ++i) {
            intersectingFeatures[i] = GeneFeature.intersection(featureToAlign, assemblingFeatures[i]);
            if (intersectingFeatures[i] != null)
                switch(geneType) {
                    case Variable:
                        if (!intersectingFeatures[i].getFirstPoint().equals(assemblingFeatures[i].getFirstPoint()))
                            throw new IllegalArgumentException();
                        break;
                    case Joining:
                        if (!intersectingFeatures[i].getLastPoint().equals(assemblingFeatures[i].getLastPoint()))
                            throw new IllegalArgumentException();
                        break;
                }
        }
        VDJCHit[] result = new VDJCHit[gtGeneScores.size()];
        int pointer = 0;
        TObjectFloatIterator<VDJCGeneId> iterator = gtGeneScores.iterator();
        while (iterator.hasNext()) {
            iterator.advance();
            VDJCGene gene = usedGenes.get(iterator.key());
            Alignment<NucleotideSequence>[] alignments = new Alignment[assemblingFeatures.length];
            for (int i = 0; i < assemblingFeatures.length; ++i) {
                if (intersectingFeatures[i] == null)
                    continue;
                NucleotideSequence referenceSequence = gene.getFeature(featureToAlign);
                Range rangeInReference = gene.getPartitioning().getRelativeRange(featureToAlign, intersectingFeatures[i]);
                if (rangeInReference == null || referenceSequence == null)
                    continue;
                Boolean leftSide;
                switch(geneType) {
                    case Variable:
                        leftSide = intersectingFeatures[i].getLastPoint().isTrimmable() ? true : null;
                        break;
                    case Joining:
                        leftSide = intersectingFeatures[i].getFirstPoint().isTrimmable() ? false : null;
                        break;
                    case Constant:
                        leftSide = null;
                        break;
                    default:
                        throw new RuntimeException();
                }
                BandedAlignerParameters<NucleotideSequence> alignmentParameters = vjcParameters.getAlignmentParameters();
                int referenceLength = rangeInReference.length();
                NucleotideSequence target = targets[i].getSequence();
                if (alignmentParameters.getScoring() instanceof LinearGapAlignmentScoring) {
                    if (leftSide == null) {
                        alignments[i] = BandedLinearAligner.align((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    } else if (leftSide) {
                        assert rangeInReference.getFrom() + referenceLength == referenceSequence.size();
                        alignments[i] = BandedLinearAligner.alignSemiLocalLeft((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth(), alignmentParameters.getStopPenalty());
                    } else {
                        assert rangeInReference.getFrom() == 0;
                        // int offset2 = Math.max(0, target.size() - referenceLength);
                        alignments[i] = BandedLinearAligner.alignSemiLocalRight((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth(), alignmentParameters.getStopPenalty());
                    }
                } else {
                    if (leftSide == null) {
                        alignments[i] = BandedAffineAligner.align((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    } else if (leftSide) {
                        assert rangeInReference.getFrom() + referenceLength == referenceSequence.size();
                        alignments[i] = BandedAffineAligner.semiLocalRight((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    } else {
                        assert rangeInReference.getFrom() == 0;
                        // int offset2 = Math.max(0, target.size() - referenceLength);
                        alignments[i] = BandedAffineAligner.semiLocalLeft((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
                    }
                }
            }
            result[pointer++] = new VDJCHit(gene, alignments, featureToAlign, iterator.value());
        }
        Arrays.sort(result, 0, pointer);
        hits.put(geneType, pointer < result.length ? Arrays.copyOf(result, pointer) : result);
    }
    // D
    NucleotideSequence sequenceToAlign = targets[indexOfAssemblingFeatureWithD].getSequence();
    int from = 0;
    int to = sequenceToAlign.size();
    VDJCHit[] hs = hits.get(GeneType.Variable);
    if (hs != null && hs.length > 0) {
        int p = hs[0].getPartitioningForTarget(indexOfAssemblingFeatureWithD).getPosition(ReferencePoint.VEndTrimmed);
        if (p != -1) {
            if (p < 0)
                p = -2 - p;
            from = p;
        }
    }
    hs = hits.get(GeneType.Joining);
    if (hs != null && hs.length > 0) {
        int p = hs[0].getPartitioningForTarget(indexOfAssemblingFeatureWithD).getPosition(ReferencePoint.JBeginTrimmed);
        if (p != -1) {
            if (p < 0)
                p = -2 - p;
            to = p;
        }
    }
    if (from < to)
        hits.put(GeneType.Diversity, dAligner.align(sequenceToAlign, VDJCAligner.getPossibleDLoci(hits.get(GeneType.Variable), hits.get(GeneType.Joining)), from, to, indexOfAssemblingFeatureWithD, assemblingFeatures.length));
    else
        hits.put(GeneType.Diversity, new VDJCHit[0]);
    return new Clone(targets, hits, count, id);
}
Also used : Range(com.milaboratory.core.Range) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit) Clone(com.milaboratory.mixcr.basictypes.Clone)

Example 75 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class ClonalSequence method getStretches.

public Stretch[] getStretches() {
    if (stretches == null) {
        stretches = new Stretch[sequences.length - 1];
        for (int i = 1; i < sequences.length; ++i) {
            NucleotideSequence left = sequences[i - 1].getSequence(), right = sequences[i].getSequence();
            int leftSize = left.size(), rightSize = right.size();
            byte code = left.codeAt(leftSize - 1);
            if (code != right.codeAt(0)) {
                stretches[i - 1] = new Stretch(code);
                continue;
            }
            int leftDelta = 0, rightDelta = 0;
            while (leftDelta < leftSize && left.codeAt(leftSize - leftDelta - 1) == code) ++leftDelta;
            while (rightDelta < rightSize && right.codeAt(rightDelta) == code) ++rightDelta;
            stretches[i - 1] = new Stretch(leftDelta, rightDelta, code);
        }
    }
    return stretches;
}
Also used : NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Aggregations

NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)81 Test (org.junit.Test)32 Range (com.milaboratory.core.Range)19 VDJCGene (io.repseq.core.VDJCGene)15 GeneType (io.repseq.core.GeneType)14 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)13 GeneFeature (io.repseq.core.GeneFeature)9 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)8 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)8 ReferencePoint (io.repseq.core.ReferencePoint)7 VDJCLibrary (io.repseq.core.VDJCLibrary)7 ArrayList (java.util.ArrayList)7 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)6 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)5 PairedRead (com.milaboratory.core.io.sequence.PairedRead)5 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)5 Path (java.nio.file.Path)5 Alignment (com.milaboratory.core.alignment.Alignment)4 Pattern (java.util.regex.Pattern)4 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)3