Search in sources :

Example 1 with AlignmentHit

use of com.milaboratory.core.alignment.batch.AlignmentHit in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerAligner method process0.

@Override
@SuppressWarnings("unchecked")
protected VDJCAlignmentResult<VDJCMultiRead> process0(VDJCMultiRead input) {
    final int nReads = input.numberOfReads();
    EnumMap<GeneType, VDJCHit[]> vdjcHits = new EnumMap<>(GeneType.class);
    NSequenceWithQuality[] targets = new NSequenceWithQuality[nReads];
    Chains currentChains = Chains.ALL;
    // Across all gene types
    int lastAlignedTarget = 0;
    int firstJTarget = -1;
    int lastVTarget = -1;
    for (int g = 0; g < GeneType.VJC_REFERENCE.length; g++) {
        GeneType gt = GeneType.VJC_REFERENCE[g];
        AlignmentHit<NucleotideSequence, VDJCGene>[][] alignmentHits = new AlignmentHit[nReads][];
        Arrays.fill(alignmentHits, new AlignmentHit[0]);
        for (int targetId = lastAlignedTarget; targetId < nReads; targetId++) {
            targets[targetId] = input.getRead(targetId).getData();
            final NucleotideSequence sequence = input.getRead(targetId).getData().getSequence();
            AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> als;
            final BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(gt);
            if (aligner != null) {
                int pointer = 0;
                if (g != 0) {
                    // Not V gene
                    VDJCHit[] vdjcHits1 = vdjcHits.get(GeneType.VJC_REFERENCE[g - 1]);
                    Alignment<NucleotideSequence> alignment;
                    if (vdjcHits1.length != 0 && (alignment = vdjcHits1[0].getAlignment(targetId)) != null)
                        pointer = alignment.getSequence2Range().getTo();
                }
                als = aligner.align(sequence, pointer, sequence.size(), getFilter(gt, currentChains));
                if (als != null && als.hasHits()) {
                    lastAlignedTarget = targetId;
                    if (// V
                    g == 0)
                        lastVTarget = targetId;
                    if (// J
                    g == 1)
                        firstJTarget = targetId;
                    alignmentHits[targetId] = als.getHits().toArray(new AlignmentHit[als.getHits().size()]);
                }
            }
        }
        Chains chains = Chains.EMPTY;
        for (AlignmentHit<NucleotideSequence, VDJCGene>[] alignmentHit0 : alignmentHits) if (alignmentHit0 != null)
            for (AlignmentHit<NucleotideSequence, VDJCGene> hit : alignmentHit0) chains = chains.merge(hit.getRecordPayload().getChains());
        currentChains = currentChains.intersection(chains);
        vdjcHits.put(gt, combine(parameters.getFeatureToAlign(gt), alignmentHits));
    }
    boolean fineVAlignmentPerformed = false, fineJAlignmentPerformed = false;
    // Additional (fine) alignment step for V gene
    VDJCHit[] vHits = vdjcHits.get(GeneType.Variable);
    final AlignmentScoring<NucleotideSequence> vScoring = parameters.getVAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    vHits != null && vHits.length > 0 && !(vScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Joining) != null && vdjcHits.get(GeneType.Joining).length > 0) {
        int minimalVSpace = getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()) / vScoring.getMaximalMatchScore();
        // Assert
        if (firstJTarget == -1)
            throw new AssertionError();
        for (int targetId = 1; targetId <= firstJTarget; targetId++) {
            int vSpace;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId) != null && (vSpace = vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId).getSequence2Range().getFrom()) >= minimalVSpace) {
                for (int vHitIndex = 0; vHitIndex < vHits.length; vHitIndex++) {
                    VDJCHit vHit = vHits[vHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (vHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> leftAlignment = vHit.getAlignment(targetId - 1);
                    if (leftAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = leftAlignment.getSequence1();
                    final int beginFR3 = vHit.getGene().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
                    if (beginFR3 == -1)
                        continue;
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring<NucleotideSequence>) vScoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, vSpace, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
                        continue;
                    fineVAlignmentPerformed = true;
                    vHits[vHitIndex] = vHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(vHits);
    vdjcHits.put(GeneType.Variable, cutRelativeScore(vHits, parameters.getVAlignerParameters().getRelativeMinScore(), parameters.getVAlignerParameters().getParameters().getMaxHits()));
    // Additional (fine) alignment step for J gene
    VDJCHit[] jHits = vdjcHits.get(GeneType.Joining);
    final AlignmentScoring<NucleotideSequence> jScoring = parameters.getJAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    jHits != null && jHits.length > 0 && !(jScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Variable) != null && vdjcHits.get(GeneType.Variable).length > 0) {
        int minimalJSpace = getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()) / jScoring.getMaximalMatchScore();
        // Assert
        if (lastVTarget == -1)
            throw new AssertionError();
        for (int targetId = lastVTarget; targetId < nReads - 1; targetId++) {
            int jSpaceBegin;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId) != null && (sequence2.size() - (jSpaceBegin = vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId).getSequence2Range().getTo())) >= minimalJSpace) {
                for (int jHitIndex = 0; jHitIndex < jHits.length; jHitIndex++) {
                    VDJCHit jHit = jHits[jHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (jHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> rightAlignment = jHit.getAlignment(targetId + 1);
                    if (rightAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = rightAlignment.getSequence1();
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) jScoring, sequence1, sequence2, 0, sequence1.size(), jSpaceBegin, sequence2.size() - jSpaceBegin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
                        continue;
                    fineJAlignmentPerformed = true;
                    jHits[jHitIndex] = jHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(jHits);
    vdjcHits.put(GeneType.Joining, cutRelativeScore(jHits, parameters.getJAlignerParameters().getRelativeMinScore(), parameters.getJAlignerParameters().getParameters().getMaxHits()));
    int dGeneTarget = -1;
    VDJCHit[] vResult = vdjcHits.get(GeneType.Variable);
    VDJCHit[] jResult = vdjcHits.get(GeneType.Joining);
    if (vResult.length != 0 && jResult.length != 0)
        for (int i = 0; i < nReads; i++) if (vResult[0].getAlignment(i) != null && jResult[0].getAlignment(i) != null) {
            dGeneTarget = i;
            break;
        }
    // if (fineVAlignmentPerformed && fineJAlignmentPerformed)
    // System.out.println("sd");
    VDJCHit[] dResult;
    if (dGeneTarget == -1)
        dResult = new VDJCHit[0];
    else {
        final Alignment<NucleotideSequence> vAl = vResult[0].getAlignment(dGeneTarget);
        final Alignment<NucleotideSequence> jAl = jResult[0].getAlignment(dGeneTarget);
        if (vAl == null || jAl == null || singleDAligner == null)
            dResult = new VDJCHit[0];
        else
            dResult = singleDAligner.align(targets[dGeneTarget].getSequence(), getPossibleDLoci(vResult, jResult), vAl.getSequence2Range().getTo(), jAl.getSequence2Range().getFrom(), dGeneTarget, nReads);
    }
    final VDJCAlignments alignment = new VDJCAlignments(vResult, dResult, jResult, cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets, input.getHistory(), input.getOriginalReads());
    return new VDJCAlignmentResult<>(input, alignment);
}
Also used : Chains(io.repseq.core.Chains) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) EnumMap(java.util.EnumMap) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) VDJCAlignmentResult(com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult) ReferencePoint(io.repseq.core.ReferencePoint) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 2 with AlignmentHit

use of com.milaboratory.core.alignment.batch.AlignmentHit in project mixcr by milaboratory.

the class VDJCAlignerS method align.

// TODO all this ifs can be simplified
private KVJResultsForSingle align(Target target) {
    NucleotideSequence sequence = target.targets[0].getSequence();
    ensureInitialized();
    AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> vResult, jResult;
    switch(parameters.getVJAlignmentOrder()) {
        case VThenJ:
            vResult = vAligner.align(sequence);
            // If there is no results for V return
            if (!vResult.hasHits())
                return new KVJResultsForSingle(target, // V result is empty
                vResult, // If -OallowPartialAlignments=true try align J gene
                parameters.getAllowPartialAlignments() ? jAligner.align(sequence) : null);
            // Returning result
            int jFrom = vResult.getBestHit().getAlignment().getSequence2Range().getTo() - parameters.getVJOverlapWindow();
            jFrom = jFrom < 0 ? 0 : jFrom;
            return new KVJResultsForSingle(target, vResult, jAligner.align(sequence, jFrom, sequence.size(), getFilter(GeneType.Joining, vResult.getHits())));
        case JThenV:
            jResult = jAligner.align(sequence);
            // If there is no results for J return
            if (!jResult.hasHits())
                return new KVJResultsForSingle(target, // If -OallowPartialAlignments=true try align V gene
                parameters.getAllowPartialAlignments() ? vAligner.align(sequence) : null, // J result is empty
                jResult);
            // Returning result
            int vTo = jResult.getBestHit().getAlignment().getSequence2Range().getFrom() + parameters.getVJOverlapWindow();
            vTo = vTo > sequence.size() ? sequence.size() : vTo;
            return new KVJResultsForSingle(target, vAligner.align(sequence, 0, vTo, getFilter(GeneType.Variable, jResult.getHits())), jResult);
    }
    throw new IllegalArgumentException("vjAlignmentOrder not set.");
}
Also used : AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 3 with AlignmentHit

use of com.milaboratory.core.alignment.batch.AlignmentHit in project repseqio by repseqio.

the class InferAnchorPointsAction method go.

public void go(GeneFeature geneFeature, String inputFile, String outputFile) throws Exception {
    VDJCLibraryRegistry.resetDefaultRegistry();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    // Map of library names
    Map<String, String> libraryNameToAddress = new HashMap<>();
    // Registering reference library
    int i = 0;
    for (String refAddress : params.getReference()) {
        String name = REFERENCE_LIBRARY_PREFIX + (i++);
        reg.registerLibraries(refAddress, name);
        libraryNameToAddress.put(name, refAddress);
    }
    if (params.getReference().isEmpty()) {
        reg.loadAllLibraries("default");
        for (VDJCLibrary library : reg.getLoadedLibraries()) libraryNameToAddress.put(library.getName(), "built-in");
    }
    // Registering target library
    reg.registerLibraries(inputFile, TARGET_LIBRARY_NAME);
    // Compile gene filter
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    List<VDJCLibrary> refLibraries = reg.getLoadedLibrariesByNamePattern(REFERENCE_LIBRARY_PATTERN);
    SimpleBatchAlignerParameters<AminoAcidSequence> aParams = new SimpleBatchAlignerParameters<>(5, 0.4f, params.getAbsoluteMinScore(geneFeature), true, AffineGapAlignmentScoring.getAminoAcidBLASTScoring(BLASTMatrix.BLOSUM62, -10, -1));
    SimpleBatchAligner<AminoAcidSequence, Ref> aligner = new SimpleBatchAligner<>(aParams);
    int dbSize = 0;
    for (VDJCLibrary lib : refLibraries) {
        for (VDJCGene gene : lib.getGenes()) {
            NucleotideSequence nSeq = gene.getFeature(geneFeature);
            if (nSeq == null)
                continue;
            ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
            ReferencePoints partitioning = gene.getPartitioning();
            if (frameReference == null)
                continue;
            int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
            if (relativePosition < 0)
                continue;
            TranslationParameters frame = withIncompleteCodon(relativePosition);
            AminoAcidSequence aaSequence = AminoAcidSequence.translate(nSeq, frame);
            aligner.addReference(aaSequence, new Ref(gene, frame, nSeq.size()));
            ++dbSize;
        }
    }
    System.out.println("DB size: " + dbSize);
    System.out.println();
    // Checking that db is not empty
    if (dbSize == 0)
        throw new RuntimeException("No reference genes.");
    ArrayList<VDJCLibraryData> result = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    PrintStream bufferPS = new PrintStream(bos);
    // Iteration over target genes
    for (VDJCLibrary lib : reg.getLoadedLibrariesByName(TARGET_LIBRARY_NAME)) {
        ArrayList<VDJCGeneData> genes = new ArrayList<>();
        for (VDJCGene targetGene : lib.getGenes()) {
            bos.reset();
            PrintStream ps = params.outputOnlyModified() ? bufferPS : System.out;
            if (namePattern != null && !namePattern.matcher(targetGene.getName()).matches()) {
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            ps.println("Processing: " + targetGene.getName() + " (" + (targetGene.isFunctional() ? "F" : "P") + ") " + targetGene.getChains());
            // Getting gene feature sequence from target gene
            NucleotideSequence nSeq = targetGene.getFeature(geneFeature);
            if (nSeq == null) {
                ps.println("Failed to extract " + GeneFeature.encode(geneFeature));
                ps.println("================");
                ps.println();
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            // Alignment result
            AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> bestAResult = null;
            TranslationParameters bestFrame = null;
            // Searching for best alignment
            for (TranslationParameters frame : TRANSLATION_PARAMETERS) {
                AminoAcidSequence aaSeq = AminoAcidSequence.translate(nSeq, frame);
                AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> r = aligner.align(aaSeq);
                if (r != null && r.hasHits() && (bestAResult == null || bestAResult.getBestHit().getAlignment().getScore() < r.getBestHit().getAlignment().getScore())) {
                    bestAResult = r;
                    bestFrame = frame;
                }
            }
            if (bestFrame == null) {
                ps.println("No alignments found.");
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            List<AlignmentHit<AminoAcidSequence, Ref>> hits = bestAResult.getHits();
            VDJCGeneData targetGeneData = targetGene.getData().clone();
            boolean anyPointChanged = false;
            for (int ai = 0; ai < hits.size(); ai++) {
                // Accumulate output
                ByteArrayOutputStream localBos = new ByteArrayOutputStream();
                PrintStream localPS = new PrintStream(localBos);
                Alignment<AminoAcidSequence> bestAlignment = hits.get(ai).getAlignment();
                Ref bestRef = hits.get(ai).getRecordPayload();
                VDJCGene bestReferenceGene = bestRef.gene;
                localPS.println("Aligned with " + bestReferenceGene.getName() + " from " + libraryNameToAddress.get(bestReferenceGene.getParentLibrary().getName()) + " ; Score = " + bestAlignment.getScore());
                AlignmentHelper alignmentHelper = bestAlignment.getAlignmentHelper();
                for (AlignmentHelper h : alignmentHelper.split(150)) localPS.println(h + "\n");
                ReferencePoints targetPartitioning = targetGene.getPartitioning();
                ReferencePoints referencePartitioning = bestReferenceGene.getPartitioning();
                for (GeneFeature.ReferenceRange range : geneFeature) for (ReferencePoint point : range.getIntermediatePoints()) {
                    localPS.print(point + ": ");
                    boolean isAvailable = targetPartitioning.isAvailable(point);
                    if (isAvailable) {
                        localPS.println("already set");
                    }
                    if (!referencePartitioning.isAvailable(point)) {
                        if (!isAvailable)
                            localPS.println("not set in reference gene");
                        continue;
                    }
                    int ntPositionInReference = referencePartitioning.getRelativePosition(geneFeature, point);
                    // Projecting position
                    AminoAcidSequence.AminoAcidSequencePosition aaPositionInReferece = AminoAcidSequence.convertNtPositionToAA(ntPositionInReference, bestRef.ntSeqLength, bestRef.frame);
                    if (aaPositionInReferece == null) {
                        if (!isAvailable)
                            localPS.println("failed to convert to aa position in ref");
                        continue;
                    }
                    int aaPositionInTarget = Alignment.aabs(bestAlignment.convertToSeq2Position(aaPositionInReferece.aminoAcidPosition));
                    if (aaPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed to project using alignment");
                        continue;
                    }
                    int ntPositionInTarget = AminoAcidSequence.convertAAPositionToNt(aaPositionInTarget, nSeq.size(), bestFrame);
                    if (ntPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed");
                        continue;
                    }
                    ntPositionInTarget += aaPositionInReferece.positionInTriplet;
                    ntPositionInTarget = targetPartitioning.getAbsolutePosition(geneFeature, ntPositionInTarget);
                    if (ntPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed");
                        continue;
                    }
                    if (isAvailable) {
                        int existingPosition = targetPartitioning.getPosition(point);
                        if (existingPosition != ntPositionInTarget) {
                            localPS.println("inferred position differs from existing value.  existing: " + existingPosition + ", inferred: " + ntPositionInTarget);
                        }
                        continue;
                    }
                    localPS.println(ntPositionInTarget);
                    targetGeneData.getAnchorPoints().put(point, (long) ntPositionInTarget);
                    anyPointChanged = true;
                }
                if (!anyPointChanged) {
                    if (!params.outputOnlyModified() && ai == 0)
                        ps.write(localBos.toByteArray());
                } else {
                    ps.write(localBos.toByteArray());
                    break;
                }
            }
            ps.println("================");
            ps.println();
            if (anyPointChanged && params.outputOnlyModified())
                System.out.write(bos.toByteArray());
            genes.add(targetGeneData);
        }
        result.add(new VDJCLibraryData(lib.getData(), genes));
    }
    VDJCDataUtils.writeToFile(result, outputFile, false);
}
Also used : SimpleBatchAlignerParameters(com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) VDJCGeneData(io.repseq.dto.VDJCGeneData) Pattern(java.util.regex.Pattern) PrintStream(java.io.PrintStream) AlignmentHelper(com.milaboratory.core.alignment.AlignmentHelper) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SimpleBatchAligner(com.milaboratory.core.alignment.batch.SimpleBatchAligner) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 4 with AlignmentHit

use of com.milaboratory.core.alignment.batch.AlignmentHit in project mixcr by milaboratory.

the class VDJCAlignerPVFirst method alignVThenJ.

@SuppressWarnings("unchecked")
PAlignmentHelper alignVThenJ(final Target target) {
    /*
         * Step 1: alignment of V gene
         */
    List<AlignmentHit<NucleotideSequence, VDJCGene>> vAl1 = vAligner.align(target.targets[0].getSequence()).getHits(), vAl2 = vAligner.align(target.targets[1].getSequence()).getHits();
    /*
         * Step 1.5: eliminating conflicting alignments in favor of alignments covering CDR3 edge
         */
    boolean vChimera = checkAndEliminateChimera(vAl1, vAl2, GeneType.Variable);
    PairedHit[] vHits = extractDoubleHits(vAl1, vAl2);
    for (PairedHit vHit : vHits) {
        if (vHit.hit1 == null) {
            AlignmentHit<NucleotideSequence, VDJCGene> leftHit = vHit.hit0;
            // Checking whether alignment touches right read edge (taking to account tolerance)
            if (leftHit.getAlignment().getSequence2Range().getTo() < target.targets[0].size() - parameters.getAlignmentBoundaryTolerance())
                continue;
            final AlignmentScoring<NucleotideSequence> scoring = parameters.getVAlignerParameters().getParameters().getScoring();
            if (scoring instanceof AffineGapAlignmentScoring)
                // TODO IMPLEMENT
                continue;
            final NucleotideSequence sequence2 = target.targets[1].getSequence();
            final NucleotideSequence sequence1 = leftHit.getAlignment().getSequence1();
            final int beginFR3 = leftHit.getRecordPayload().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
            if (beginFR3 == -1)
                continue;
            final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring) scoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, sequence2.size(), false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
            if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
                continue;
            vHit.set(1, new AlignmentHitImpl<NucleotideSequence, VDJCGene>(alignment, leftHit.getRecordPayload()));
            vHit.calculateScore();
        }
    }
    vHits = sortAndFilterHits(vHits, parameters.getVAlignerParameters());
    /*
         * Step 3: alignment of J gene
         */
    List<AlignmentHit<NucleotideSequence, VDJCGene>> jAl1 = performJAlignment(target, vHits, 0), jAl2 = performJAlignment(target, vHits, 1);
    /*
         * Step 3.5: eliminating conflicting alignments in favor of alignments covering CDR3 edge
         */
    boolean jChimera = checkAndEliminateChimera(jAl1, jAl2, GeneType.Joining);
    PairedHit[] jHits = extractDoubleHits(jAl1, jAl2);
    for (PairedHit jHit : jHits) {
        if (jHit.hit0 == null) {
            AlignmentHit<NucleotideSequence, VDJCGene> rightHit = jHit.hit1;
            // Checking whether alignment touches left read edge (taking to account tolerance)
            if (rightHit.getAlignment().getSequence2Range().getFrom() > parameters.getAlignmentBoundaryTolerance())
                continue;
            final AlignmentScoring<NucleotideSequence> scoring = parameters.getJAlignerParameters().getParameters().getScoring();
            if (scoring instanceof AffineGapAlignmentScoring)
                // TODO IMPLEMENT
                continue;
            final NucleotideSequence sequence2 = target.targets[0].getSequence();
            final NucleotideSequence sequence1 = rightHit.getAlignment().getSequence1();
            int begin = 0;
            if (vHits.length != 0 && vHits[0].hit0 != null) {
                begin = vHits[0].hit0.getAlignment().getSequence2Range().getTo() - parameters.getVJOverlapWindow();
                if (begin < 0)
                    begin = 0;
            }
            final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) scoring, sequence1, sequence2, 0, sequence1.size(), begin, sequence2.size() - begin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
            if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
                continue;
            jHit.set(0, new AlignmentHitImpl<NucleotideSequence, VDJCGene>(alignment, rightHit.getRecordPayload()));
            jHit.calculateScore();
        }
    }
    jHits = sortAndFilterHits(jHits, parameters.getJAlignerParameters());
    // Check if parameters allow chimeras
    if (!parameters.isAllowChimeras()) {
        // Calculate common chains
        Chains commonChains = getVJCommonChains(vHits, jHits);
        if (!commonChains.isEmpty()) {
            // Filtering V genes
            int filteredSize = 0;
            for (PairedHit hit : vHits) if (hit.getGene().getChains().intersects(commonChains))
                ++filteredSize;
            // Perform filtering (new array allocation) only if needed
            if (vHits.length != filteredSize) {
                PairedHit[] newHits = new PairedHit[filteredSize];
                // Used as pointer
                filteredSize = 0;
                for (PairedHit hit : vHits) if (hit.getGene().getChains().intersects(commonChains))
                    newHits[filteredSize++] = hit;
                assert newHits.length == filteredSize;
                vHits = newHits;
            }
            // Filtering J genes
            filteredSize = 0;
            for (PairedHit hit : jHits) if (hit.getGene().getChains().intersects(commonChains))
                ++filteredSize;
            // Perform filtering (new array allocation) only if needed
            if (jHits.length != filteredSize) {
                PairedHit[] newHits = new PairedHit[filteredSize];
                // Used as pointer
                filteredSize = 0;
                for (PairedHit hit : jHits) if (hit.getGene().getChains().intersects(commonChains))
                    newHits[filteredSize++] = hit;
                assert newHits.length == filteredSize;
                jHits = newHits;
            }
        }
    }
    return new PAlignmentHelper(target, vHits, jHits, vChimera, jChimera);
}
Also used : AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 5 with AlignmentHit

use of com.milaboratory.core.alignment.batch.AlignmentHit in project mixcr by milaboratory.

the class VDJCAlignerAbstract method init.

@Override
protected void init() {
    DAlignerParameters dAlignerParameters = parameters.getDAlignerParameters();
    List<VDJCGene> dGenes = genesToAlign.get(GeneType.Diversity);
    if (dAlignerParameters != null && dGenes.size() != 0)
        singleDAligner = new SingleDAligner(dAlignerParameters, genesToAlign.get(GeneType.Diversity));
    vAligner = createKAligner(GeneType.Variable);
    jAligner = createKAligner(GeneType.Joining);
    cAligner = createKAligner(GeneType.Constant);
    Chains chains = new Chains();
    for (VDJCGene gene : getUsedGenes()) chains = chains.merge(gene.getChains());
    filters = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        HashMap<String, BitArray> f = new HashMap<>();
        for (final String chain : chains) {
            BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(geneType);
            if (aligner != null)
                f.put(chain, aligner.createFilter(new Filter<VDJCGene>() {

                    @Override
                    public boolean accept(VDJCGene object) {
                        return object.getChains().contains(chain);
                    }
                }));
        }
        filters.put(geneType, f);
    }
}
Also used : HashMap(java.util.HashMap) Chains(io.repseq.core.Chains) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) BitArray(com.milaboratory.util.BitArray)

Aggregations

AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)5 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)5 Chains (io.repseq.core.Chains)2 GeneType (io.repseq.core.GeneType)2 VDJCGene (io.repseq.core.VDJCGene)2 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)1 SimpleBatchAligner (com.milaboratory.core.alignment.batch.SimpleBatchAligner)1 SimpleBatchAlignerParameters (com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters)1 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)1 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)1 TranslationParameters (com.milaboratory.core.sequence.TranslationParameters)1 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)1 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)1 VDJCAlignmentResult (com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult)1 BitArray (com.milaboratory.util.BitArray)1 ReferencePoint (io.repseq.core.ReferencePoint)1 VDJCGeneData (io.repseq.dto.VDJCGeneData)1 VDJCLibraryData (io.repseq.dto.VDJCLibraryData)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 PrintStream (java.io.PrintStream)1