Search in sources :

Example 16 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class PartialAlignmentsAssembler method searchOverlaps.

public void searchOverlaps(VDJCAlignmentsReader reader) {
    final VDJCAlignerParameters alignerParameters = reader.getParameters();
    PartialAlignmentsAssemblerAligner aligner = new PartialAlignmentsAssemblerAligner(alignerParameters);
    targetMerger.setAlignerParameters(alignerParameters);
    for (VDJCGene gene : reader.getUsedGenes()) aligner.addGene(gene);
    for (final VDJCAlignments alignment : CUtils.it(reader)) {
        total.incrementAndGet();
        if (alignment.getFeature(GeneFeature.CDR3) != null) {
            containsCDR3.incrementAndGet();
            if (!overlappedOnly) {
                totalWritten.incrementAndGet();
                writer.write(alignment);
            }
            continue;
        }
        if (alreadyMergedIds.contains(alignment.getAlignmentsIndex()))
            continue;
        final OverlapSearchResult searchResult = searchOverlaps(alignment, alignerParameters.isAllowChimeras());
        // Common procedure to cancel processing of current input alignment if it fails to pass some good
        // overlap filtering criterion
        final Runnable cancelCurrentResult = new Runnable() {

            @Override
            public void run() {
                if (searchResult != null)
                    searchResult.cancel();
                if (writePartial && !overlappedOnly && notInLeftIndexIds.contains(alignment.getAlignmentsIndex())) {
                    totalWritten.incrementAndGet();
                    partialAsIs.incrementAndGet();
                    writer.write(alignment);
                }
            }
        };
        if (searchResult == null) {
            cancelCurrentResult.run();
            continue;
        }
        List<AlignedTarget> mergedTargets = searchResult.result;
        VDJCMultiRead mRead = new VDJCMultiRead(mergedTargets);
        final VDJCAlignments mAlignment = aligner.process(mRead).alignment;
        // Checking number of overlapped non-template (NRegion) letters
        int overlapTargetId = -1;
        Range overlapRange = null;
        for (int i = 0; i < mergedTargets.size(); i++) {
            overlapRange = AlignedTarget.getOverlapRange(mergedTargets.get(i));
            if (overlapRange != null) {
                overlapTargetId = i;
                break;
            }
        }
        if (overlapTargetId == -1) {
            // No alignments for Best V Hit and Best J Hit in central (overlapped) target
            cancelCurrentResult.run();
            continue;
        }
        int targetLength = mergedTargets.get(overlapTargetId).getTarget().size();
        VDJCHit bestVHit = mAlignment.getBestHit(GeneType.Variable), bestJHit = mAlignment.getBestHit(GeneType.Joining);
        if (bestVHit == null || bestJHit == null || bestVHit.getAlignment(overlapTargetId) == null || bestJHit.getAlignment(overlapTargetId) == null) {
            cancelCurrentResult.run();
            continue;
        }
        int ndnRegionBegin = 0;
        int ndnRegionEnd = targetLength;
        Alignment<NucleotideSequence> vAlignment = bestVHit.getAlignment(overlapTargetId);
        if (vAlignment != null)
            ndnRegionBegin = vAlignment.getSequence2Range().getTo();
        Alignment<NucleotideSequence> jAlignment = bestJHit.getAlignment(overlapTargetId);
        if (jAlignment != null)
            ndnRegionEnd = jAlignment.getSequence2Range().getFrom();
        RangeSet nRegion = ndnRegionBegin >= ndnRegionEnd ? RangeSet.EMPTY : RangeSet.create(ndnRegionBegin, ndnRegionEnd);
        Range dRange = mAlignment.getPartitionedTarget(overlapTargetId).getPartitioning().getRange(GeneFeature.DRegionTrimmed);
        if (dRange != null)
            nRegion = nRegion.subtract(dRange);
        RangeSet nRegionInOverlap = nRegion.intersection(overlapRange);
        int actualNRegionLength = nRegion.totalLength();
        int minimalN = Math.min(minimalNOverlap, actualNRegionLength);
        if (nRegionInOverlap.totalLength() < minimalN) {
            droppedSmallOverlapNRegion.incrementAndGet();
            cancelCurrentResult.run();
            continue;
        }
        // DDDDDDDDDJJJJJJJJJJJJJJJ
        if (minimalN == 0 && (!overlapRange.contains(ndnRegionBegin - 1) || !overlapRange.contains(ndnRegionEnd))) {
            droppedNoNRegion.incrementAndGet();
            cancelCurrentResult.run();
            continue;
        }
        overlapped.incrementAndGet();
        totalWritten.incrementAndGet();
        writer.write(mAlignment);
        // Saving alignment that where merge to prevent it's use as left part
        alreadyMergedIds.add(alignment.getAlignmentsIndex());
        alreadyMergedIds.add(searchResult.KMerInfo.alignments.getAlignmentsIndex());
    }
    if (writePartial && !overlappedOnly)
        for (List<KMerInfo> kMerInfos : kToIndexLeft.valueCollection()) for (KMerInfo kMerInfo : kMerInfos) if (alreadyMergedIds.add(kMerInfo.alignments.getAlignmentsIndex())) {
            totalWritten.incrementAndGet();
            partialAsIs.incrementAndGet();
            writer.write(kMerInfo.getAlignments());
        }
    writer.setNumberOfProcessedReads(reader.getNumberOfReads() - overlapped.get());
}
Also used : VDJCAlignerParameters(com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters) Range(com.milaboratory.core.Range) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) ArrayList(java.util.ArrayList) List(java.util.List)

Example 17 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerAligner method process0.

@Override
@SuppressWarnings("unchecked")
protected VDJCAlignmentResult<VDJCMultiRead> process0(VDJCMultiRead input) {
    final int nReads = input.numberOfReads();
    EnumMap<GeneType, VDJCHit[]> vdjcHits = new EnumMap<>(GeneType.class);
    NSequenceWithQuality[] targets = new NSequenceWithQuality[nReads];
    Chains currentChains = Chains.ALL;
    // Across all gene types
    int lastAlignedTarget = 0;
    int firstJTarget = -1;
    int lastVTarget = -1;
    for (int g = 0; g < GeneType.VJC_REFERENCE.length; g++) {
        GeneType gt = GeneType.VJC_REFERENCE[g];
        AlignmentHit<NucleotideSequence, VDJCGene>[][] alignmentHits = new AlignmentHit[nReads][];
        Arrays.fill(alignmentHits, new AlignmentHit[0]);
        for (int targetId = lastAlignedTarget; targetId < nReads; targetId++) {
            targets[targetId] = input.getRead(targetId).getData();
            final NucleotideSequence sequence = input.getRead(targetId).getData().getSequence();
            AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> als;
            final BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(gt);
            if (aligner != null) {
                int pointer = 0;
                if (g != 0) {
                    // Not V gene
                    VDJCHit[] vdjcHits1 = vdjcHits.get(GeneType.VJC_REFERENCE[g - 1]);
                    Alignment<NucleotideSequence> alignment;
                    if (vdjcHits1.length != 0 && (alignment = vdjcHits1[0].getAlignment(targetId)) != null)
                        pointer = alignment.getSequence2Range().getTo();
                }
                als = aligner.align(sequence, pointer, sequence.size(), getFilter(gt, currentChains));
                if (als != null && als.hasHits()) {
                    lastAlignedTarget = targetId;
                    if (// V
                    g == 0)
                        lastVTarget = targetId;
                    if (// J
                    g == 1)
                        firstJTarget = targetId;
                    alignmentHits[targetId] = als.getHits().toArray(new AlignmentHit[als.getHits().size()]);
                }
            }
        }
        Chains chains = Chains.EMPTY;
        for (AlignmentHit<NucleotideSequence, VDJCGene>[] alignmentHit0 : alignmentHits) if (alignmentHit0 != null)
            for (AlignmentHit<NucleotideSequence, VDJCGene> hit : alignmentHit0) chains = chains.merge(hit.getRecordPayload().getChains());
        currentChains = currentChains.intersection(chains);
        vdjcHits.put(gt, combine(parameters.getFeatureToAlign(gt), alignmentHits));
    }
    boolean fineVAlignmentPerformed = false, fineJAlignmentPerformed = false;
    // Additional (fine) alignment step for V gene
    VDJCHit[] vHits = vdjcHits.get(GeneType.Variable);
    final AlignmentScoring<NucleotideSequence> vScoring = parameters.getVAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    vHits != null && vHits.length > 0 && !(vScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Joining) != null && vdjcHits.get(GeneType.Joining).length > 0) {
        int minimalVSpace = getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()) / vScoring.getMaximalMatchScore();
        // Assert
        if (firstJTarget == -1)
            throw new AssertionError();
        for (int targetId = 1; targetId <= firstJTarget; targetId++) {
            int vSpace;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId) != null && (vSpace = vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId).getSequence2Range().getFrom()) >= minimalVSpace) {
                for (int vHitIndex = 0; vHitIndex < vHits.length; vHitIndex++) {
                    VDJCHit vHit = vHits[vHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (vHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> leftAlignment = vHit.getAlignment(targetId - 1);
                    if (leftAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = leftAlignment.getSequence1();
                    final int beginFR3 = vHit.getGene().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
                    if (beginFR3 == -1)
                        continue;
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring<NucleotideSequence>) vScoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, vSpace, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
                        continue;
                    fineVAlignmentPerformed = true;
                    vHits[vHitIndex] = vHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(vHits);
    vdjcHits.put(GeneType.Variable, cutRelativeScore(vHits, parameters.getVAlignerParameters().getRelativeMinScore(), parameters.getVAlignerParameters().getParameters().getMaxHits()));
    // Additional (fine) alignment step for J gene
    VDJCHit[] jHits = vdjcHits.get(GeneType.Joining);
    final AlignmentScoring<NucleotideSequence> jScoring = parameters.getJAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    jHits != null && jHits.length > 0 && !(jScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Variable) != null && vdjcHits.get(GeneType.Variable).length > 0) {
        int minimalJSpace = getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()) / jScoring.getMaximalMatchScore();
        // Assert
        if (lastVTarget == -1)
            throw new AssertionError();
        for (int targetId = lastVTarget; targetId < nReads - 1; targetId++) {
            int jSpaceBegin;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId) != null && (sequence2.size() - (jSpaceBegin = vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId).getSequence2Range().getTo())) >= minimalJSpace) {
                for (int jHitIndex = 0; jHitIndex < jHits.length; jHitIndex++) {
                    VDJCHit jHit = jHits[jHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (jHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> rightAlignment = jHit.getAlignment(targetId + 1);
                    if (rightAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = rightAlignment.getSequence1();
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) jScoring, sequence1, sequence2, 0, sequence1.size(), jSpaceBegin, sequence2.size() - jSpaceBegin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
                        continue;
                    fineJAlignmentPerformed = true;
                    jHits[jHitIndex] = jHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(jHits);
    vdjcHits.put(GeneType.Joining, cutRelativeScore(jHits, parameters.getJAlignerParameters().getRelativeMinScore(), parameters.getJAlignerParameters().getParameters().getMaxHits()));
    int dGeneTarget = -1;
    VDJCHit[] vResult = vdjcHits.get(GeneType.Variable);
    VDJCHit[] jResult = vdjcHits.get(GeneType.Joining);
    if (vResult.length != 0 && jResult.length != 0)
        for (int i = 0; i < nReads; i++) if (vResult[0].getAlignment(i) != null && jResult[0].getAlignment(i) != null) {
            dGeneTarget = i;
            break;
        }
    // if (fineVAlignmentPerformed && fineJAlignmentPerformed)
    // System.out.println("sd");
    VDJCHit[] dResult;
    if (dGeneTarget == -1)
        dResult = new VDJCHit[0];
    else {
        final Alignment<NucleotideSequence> vAl = vResult[0].getAlignment(dGeneTarget);
        final Alignment<NucleotideSequence> jAl = jResult[0].getAlignment(dGeneTarget);
        if (vAl == null || jAl == null || singleDAligner == null)
            dResult = new VDJCHit[0];
        else
            dResult = singleDAligner.align(targets[dGeneTarget].getSequence(), getPossibleDLoci(vResult, jResult), vAl.getSequence2Range().getTo(), jAl.getSequence2Range().getFrom(), dGeneTarget, nReads);
    }
    final VDJCAlignments alignment = new VDJCAlignments(vResult, dResult, jResult, cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets, input.getHistory(), input.getOriginalReads());
    return new VDJCAlignmentResult<>(input, alignment);
}
Also used : Chains(io.repseq.core.Chains) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) EnumMap(java.util.EnumMap) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) VDJCAlignmentResult(com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult) ReferencePoint(io.repseq.core.ReferencePoint) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 18 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class VDJCAlignerPVFirst method performJAlignment.

/**
 * Preforms J alignment for a single read
 */
@SuppressWarnings("unchecked")
List<AlignmentHit<NucleotideSequence, VDJCGene>> performJAlignment(final Target target, final PairedHit[] vHits, final int index) {
    // Getting best V hit
    AlignmentHit<NucleotideSequence, VDJCGene> vHit = vHits.length == 0 ? null : vHits[0].get(index);
    final NucleotideSequence targetSequence = target.targets[index].getSequence();
    BitArray filterForJ = getFilter(GeneType.Joining, vHits);
    if (vHit == null)
        return jAligner.align(targetSequence, 0, targetSequence.size(), filterForJ).getHits();
    // TODO remove
    if (vHit.getAlignment().getSequence1Range().getTo() <= vHit.getRecordPayload().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin) || vHit.getAlignment().getSequence2Range().getTo() == targetSequence.size())
        return Collections.EMPTY_LIST;
    int jFrom = vHit.getAlignment().getSequence2Range().getTo() - parameters.getVJOverlapWindow();
    jFrom = jFrom < 0 ? 0 : jFrom;
    return jAligner.align(targetSequence, jFrom, targetSequence.size(), filterForJ).getHits();
}
Also used : NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) BitArray(com.milaboratory.util.BitArray)

Example 19 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class VDJCAlignerS method align.

// TODO all this ifs can be simplified
private KVJResultsForSingle align(Target target) {
    NucleotideSequence sequence = target.targets[0].getSequence();
    ensureInitialized();
    AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> vResult, jResult;
    switch(parameters.getVJAlignmentOrder()) {
        case VThenJ:
            vResult = vAligner.align(sequence);
            // If there is no results for V return
            if (!vResult.hasHits())
                return new KVJResultsForSingle(target, // V result is empty
                vResult, // If -OallowPartialAlignments=true try align J gene
                parameters.getAllowPartialAlignments() ? jAligner.align(sequence) : null);
            // Returning result
            int jFrom = vResult.getBestHit().getAlignment().getSequence2Range().getTo() - parameters.getVJOverlapWindow();
            jFrom = jFrom < 0 ? 0 : jFrom;
            return new KVJResultsForSingle(target, vResult, jAligner.align(sequence, jFrom, sequence.size(), getFilter(GeneType.Joining, vResult.getHits())));
        case JThenV:
            jResult = jAligner.align(sequence);
            // If there is no results for J return
            if (!jResult.hasHits())
                return new KVJResultsForSingle(target, // If -OallowPartialAlignments=true try align V gene
                parameters.getAllowPartialAlignments() ? vAligner.align(sequence) : null, // J result is empty
                jResult);
            // Returning result
            int vTo = jResult.getBestHit().getAlignment().getSequence2Range().getFrom() + parameters.getVJOverlapWindow();
            vTo = vTo > sequence.size() ? sequence.size() : vTo;
            return new KVJResultsForSingle(target, vAligner.align(sequence, 0, vTo, getFilter(GeneType.Variable, jResult.getHits())), jResult);
    }
    throw new IllegalArgumentException("vjAlignmentOrder not set.");
}
Also used : AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 20 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class FieldExtractors method getFields.

public static synchronized Field[] getFields() {
    if (descriptors == null) {
        List<Field> descriptorsList = new ArrayList<>();
        // Number of targets
        descriptorsList.add(new PL_O("-targets", "Export number of targets", "Number of targets", "numberOfTargets") {

            @Override
            protected String extract(VDJCObject object) {
                return Integer.toString(object.numberOfTargets());
            }
        });
        // Best hits
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hit", "Export best " + l + " hit", "Best " + l + " hit", "best" + l + "Hit") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return bestHit.getGene().getName();
                }
            });
        }
        // Best gene
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Gene", "Export best " + l + " hit gene name (e.g. TRBV12-3 for TRBV12-3*00)", "Best " + l + " gene", "best" + l + "Gene") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return bestHit.getGene().getGeneName();
                }
            });
        }
        // Best family
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Family", "Export best " + l + " hit family name (e.g. TRBV12 for TRBV12-3*00)", "Best " + l + " family", "best" + l + "Family") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return bestHit.getGene().getFamilyName();
                }
            });
        }
        // Best hit score
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitScore", "Export score for best " + l + " hit", "Best " + l + " hit score", "best" + l + "HitScore") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    return String.valueOf(bestHit.getScore());
                }
            });
        }
        // All hits
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitsWithScore", "Export all " + l + " hits with score", "All " + l + " hits", "all" + l + "HitsWithScore") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits.length == 0)
                        return "";
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        sb.append(hits[i].getGene().getName()).append("(").append(SCORE_FORMAT.format(hits[i].getScore())).append(")");
                        if (i == hits.length - 1)
                            break;
                        sb.append(",");
                    }
                    return sb.toString();
                }
            });
        }
        // All hits without score
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hits", "Export all " + l + " hits", "All " + l + " Hits", "all" + l + "Hits") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits.length == 0)
                        return "";
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        sb.append(hits[i].getGene().getName());
                        if (i == hits.length - 1)
                            break;
                        sb.append(",");
                    }
                    return sb.toString();
                }
            });
        }
        // All gene names
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Genes", "Export all " + l + " gene names (e.g. TRBV12-3 for TRBV12-3*00)", "All " + l + " genes", "all" + l + "Genes", type) {

                @Override
                String extractStringForHit(VDJCHit hit) {
                    return hit.getGene().getGeneName();
                }
            });
        }
        // All families
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Families", "Export all " + l + " gene family anmes (e.g. TRBV12 for TRBV12-3*00)", "All " + l + " families", "all" + l + "Families", type) {

                @Override
                String extractStringForHit(VDJCHit hit) {
                    return hit.getGene().getFamilyName();
                }
            });
        }
        // Best alignment
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignment", "Export best " + l + " alignment", "Best " + l + " alignment", "best" + l + "Alignment") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit bestHit = object.getBestHit(type);
                    if (bestHit == null)
                        return NULL;
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; ; i++) {
                        Alignment<NucleotideSequence> alignment = bestHit.getAlignment(i);
                        if (alignment == null)
                            sb.append(NULL);
                        else
                            sb.append(alignment.toCompactString());
                        if (i == object.numberOfTargets() - 1)
                            break;
                        sb.append(",");
                    }
                    return sb.toString();
                }
            });
        }
        // All alignments
        for (final GeneType type : GeneType.values()) {
            char l = type.getLetter();
            descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignments", "Export all " + l + " alignments", "All " + l + " alignments", "all" + l + "Alignments") {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits.length == 0)
                        return "";
                    StringBuilder sb = new StringBuilder();
                    for (int j = 0; ; ++j) {
                        for (int i = 0; ; i++) {
                            Alignment<NucleotideSequence> alignment = hits[j].getAlignment(i);
                            if (alignment == null)
                                sb.append(NULL);
                            else
                                sb.append(alignment.toCompactString());
                            if (i == object.numberOfTargets() - 1)
                                break;
                            sb.append(',');
                        }
                        if (j == hits.length - 1)
                            break;
                        sb.append(';');
                    }
                    return sb.toString();
                }
            });
        }
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-nFeature", "Export nucleotide sequence of specified gene feature", "N. Seq. ", "nSeq") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return seq.getSequence().toString();
            }
        });
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-qFeature", "Export quality string of specified gene feature", "Qual. ", "qual") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return seq.getQuality().toString();
            }
        });
        descriptorsList.add(new FeatureExtractors.WithHeader("-aaFeature", "Export amino acid sequence of specified gene feature", 1, new String[] { "AA. Seq. " }, new String[] { "aaSeq" }) {

            @Override
            protected String extractValue(VDJCObject object, GeneFeature[] parameters) {
                GeneFeature geneFeature = parameters[parameters.length - 1];
                NSequenceWithQuality feature = object.getFeature(geneFeature);
                if (feature == null)
                    return NULL;
                int targetId = object.getTargetContainingFeature(geneFeature);
                TranslationParameters tr = targetId == -1 ? TranslationParameters.FromLeftWithIncompleteCodon : object.getPartitionedTarget(targetId).getPartitioning().getTranslationParameters(geneFeature);
                if (tr == null)
                    return NULL;
                return AminoAcidSequence.translate(feature.getSequence(), tr).toString();
            }
        });
        // descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromLeft", "Export amino acid sequence of " +
        // "specified gene feature starting from the leftmost nucleotide (differs from -aaFeature only for " +
        // "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
        // @Override
        // public String convert(NSequenceWithQuality seq) {
        // return AminoAcidSequence.translate(seq.getSequence(), FromLeftWithoutIncompleteCodon).toString();
        // }
        // });
        // 
        // descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromRight", "Export amino acid sequence of " +
        // "specified gene feature starting from the rightmost nucleotide (differs from -aaFeature only for " +
        // "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
        // @Override
        // public String convert(NSequenceWithQuality seq) {
        // return AminoAcidSequence.translate(seq.getSequence(), FromRightWithoutIncompleteCodon).toString();
        // }
        // });
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-minFeatureQuality", "Export minimal quality of specified gene feature", "Min. qual. ", "minQual") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return "" + seq.getQuality().minValue();
            }
        });
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-avrgFeatureQuality", "Export average quality of specified gene feature", "Mean. qual. ", "meanQual") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return "" + seq.getQuality().meanValue();
            }
        });
        descriptorsList.add(new FeatureExtractors.NSeqExtractor("-lengthOf", "Exports length of specified gene feature.", "Length of ", "lengthOf") {

            @Override
            public String convert(NSequenceWithQuality seq) {
                return "" + seq.size();
            }
        });
        descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutations", "Extract nucleotide mutations for specific gene feature; relative to germline sequence.", 1, new String[] { "N. Mutations in " }, new String[] { "nMutations" }) {

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                return mutations.encode(",");
            }
        });
        descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutationsRelative", "Extract nucleotide mutations for specific gene feature relative to another feature.", 2, new String[] { "N. Mutations in ", " relative to " }, new String[] { "nMutationsIn", "Relative" }) {

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                return mutations.encode(",");
            }
        });
        final class AAMutations extends FeatureExtractors.MutationsExtractor {

            AAMutations(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
                super(command, description, nArgs, hPrefix, sPrefix);
            }

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                if (tr == null)
                    return "-";
                Mutations<AminoAcidSequence> aaMuts = MutationsUtil.nt2aa(seq1, mutations, tr);
                if (aaMuts == null)
                    return "-";
                return aaMuts.encode(",");
            }
        }
        descriptorsList.add(new AAMutations("-aaMutations", "Extract amino acid mutations for specific gene feature", 1, new String[] { "AA. Mutations in " }, new String[] { "aaMutations" }));
        descriptorsList.add(new AAMutations("-aaMutationsRelative", "Extract amino acid mutations for specific gene feature relative to another feature.", 2, new String[] { "AA. Mutations in ", " relative to " }, new String[] { "aaMutationsIn", "Relative" }));
        final class MutationsDetailed extends FeatureExtractors.MutationsExtractor {

            MutationsDetailed(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
                super(command, description, nArgs, hPrefix, sPrefix);
            }

            @Override
            String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
                if (tr == null)
                    return "-";
                MutationsUtil.MutationNt2AADescriptor[] descriptors = MutationsUtil.nt2aaDetailed(seq1, mutations, tr, 10);
                if (descriptors == null)
                    return "-";
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < descriptors.length; i++) {
                    sb.append(descriptors[i]);
                    if (i == descriptors.length - 1)
                        break;
                    sb.append(",");
                }
                return sb.toString();
            }
        }
        String detailedMutationsFormat = "Format <nt_mutation>:<aa_mutation_individual>:<aa_mutation_cumulative>, where <aa_mutation_individual> is an expected amino acid " + "mutation given no other mutations have occurred, and <aa_mutation_cumulative> amino acid mutation is the observed amino acid " + "mutation combining effect from all other. WARNING: format may change in following versions.";
        descriptorsList.add(new MutationsDetailed("-mutationsDetailed", "Detailed list of nucleotide and corresponding amino acid mutations. " + detailedMutationsFormat, 1, new String[] { "Detailed mutations in " }, new String[] { "mutationsDetailedIn" }));
        descriptorsList.add(new MutationsDetailed("-mutationsDetailedRelative", "Detailed list of nucleotide and corresponding amino acid mutations written, positions relative to specified gene feature. " + detailedMutationsFormat, 2, new String[] { "Detailed mutations in ", " relative to " }, new String[] { "mutationsDetailedIn", "Relative" }));
        descriptorsList.add(new ExtractReferencePointPosition());
        descriptorsList.add(new ExtractDefaultReferencePointsPositions());
        descriptorsList.add(new PL_A("-readId", "Export id of read corresponding to alignment", "Read id", "readId") {

            @Override
            protected String extract(VDJCAlignments object) {
                return "" + object.getMinReadId();
            }

            @Override
            public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
                System.out.println("WARNING: -readId is deprecated. Use -readIds");
                return super.create(outputMode, args);
            }
        });
        descriptorsList.add(new PL_A("-readIds", "Export id of read corresponding to alignment", "Read id", "readId") {

            @Override
            protected String extract(VDJCAlignments object) {
                long[] readIds = object.getReadIds();
                StringBuilder sb = new StringBuilder();
                for (int i = 0; ; i++) {
                    sb.append(readIds[i]);
                    if (i == readIds.length - 1)
                        return sb.toString();
                    sb.append(",");
                }
            }
        });
        descriptorsList.add(new ExtractSequence(VDJCAlignments.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Read(s) sequence", "readSequence"));
        descriptorsList.add(new ExtractSequenceQuality(VDJCAlignments.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Read(s) sequence qualities", "readQuality"));
        descriptorsList.add(new PL_C("-cloneId", "Unique clone identifier", "Clone ID", "cloneId") {

            @Override
            protected String extract(Clone object) {
                return "" + object.getId();
            }
        });
        descriptorsList.add(new PL_C("-count", "Export clone count", "Clone count", "cloneCount") {

            @Override
            protected String extract(Clone object) {
                return "" + object.getCount();
            }
        });
        descriptorsList.add(new PL_C("-fraction", "Export clone fraction", "Clone fraction", "cloneFraction") {

            @Override
            protected String extract(Clone object) {
                return "" + object.getFraction();
            }
        });
        descriptorsList.add(new ExtractSequence(Clone.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Clonal sequence(s)", "clonalSequence"));
        descriptorsList.add(new ExtractSequenceQuality(Clone.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Clonal sequence quality(s)", "clonalSequenceQuality"));
        descriptorsList.add(new PL_A("-descrR1", "Export description line from initial .fasta or .fastq file " + "of the first read (only available if --save-description was used in align command)", "Description R1", "descrR1") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
                return reads.get(0).getRead(0).getDescription();
            }

            @Override
            public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
                System.out.println("WARNING: -descrR1 is deprecated. Use -descrsR1");
                return super.create(outputMode, args);
            }
        });
        descriptorsList.add(new PL_A("-descrR2", "Export description line from initial .fasta or .fastq file " + "of the second read (only available if --save-description was used in align command)", "Description R2", "descrR2") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
                SequenceRead read = reads.get(0);
                if (read.numberOfReads() < 2)
                    throw new IllegalArgumentException("Error for option \'-descrR2\':\n" + "No description available for second read: your input data was single-end");
                return read.getRead(1).getDescription();
            }

            @Override
            public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
                System.out.println("WARNING: -descrR2 is deprecated. Use -descrsR2");
                return super.create(outputMode, args);
            }
        });
        descriptorsList.add(new PL_A("-descrsR1", "Export description lines from initial .fasta or .fastq file " + "of the first reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R1", "descrsR1") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads option " + "or don't use \'-descrR1\' in exportAlignments");
                StringBuilder sb = new StringBuilder();
                for (int i = 0; ; i++) {
                    sb.append(reads.get(i).getRead(0).getDescription());
                    if (i == reads.size() - 1)
                        return sb.toString();
                    sb.append(",");
                }
            }
        });
        descriptorsList.add(new PL_A("-descrsR2", "Export description lines from initial .fasta or .fastq file " + "of the second reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R2", "descrsR2") {

            @Override
            protected String extract(VDJCAlignments object) {
                List<SequenceRead> reads = object.getOriginalReads();
                if (reads == null)
                    throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads option " + "or don't use \'-descrR1\' in exportAlignments");
                StringBuilder sb = new StringBuilder();
                for (int i = 0; ; i++) {
                    SequenceRead read = reads.get(i);
                    if (read.numberOfReads() < 2)
                        throw new IllegalArgumentException("Error for option \'-descrsR2\':\n" + "No description available for second read: your input data was single-end");
                    sb.append(read.getRead(1).getDescription());
                    if (i == reads.size() - 1)
                        return sb.toString();
                    sb.append(",");
                }
            }
        });
        descriptorsList.add(new PL_A("-readHistory", "Export read history", "Read history", "readHistory") {

            @Override
            protected String extract(VDJCAlignments object) {
                try {
                    return GlobalObjectMappers.toOneLine(object.getHistory());
                } catch (JsonProcessingException ex) {
                    throw new RuntimeException(ex);
                }
            }
        });
        for (final GeneType type : GeneType.values()) {
            String c = Character.toLowerCase(type.getLetter()) + "IdentityPercents";
            descriptorsList.add(new PL_O("-" + c, type.getLetter() + " alignment identity percents", type.getLetter() + " alignment identity percents", c) {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit[] hits = object.getHits(type);
                    if (hits == null)
                        return NULL;
                    StringBuilder sb = new StringBuilder();
                    sb.append("");
                    for (int i = 0; ; i++) {
                        sb.append(hits[i].getIdentity());
                        if (i == hits.length - 1)
                            return sb.toString();
                        sb.append(",");
                    }
                }
            });
        }
        for (final GeneType type : GeneType.values()) {
            String c = Character.toLowerCase(type.getLetter()) + "BestIdentityPercent";
            descriptorsList.add(new PL_O("-" + c, type.getLetter() + "best alignment identity percent", type.getLetter() + "best alignment identity percent", c) {

                @Override
                protected String extract(VDJCObject object) {
                    VDJCHit hit = object.getBestHit(type);
                    if (hit == null)
                        return NULL;
                    return Float.toString(hit.getIdentity());
                }
            });
        }
        descriptorsList.add(new PL_O("-chains", "Chains", "Chains", "Chains") {

            @Override
            protected String extract(VDJCObject object) {
                return object.commonChains().toString();
            }
        });
        descriptorsList.add(new PL_O("-topChains", "Top chains", "Top chains", "topChains") {

            @Override
            protected String extract(VDJCObject object) {
                return object.commonTopChains().toString();
            }
        });
        descriptors = descriptorsList.toArray(new Field[descriptorsList.size()]);
    }
    return descriptors;
}
Also used : GeneFeature(io.repseq.core.GeneFeature) ArrayList(java.util.ArrayList) VDJCObject(com.milaboratory.mixcr.basictypes.VDJCObject) Alignment(com.milaboratory.core.alignment.Alignment) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) ArrayList(java.util.ArrayList) List(java.util.List) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Clone(com.milaboratory.mixcr.basictypes.Clone) Mutations(com.milaboratory.core.mutations.Mutations) ReferencePoint(io.repseq.core.ReferencePoint) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) SequenceRead(com.milaboratory.core.io.sequence.SequenceRead) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Aggregations

NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)81 Test (org.junit.Test)32 Range (com.milaboratory.core.Range)19 VDJCGene (io.repseq.core.VDJCGene)15 GeneType (io.repseq.core.GeneType)14 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)13 GeneFeature (io.repseq.core.GeneFeature)9 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)8 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)8 ReferencePoint (io.repseq.core.ReferencePoint)7 VDJCLibrary (io.repseq.core.VDJCLibrary)7 ArrayList (java.util.ArrayList)7 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)6 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)5 PairedRead (com.milaboratory.core.io.sequence.PairedRead)5 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)5 Path (java.nio.file.Path)5 Alignment (com.milaboratory.core.alignment.Alignment)4 Pattern (java.util.regex.Pattern)4 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)3