Search in sources :

Example 1 with Chains

use of io.repseq.core.Chains in project mixcr by milaboratory.

the class PartialAlignmentsAssemblerAligner method process0.

@Override
@SuppressWarnings("unchecked")
protected VDJCAlignmentResult<VDJCMultiRead> process0(VDJCMultiRead input) {
    final int nReads = input.numberOfReads();
    EnumMap<GeneType, VDJCHit[]> vdjcHits = new EnumMap<>(GeneType.class);
    NSequenceWithQuality[] targets = new NSequenceWithQuality[nReads];
    Chains currentChains = Chains.ALL;
    // Across all gene types
    int lastAlignedTarget = 0;
    int firstJTarget = -1;
    int lastVTarget = -1;
    for (int g = 0; g < GeneType.VJC_REFERENCE.length; g++) {
        GeneType gt = GeneType.VJC_REFERENCE[g];
        AlignmentHit<NucleotideSequence, VDJCGene>[][] alignmentHits = new AlignmentHit[nReads][];
        Arrays.fill(alignmentHits, new AlignmentHit[0]);
        for (int targetId = lastAlignedTarget; targetId < nReads; targetId++) {
            targets[targetId] = input.getRead(targetId).getData();
            final NucleotideSequence sequence = input.getRead(targetId).getData().getSequence();
            AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> als;
            final BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(gt);
            if (aligner != null) {
                int pointer = 0;
                if (g != 0) {
                    // Not V gene
                    VDJCHit[] vdjcHits1 = vdjcHits.get(GeneType.VJC_REFERENCE[g - 1]);
                    Alignment<NucleotideSequence> alignment;
                    if (vdjcHits1.length != 0 && (alignment = vdjcHits1[0].getAlignment(targetId)) != null)
                        pointer = alignment.getSequence2Range().getTo();
                }
                als = aligner.align(sequence, pointer, sequence.size(), getFilter(gt, currentChains));
                if (als != null && als.hasHits()) {
                    lastAlignedTarget = targetId;
                    if (// V
                    g == 0)
                        lastVTarget = targetId;
                    if (// J
                    g == 1)
                        firstJTarget = targetId;
                    alignmentHits[targetId] = als.getHits().toArray(new AlignmentHit[als.getHits().size()]);
                }
            }
        }
        Chains chains = Chains.EMPTY;
        for (AlignmentHit<NucleotideSequence, VDJCGene>[] alignmentHit0 : alignmentHits) if (alignmentHit0 != null)
            for (AlignmentHit<NucleotideSequence, VDJCGene> hit : alignmentHit0) chains = chains.merge(hit.getRecordPayload().getChains());
        currentChains = currentChains.intersection(chains);
        vdjcHits.put(gt, combine(parameters.getFeatureToAlign(gt), alignmentHits));
    }
    boolean fineVAlignmentPerformed = false, fineJAlignmentPerformed = false;
    // Additional (fine) alignment step for V gene
    VDJCHit[] vHits = vdjcHits.get(GeneType.Variable);
    final AlignmentScoring<NucleotideSequence> vScoring = parameters.getVAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    vHits != null && vHits.length > 0 && !(vScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Joining) != null && vdjcHits.get(GeneType.Joining).length > 0) {
        int minimalVSpace = getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()) / vScoring.getMaximalMatchScore();
        // Assert
        if (firstJTarget == -1)
            throw new AssertionError();
        for (int targetId = 1; targetId <= firstJTarget; targetId++) {
            int vSpace;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId) != null && (vSpace = vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId).getSequence2Range().getFrom()) >= minimalVSpace) {
                for (int vHitIndex = 0; vHitIndex < vHits.length; vHitIndex++) {
                    VDJCHit vHit = vHits[vHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (vHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> leftAlignment = vHit.getAlignment(targetId - 1);
                    if (leftAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = leftAlignment.getSequence1();
                    final int beginFR3 = vHit.getGene().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
                    if (beginFR3 == -1)
                        continue;
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring<NucleotideSequence>) vScoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, vSpace, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
                        continue;
                    fineVAlignmentPerformed = true;
                    vHits[vHitIndex] = vHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(vHits);
    vdjcHits.put(GeneType.Variable, cutRelativeScore(vHits, parameters.getVAlignerParameters().getRelativeMinScore(), parameters.getVAlignerParameters().getParameters().getMaxHits()));
    // Additional (fine) alignment step for J gene
    VDJCHit[] jHits = vdjcHits.get(GeneType.Joining);
    final AlignmentScoring<NucleotideSequence> jScoring = parameters.getJAlignerParameters().getParameters().getScoring();
    if (// TODO implement AffineGapAlignmentScoring
    jHits != null && jHits.length > 0 && !(jScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Variable) != null && vdjcHits.get(GeneType.Variable).length > 0) {
        int minimalJSpace = getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()) / jScoring.getMaximalMatchScore();
        // Assert
        if (lastVTarget == -1)
            throw new AssertionError();
        for (int targetId = lastVTarget; targetId < nReads - 1; targetId++) {
            int jSpaceBegin;
            final NucleotideSequence sequence2 = targets[targetId].getSequence();
            if (vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId) != null && (sequence2.size() - (jSpaceBegin = vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId).getSequence2Range().getTo())) >= minimalJSpace) {
                for (int jHitIndex = 0; jHitIndex < jHits.length; jHitIndex++) {
                    VDJCHit jHit = jHits[jHitIndex];
                    // Perform fine alignment only if target is not already aligned by fast aligner
                    if (jHit.getAlignment(targetId) != null)
                        continue;
                    Alignment<NucleotideSequence> rightAlignment = jHit.getAlignment(targetId + 1);
                    if (rightAlignment == null)
                        continue;
                    final NucleotideSequence sequence1 = rightAlignment.getSequence1();
                    final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) jScoring, sequence1, sequence2, 0, sequence1.size(), jSpaceBegin, sequence2.size() - jSpaceBegin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
                    if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
                        continue;
                    fineJAlignmentPerformed = true;
                    jHits[jHitIndex] = jHit.setAlignment(targetId, alignment);
                }
            }
        }
    }
    Arrays.sort(jHits);
    vdjcHits.put(GeneType.Joining, cutRelativeScore(jHits, parameters.getJAlignerParameters().getRelativeMinScore(), parameters.getJAlignerParameters().getParameters().getMaxHits()));
    int dGeneTarget = -1;
    VDJCHit[] vResult = vdjcHits.get(GeneType.Variable);
    VDJCHit[] jResult = vdjcHits.get(GeneType.Joining);
    if (vResult.length != 0 && jResult.length != 0)
        for (int i = 0; i < nReads; i++) if (vResult[0].getAlignment(i) != null && jResult[0].getAlignment(i) != null) {
            dGeneTarget = i;
            break;
        }
    // if (fineVAlignmentPerformed && fineJAlignmentPerformed)
    // System.out.println("sd");
    VDJCHit[] dResult;
    if (dGeneTarget == -1)
        dResult = new VDJCHit[0];
    else {
        final Alignment<NucleotideSequence> vAl = vResult[0].getAlignment(dGeneTarget);
        final Alignment<NucleotideSequence> jAl = jResult[0].getAlignment(dGeneTarget);
        if (vAl == null || jAl == null || singleDAligner == null)
            dResult = new VDJCHit[0];
        else
            dResult = singleDAligner.align(targets[dGeneTarget].getSequence(), getPossibleDLoci(vResult, jResult), vAl.getSequence2Range().getTo(), jAl.getSequence2Range().getFrom(), dGeneTarget, nReads);
    }
    final VDJCAlignments alignment = new VDJCAlignments(vResult, dResult, jResult, cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets, input.getHistory(), input.getOriginalReads());
    return new VDJCAlignmentResult<>(input, alignment);
}
Also used : Chains(io.repseq.core.Chains) NSequenceWithQuality(com.milaboratory.core.sequence.NSequenceWithQuality) EnumMap(java.util.EnumMap) VDJCAlignments(com.milaboratory.mixcr.basictypes.VDJCAlignments) VDJCAlignmentResult(com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult) ReferencePoint(io.repseq.core.ReferencePoint) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 2 with Chains

use of io.repseq.core.Chains in project mixcr by milaboratory.

the class ActionExportParameters method getFilter.

@SuppressWarnings("unchecked")
public Filter<T> getFilter() {
    List<Filter<T>> filters = new ArrayList<>();
    final Chains chains = getChains();
    filters.add(new Filter<T>() {

        @Override
        public boolean accept(T object) {
            for (GeneType gt : GeneType.VJC_REFERENCE) {
                VDJCHit bestHit = object.getBestHit(gt);
                if (bestHit != null && chains.intersects(bestHit.getGene().getChains()))
                    return true;
            }
            return false;
        }
    });
    if (filters.isEmpty())
        return ACCEPT_ALL;
    if (filters.size() == 1)
        return filters.get(0);
    return and(filters.toArray(new Filter[filters.size()]));
}
Also used : Filter(cc.redberry.primitives.Filter) Chains(io.repseq.core.Chains) GeneType(io.repseq.core.GeneType) VDJCHit(com.milaboratory.mixcr.basictypes.VDJCHit)

Example 3 with Chains

use of io.repseq.core.Chains in project mixcr by milaboratory.

the class VDJCAlignerAbstract method init.

@Override
protected void init() {
    DAlignerParameters dAlignerParameters = parameters.getDAlignerParameters();
    List<VDJCGene> dGenes = genesToAlign.get(GeneType.Diversity);
    if (dAlignerParameters != null && dGenes.size() != 0)
        singleDAligner = new SingleDAligner(dAlignerParameters, genesToAlign.get(GeneType.Diversity));
    vAligner = createKAligner(GeneType.Variable);
    jAligner = createKAligner(GeneType.Joining);
    cAligner = createKAligner(GeneType.Constant);
    Chains chains = new Chains();
    for (VDJCGene gene : getUsedGenes()) chains = chains.merge(gene.getChains());
    filters = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        HashMap<String, BitArray> f = new HashMap<>();
        for (final String chain : chains) {
            BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(geneType);
            if (aligner != null)
                f.put(chain, aligner.createFilter(new Filter<VDJCGene>() {

                    @Override
                    public boolean accept(VDJCGene object) {
                        return object.getChains().contains(chain);
                    }
                }));
        }
        filters.put(geneType, f);
    }
}
Also used : HashMap(java.util.HashMap) Chains(io.repseq.core.Chains) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) BitArray(com.milaboratory.util.BitArray)

Example 4 with Chains

use of io.repseq.core.Chains in project repseqio by repseqio.

the class ExportCloneSequencesAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    Chains chains = params.getChains();
    GeneFeature geneFeature = params.getGeneFeature();
    RandomGenerator random = new Well19937c(1232434);
    try (GRepertoireReader input = new GRepertoireReader(createBufferedReader(params.getInput()));
        FastaWriter<NucleotideSequence> output = createSingleFastaWriter(params.getOutput())) {
        List<DescriptionExtractor> extractors = params.getExtractors(input.getLibrary());
        long i = 0;
        for (GClone clone : CUtils.it(input)) {
            int f = params.factor == null ? 1 : randomizedRound(clone.abundance * params.factor, random);
            for (int j = 0; j < f; j++) for (Map.Entry<String, GGene> e : clone.genes.entrySet()) if (chains.contains(e.getKey())) {
                StringBuilder descriptionLine = new StringBuilder("GClone");
                for (DescriptionExtractor extractor : extractors) descriptionLine.append("|").append(extractor.extract(clone, e.getValue(), e.getKey()));
                output.write(new FastaRecord<>(i++, descriptionLine.toString(), e.getValue().getFeature(geneFeature)));
            }
        }
    }
}
Also used : GeneFeature(io.repseq.core.GeneFeature) Chains(io.repseq.core.Chains) Well19937c(org.apache.commons.math3.random.Well19937c) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 5 with Chains

use of io.repseq.core.Chains in project mixcr by milaboratory.

the class RunMiXCRTest method test1.

@Test
public void test1() throws Exception {
    RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(RunMiXCR.class.getResource("/sequences/test_R1.fastq").getFile(), RunMiXCR.class.getResource("/sequences/test_R2.fastq").getFile());
    RunMiXCR.AlignResult align = RunMiXCR.align(params);
    RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
    for (Clone clone : assemble.cloneSet.getClones()) {
        Chains vjLoci = VDJCAligner.getPossibleDLoci(clone.getHits(GeneType.Variable), clone.getHits(GeneType.Joining));
        for (VDJCHit dHit : clone.getHits(GeneType.Diversity)) Assert.assertTrue(vjLoci.intersects(dHit.getGene().getChains()));
    }
}
Also used : Chains(io.repseq.core.Chains) Test(org.junit.Test)

Aggregations

Chains (io.repseq.core.Chains)5 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)3 GeneType (io.repseq.core.GeneType)3 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)2 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)2 VDJCGene (io.repseq.core.VDJCGene)2 Filter (cc.redberry.primitives.Filter)1 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)1 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)1 VDJCAlignmentResult (com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult)1 BitArray (com.milaboratory.util.BitArray)1 GeneFeature (io.repseq.core.GeneFeature)1 ReferencePoint (io.repseq.core.ReferencePoint)1 EnumMap (java.util.EnumMap)1 HashMap (java.util.HashMap)1 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)1 Well19937c (org.apache.commons.math3.random.Well19937c)1 Test (org.junit.Test)1