use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.
the class FullSeqAssemblerTest method test1.
@Test
public void test1() throws Exception {
int len = 140;
PairedRead read1 = new PairedRead(new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len)), "R1"), new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len).getReverseComplement()), "R2"));
PairedRead read2 = new PairedRead(new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len)), "R1"), new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len).getReverseComplement()), "R2"));
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1, read2);
// [-200, -60] [-20, 120]
// [-150, 110]
//
// [-200, -150], [110, 120] = 60
// [-60, -20] = 40
params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
params.cloneAssemblerParameters.updateFrom(params.alignerParameters);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++)
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
PointSequence[] r2s = agg.toPointSequences(align.alignments.get(1));
TIntHashSet p2 = new TIntHashSet(Arrays.stream(r2s).mapToInt(s -> s.point).toArray());
Assert.assertEquals(261 - masterSeq1WT.cdr3Part, p2.size());
PointSequence[] r1s = agg.toPointSequences(align.alignments.get(0));
TIntHashSet p1 = new TIntHashSet(Arrays.stream(r1s).mapToInt(s -> s.point).toArray());
Assert.assertEquals(281 - masterSeq1WT.cdr3Part, p1.size());
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments));
long uniq1 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[0]).filter(c -> c == 0xFFFFFFFF).count();
long uniq2 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[1]).filter(c -> c == 0xFFFFFFFF).count();
Assert.assertEquals(40, uniq1);
Assert.assertEquals(60, uniq2);
for (Clone clone : new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones()) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.
the class FullSeqAssemblerTest method testLargeCloneNoMismatches.
@Test
public void testLargeCloneNoMismatches() throws Exception {
MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
// ActionExportClonesPretty.outputCompact(System.out, largeClone);
// System.exit(0);
Well44497b rnd = new Well44497b(1234567889L);
int nReads = 100_000;
int readLength = 75, readGap = 150;
// slice seq randomly
PairedRead[] slicedReads = new PairedRead[nReads];
for (int i = 0; i < nReads; ++i) {
int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
assert r2from > r1from;
slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
}
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
// params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
// System.out.println();
// System.out.println(" ================================================ ");
// System.out.println();
// }
Assert.assertEquals(1, assemble.cloneSet.size());
Clone initialClone = assemble.cloneSet.get(0);
NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
for (int i = 0; i < al.numberOfTargets(); i++) {
System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
System.out.println();
}
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
});
// System.exit(0);
System.out.println("=> Agg");
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
for (Clone clone : clones) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.
the class ActionClonesDiff method populate.
private void populate(Map<CKey, CRec> recs, CloneSet cs, int i) {
for (Clone clone : cs) {
CKey key = getKey(clone);
CRec cRec = recs.get(key);
if (cRec == null)
recs.put(key, cRec = new CRec());
if (cRec.clones[i] != null) {
String error = "";
char letter = 'X';
if (!Objects.equals(getBestGene(cRec.clones[i], GeneType.Variable), getBestGene(clone, GeneType.Variable)))
letter = 'v';
if (!Objects.equals(getBestGene(cRec.clones[i], GeneType.Joining), getBestGene(clone, GeneType.Joining)))
letter = 'j';
if (!Objects.equals(getBestGene(cRec.clones[i], GeneType.Constant), getBestGene(clone, GeneType.Constant)))
letter = 'c';
if (letter != 'X')
error = "Error: clones with the same key present in one of the clonesets. Seems that clones were assembled " + "using -OseparateBy" + Character.toUpperCase(letter) + "=true option, please add -" + letter + " option to this command.";
throw new ParameterException(error);
}
cRec.clones[i] = clone;
}
}
use of com.milaboratory.mixcr.basictypes.Clone in project mixcr by milaboratory.
the class CloneFactory method create.
public Clone create(int id, double count, EnumMap<GeneType, TObjectFloatHashMap<VDJCGeneId>> geneScores, NSequenceWithQuality[] targets) {
EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VJC_REFERENCE) {
VJCClonalAlignerParameters vjcParameters = parameters.getVJCParameters(geneType);
if (vjcParameters == null)
continue;
GeneFeature featureToAlign = featuresToAlign.get(geneType);
TObjectFloatHashMap<VDJCGeneId> gtGeneScores = geneScores.get(geneType);
if (gtGeneScores == null)
continue;
GeneFeature[] intersectingFeatures = new GeneFeature[assemblingFeatures.length];
for (int i = 0; i < assemblingFeatures.length; ++i) {
intersectingFeatures[i] = GeneFeature.intersection(featureToAlign, assemblingFeatures[i]);
if (intersectingFeatures[i] != null)
switch(geneType) {
case Variable:
if (!intersectingFeatures[i].getFirstPoint().equals(assemblingFeatures[i].getFirstPoint()))
throw new IllegalArgumentException();
break;
case Joining:
if (!intersectingFeatures[i].getLastPoint().equals(assemblingFeatures[i].getLastPoint()))
throw new IllegalArgumentException();
break;
}
}
VDJCHit[] result = new VDJCHit[gtGeneScores.size()];
int pointer = 0;
TObjectFloatIterator<VDJCGeneId> iterator = gtGeneScores.iterator();
while (iterator.hasNext()) {
iterator.advance();
VDJCGene gene = usedGenes.get(iterator.key());
Alignment<NucleotideSequence>[] alignments = new Alignment[assemblingFeatures.length];
for (int i = 0; i < assemblingFeatures.length; ++i) {
if (intersectingFeatures[i] == null)
continue;
NucleotideSequence referenceSequence = gene.getFeature(featureToAlign);
Range rangeInReference = gene.getPartitioning().getRelativeRange(featureToAlign, intersectingFeatures[i]);
if (rangeInReference == null || referenceSequence == null)
continue;
Boolean leftSide;
switch(geneType) {
case Variable:
leftSide = intersectingFeatures[i].getLastPoint().isTrimmable() ? true : null;
break;
case Joining:
leftSide = intersectingFeatures[i].getFirstPoint().isTrimmable() ? false : null;
break;
case Constant:
leftSide = null;
break;
default:
throw new RuntimeException();
}
BandedAlignerParameters<NucleotideSequence> alignmentParameters = vjcParameters.getAlignmentParameters();
int referenceLength = rangeInReference.length();
NucleotideSequence target = targets[i].getSequence();
if (alignmentParameters.getScoring() instanceof LinearGapAlignmentScoring) {
if (leftSide == null) {
alignments[i] = BandedLinearAligner.align((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
} else if (leftSide) {
assert rangeInReference.getFrom() + referenceLength == referenceSequence.size();
alignments[i] = BandedLinearAligner.alignSemiLocalLeft((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth(), alignmentParameters.getStopPenalty());
} else {
assert rangeInReference.getFrom() == 0;
// int offset2 = Math.max(0, target.size() - referenceLength);
alignments[i] = BandedLinearAligner.alignSemiLocalRight((LinearGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth(), alignmentParameters.getStopPenalty());
}
} else {
if (leftSide == null) {
alignments[i] = BandedAffineAligner.align((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
} else if (leftSide) {
assert rangeInReference.getFrom() + referenceLength == referenceSequence.size();
alignments[i] = BandedAffineAligner.semiLocalRight((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
} else {
assert rangeInReference.getFrom() == 0;
// int offset2 = Math.max(0, target.size() - referenceLength);
alignments[i] = BandedAffineAligner.semiLocalLeft((AffineGapAlignmentScoring<NucleotideSequence>) alignmentParameters.getScoring(), referenceSequence, target, rangeInReference.getFrom(), referenceLength, 0, target.size(), alignmentParameters.getWidth());
}
}
}
result[pointer++] = new VDJCHit(gene, alignments, featureToAlign, iterator.value());
}
Arrays.sort(result, 0, pointer);
hits.put(geneType, pointer < result.length ? Arrays.copyOf(result, pointer) : result);
}
// D
NucleotideSequence sequenceToAlign = targets[indexOfAssemblingFeatureWithD].getSequence();
int from = 0;
int to = sequenceToAlign.size();
VDJCHit[] hs = hits.get(GeneType.Variable);
if (hs != null && hs.length > 0) {
int p = hs[0].getPartitioningForTarget(indexOfAssemblingFeatureWithD).getPosition(ReferencePoint.VEndTrimmed);
if (p != -1) {
if (p < 0)
p = -2 - p;
from = p;
}
}
hs = hits.get(GeneType.Joining);
if (hs != null && hs.length > 0) {
int p = hs[0].getPartitioningForTarget(indexOfAssemblingFeatureWithD).getPosition(ReferencePoint.JBeginTrimmed);
if (p != -1) {
if (p < 0)
p = -2 - p;
to = p;
}
}
if (from < to)
hits.put(GeneType.Diversity, dAligner.align(sequenceToAlign, VDJCAligner.getPossibleDLoci(hits.get(GeneType.Variable), hits.get(GeneType.Joining)), from, to, indexOfAssemblingFeatureWithD, assemblingFeatures.length));
else
hits.put(GeneType.Diversity, new VDJCHit[0]);
return new Clone(targets, hits, count, id);
}
Aggregations