use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class FullSeqAssemblerTest method test1.
@Test
public void test1() throws Exception {
int len = 140;
PairedRead read1 = new PairedRead(new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len)), "R1"), new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len).getReverseComplement()), "R2"));
PairedRead read2 = new PairedRead(new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len)), "R1"), new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len).getReverseComplement()), "R2"));
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1, read2);
// [-200, -60] [-20, 120]
// [-150, 110]
//
// [-200, -150], [110, 120] = 60
// [-60, -20] = 40
params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
params.cloneAssemblerParameters.updateFrom(params.alignerParameters);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++)
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
PointSequence[] r2s = agg.toPointSequences(align.alignments.get(1));
TIntHashSet p2 = new TIntHashSet(Arrays.stream(r2s).mapToInt(s -> s.point).toArray());
Assert.assertEquals(261 - masterSeq1WT.cdr3Part, p2.size());
PointSequence[] r1s = agg.toPointSequences(align.alignments.get(0));
TIntHashSet p1 = new TIntHashSet(Arrays.stream(r1s).mapToInt(s -> s.point).toArray());
Assert.assertEquals(281 - masterSeq1WT.cdr3Part, p1.size());
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments));
long uniq1 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[0]).filter(c -> c == 0xFFFFFFFF).count();
long uniq2 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[1]).filter(c -> c == 0xFFFFFFFF).count();
Assert.assertEquals(40, uniq1);
Assert.assertEquals(60, uniq2);
for (Clone clone : new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones()) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class FullSeqAssemblerTest method testLargeCloneNoMismatches.
@Test
public void testLargeCloneNoMismatches() throws Exception {
MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
// ActionExportClonesPretty.outputCompact(System.out, largeClone);
// System.exit(0);
Well44497b rnd = new Well44497b(1234567889L);
int nReads = 100_000;
int readLength = 75, readGap = 150;
// slice seq randomly
PairedRead[] slicedReads = new PairedRead[nReads];
for (int i = 0; i < nReads; ++i) {
int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
assert r2from > r1from;
slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
}
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
// params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
// System.out.println();
// System.out.println(" ================================================ ");
// System.out.println();
// }
Assert.assertEquals(1, assemble.cloneSet.size());
Clone initialClone = assemble.cloneSet.get(0);
NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
for (int i = 0; i < al.numberOfTargets(); i++) {
System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
System.out.println();
}
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
});
// System.exit(0);
System.out.println("=> Agg");
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
for (Clone clone : clones) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class ClonalSequenceTest method createRandomTestDara.
private TestData createRandomTestDara(int size, RandomGenerator random) {
MutationsBuilder<NucleotideSequence> builder = new MutationsBuilder<>(NucleotideSequence.ALPHABET);
NSequenceWithQuality[] c1 = new NSequenceWithQuality[size];
NSequenceWithQuality[] c2 = new NSequenceWithQuality[size];
int offset = 0;
for (int i = 0; i < size; ++i) {
NucleotideSequence s1 = TestUtil.randomSequence(NucleotideSequence.ALPHABET, random, 5, 15);
Mutations<NucleotideSequence> muts = MutationsGenerator.generateMutations(s1, MutationModels.getEmpiricalNucleotideMutationModel(), 1, s1.size() - 1);
NucleotideSequence s2 = muts.mutate(s1);
c1[i] = new NSequenceWithQuality(s1, SequenceQuality.getUniformQuality((byte) 0, s1.size()));
c2[i] = new NSequenceWithQuality(s2, SequenceQuality.getUniformQuality((byte) 0, s2.size()));
builder.append(muts.move(offset));
offset += s1.size();
}
return new TestData(new ClonalSequence(c1), new ClonalSequence(c2), builder.createAndDestroy());
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class ClonalSequenceTest method create.
private ClonalSequence create(String... strings) {
NSequenceWithQuality[] data = new NSequenceWithQuality[strings.length];
for (int i = 0; i < strings.length; ++i) {
NucleotideSequence s = new NucleotideSequence(strings[i]);
SequenceQuality q = SequenceQuality.getUniformQuality((byte) 0, s.size());
data[i] = new NSequenceWithQuality(s, q);
}
return new ClonalSequence(data);
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class TargetMerger method merge.
@SuppressWarnings("unchecked")
public AlignedTarget merge(AlignedTarget targetLeft, AlignedTarget targetRight, int offset, OverlapType overlapType, int nMismatches) {
if (offset < 0)
return merge(targetRight, targetLeft, -offset, overlapType, nMismatches);
final NSequenceWithQuality mergedTarget = merger.overlap(targetLeft.getTarget(), targetRight.getTarget(), offset);
EnumMap<GeneType, VDJCHit[]> result = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VJC_REFERENCE) {
final BatchAlignerWithBaseParameters bp = ((KGeneAlignmentParameters) alignerParameters.getGeneAlignerParameters(geneType)).getParameters();
final VDJCHit[] leftHits = targetLeft.getAlignments().getHits(geneType);
final VDJCHit[] rightHits = targetRight.getAlignments().getHits(geneType);
GeneFeature alignedFeature = leftHits.length == 0 ? rightHits.length == 0 ? null : rightHits[0].getAlignedFeature() : leftHits[0].getAlignedFeature();
Map<VDJCGeneId, HitMappingRecord> map = extractHitsMapping(targetLeft, targetRight, geneType);
ArrayList<VDJCHit> resultingHits = new ArrayList<>();
for (Map.Entry<VDJCGeneId, HitMappingRecord> mE : map.entrySet()) {
final VDJCGene gene = mE.getValue().gene;
Alignment<NucleotideSequence> mergedAl = merge(bp.getScoring(), extractBandedWidth(bp), mergedTarget.getSequence(), offset, mE.getValue().alignments[0], mE.getValue().alignments[1]);
resultingHits.add(new VDJCHit(gene, mergedAl, alignedFeature));
}
Collections.sort(resultingHits);
// final float relativeMinScore = extractRelativeMinScore(bp);
// int threshold = (int) (resultingHits.size() > 0 ? resultingHits.get(0).getScore() * relativeMinScore : 0);
// for (int i = resultingHits.size() - 1; i > 0; --i)
// if (resultingHits.get(i).getScore() < threshold)
// resultingHits.remove(i);
result.put(geneType, resultingHits.toArray(new VDJCHit[resultingHits.size()]));
}
VDJCAlignments alignments = new VDJCAlignments(result, new NSequenceWithQuality[] { mergedTarget }, new SequenceHistory[] { new SequenceHistory.Merge(overlapType, targetLeft.getHistory(), targetRight.getHistory(), offset, nMismatches) }, VDJCAlignments.mergeOriginalReads(targetLeft.getAlignments(), targetRight.getAlignments()));
AlignedTarget resultTarget = new AlignedTarget(alignments, 0);
for (BPoint bPoint : BPoint.values()) {
int leftPoint = targetLeft.getBPoint(bPoint);
int rightPoint = targetRight.getBPoint(bPoint);
if (leftPoint != -1 && rightPoint != -1)
throw new IllegalArgumentException("Same bPoint defined in both input targets.");
else if (leftPoint != -1)
resultTarget = resultTarget.setBPoint(bPoint, leftPoint);
else if (rightPoint != -1)
resultTarget = resultTarget.setBPoint(bPoint, offset + rightPoint);
}
return resultTarget;
}
Aggregations