use of com.milaboratory.mixcr.assembler.CloneFactory in project mixcr by milaboratory.
the class ActionAssembleContigs method go.
@Override
public void go(ActionHelper helper) throws Exception {
// TODO FIX!!!!!!!!!!!!!
if (parameters.threads > 1)
throw new ParameterException("Multithreaded processing is not supported yet.");
long beginTimestamp = System.currentTimeMillis();
FullSeqAssemblerParameters p = FullSeqAssemblerParameters.getByName("default");
if (!parameters.overrides.isEmpty()) {
// Perform parameters overriding
p = JsonOverrider.override(p, FullSeqAssemblerParameters.class, parameters.overrides);
if (p == null)
throw new ProcessException("Failed to override some parameter.");
}
final FullSeqAssemblerReport report = new FullSeqAssemblerReport();
FullSeqAssemblerParameters assemblerParameters = p;
int totalClonesCount = 0;
List<VDJCGene> genes;
VDJCAlignerParameters alignerParameters;
CloneAssemblerParameters cloneAssemblerParameters;
try (ClnAReader reader = new ClnAReader(parameters.getInputFileName(), VDJCLibraryRegistry.getDefault());
PrimitivO tmpOut = new PrimitivO(new BufferedOutputStream(new FileOutputStream(parameters.getOutputFileName())))) {
final CloneFactory cloneFactory = new CloneFactory(reader.getAssemblerParameters().getCloneFactoryParameters(), reader.getAssemblingFeatures(), reader.getGenes(), reader.getAlignerParameters().getFeaturesToAlignMap());
alignerParameters = reader.getAlignerParameters();
cloneAssemblerParameters = reader.getAssemblerParameters();
genes = reader.getGenes();
IOUtil.registerGeneReferences(tmpOut, genes, alignerParameters);
ClnAReader.CloneAlignmentsPort cloneAlignmentsPort = reader.clonesAndAlignments();
SmartProgressReporter.startProgressReport("Assembling", cloneAlignmentsPort);
OutputPort<Clone[]> parallelProcessor = new ParallelProcessor<>(cloneAlignmentsPort, cloneAlignments -> {
FullSeqAssembler fullSeqAssembler = new FullSeqAssembler(cloneFactory, assemblerParameters, cloneAlignments.clone, alignerParameters);
fullSeqAssembler.setReport(report);
FullSeqAssembler.RawVariantsData rawVariantsData = fullSeqAssembler.calculateRawData(() -> {
try {
return cloneAlignments.alignments();
} catch (IOException e) {
throw new RuntimeException(e);
}
});
return fullSeqAssembler.callVariants(rawVariantsData);
}, parameters.threads);
for (Clone[] clones : CUtils.it(parallelProcessor)) {
totalClonesCount += clones.length;
for (Clone cl : clones) tmpOut.writeObject(cl);
}
assert report.getInitialCloneCount() == reader.numberOfClones();
}
assert report.getFinalCloneCount() == totalClonesCount;
assert report.getFinalCloneCount() >= report.getInitialCloneCount();
Clone[] clones = new Clone[totalClonesCount];
try (PrimitivI tmpIn = new PrimitivI(new BufferedInputStream(new FileInputStream(parameters.getOutputFileName())))) {
IOUtil.registerGeneReferences(tmpIn, genes, alignerParameters);
int i = 0;
for (Clone clone : CUtils.it(new PipeDataInputReader<>(Clone.class, tmpIn, totalClonesCount))) clones[i++] = clone;
}
Arrays.sort(clones, Comparator.comparingDouble(c -> -c.getCount()));
for (int i = 0; i < clones.length; i++) clones[i] = clones[i].setId(i);
CloneSet cloneSet = new CloneSet(Arrays.asList(clones), genes, alignerParameters.getFeaturesToAlignMap(), alignerParameters, cloneAssemblerParameters);
try (CloneSetIO.CloneSetWriter writer = new CloneSetIO.CloneSetWriter(cloneSet, parameters.getOutputFileName())) {
SmartProgressReporter.startProgressReport(writer);
writer.write();
}
ReportWrapper reportWrapper = new ReportWrapper(command(), report);
reportWrapper.setStartMillis(beginTimestamp);
reportWrapper.setInputFiles(parameters.getInputFileName());
reportWrapper.setOutputFiles(parameters.getOutputFileName());
reportWrapper.setCommandLine(helper.getCommandLineArguments());
reportWrapper.setFinishMillis(System.currentTimeMillis());
// Writing report to stout
System.out.println("============= Report ==============");
Util.writeReportToStdout(report);
if (parameters.report != null)
Util.writeReport(parameters.report, reportWrapper);
if (parameters.jsonReport != null)
Util.writeJsonReport(parameters.jsonReport, reportWrapper);
}
use of com.milaboratory.mixcr.assembler.CloneFactory in project mixcr by milaboratory.
the class FullSeqAssemblerTest method testRandom1.
@Test
public void testRandom1() throws Exception {
CloneFraction[] clones = { new CloneFraction(750, masterSeq1WT), // V: S346:G->T
new CloneFraction(1000, masterSeq1VSub1), // J: D55:A
new CloneFraction(1000, masterSeq1VDel1JDel1), // J: D62:C
new CloneFraction(500, masterSeq1VDel1JDelVSub2) };
Well19937c rand = new Well19937c();
rand.setSeed(12345);
RandomDataGenerator rdg = new RandomDataGenerator(rand);
List<SequenceRead> readsOrig = new ArrayList<>();
int readLength = 100;
int id = -1;
for (CloneFraction clone : clones) {
for (int i = 0; i < clone.count; i++) {
// Left read with CDR3
++id;
readsOrig.add(new PairedRead(new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(-rand.nextInt(readLength - clone.seq.cdr3Part), readLength)), "R1_" + id), new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3End(rdg.nextInt(-clone.seq.cdr3Part / 2, clone.seq.jPart), readLength).getReverseComplement()), "R2_" + id)));
++id;
readsOrig.add(new PairedRead(new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(rdg.nextInt(-clone.seq.vPart, clone.seq.cdr3Part / 2 - readLength), readLength)), "R1_" + id), new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(-rand.nextInt(readLength - clone.seq.cdr3Part), readLength)).getReverseComplement(), "R2_" + id)));
}
}
// readsOrig = Arrays.asList(setReadId(0, readsOrig.get(12)), setReadId(1, readsOrig.get(13)));
int[] perm = rdg.nextPermutation(readsOrig.size(), readsOrig.size());
List<SequenceRead> reads = new ArrayList<>();
for (int i = 0; i < readsOrig.size(); i++) reads.add(readsOrig.get(perm[i]));
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(new SequenceReaderCloseable<SequenceRead>() {
int counter = 0;
@Override
public void close() {
}
@Override
public long getNumberOfReads() {
return counter;
}
@Override
public synchronized SequenceRead take() {
if (counter == reads.size())
return null;
return reads.get(counter++);
}
}, true);
params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
params.alignerParameters.setVAlignmentParameters(params.alignerParameters.getVAlignerParameters().setGeneFeatureToAlign(GeneFeature.VTranscriptWithP));
RunMiXCR.AlignResult align = RunMiXCR.align(params);
// // TODO exception for translation
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
// System.out.println();
// System.out.println(" ================================================ ");
// System.out.println();
// }
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
Assert.assertEquals(1, assemble.cloneSet.size());
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments.stream().filter(a -> a.getFeature(GeneFeature.CDR3) != null).collect(Collectors.toList())));
List<Clone> clns = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
clns.sort(Comparator.comparingDouble(Clone::getCount).reversed());
System.out.println("# Clones: " + clns.size());
id = 0;
for (Clone clone : clns) {
clone = clone.setId(id++);
System.out.println(clone.numberOfTargets());
System.out.println(clone.getCount());
System.out.println(clone.getFraction());
System.out.println(clone.getBestHit(GeneType.Variable).getAlignment(0).getAbsoluteMutations());
System.out.println(clone.getBestHit(GeneType.Joining).getAlignment(0).getAbsoluteMutations());
System.out.println();
// ActionExportClonesPretty.outputCompact(System.out, clone);
}
}
use of com.milaboratory.mixcr.assembler.CloneFactory in project mixcr by milaboratory.
the class FullSeqAssemblerTest method test1.
@Test
public void test1() throws Exception {
int len = 140;
PairedRead read1 = new PairedRead(new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len)), "R1"), new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len).getReverseComplement()), "R2"));
PairedRead read2 = new PairedRead(new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len)), "R1"), new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len).getReverseComplement()), "R2"));
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1, read2);
// [-200, -60] [-20, 120]
// [-150, 110]
//
// [-200, -150], [110, 120] = 60
// [-60, -20] = 40
params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
params.cloneAssemblerParameters.updateFrom(params.alignerParameters);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++)
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
PointSequence[] r2s = agg.toPointSequences(align.alignments.get(1));
TIntHashSet p2 = new TIntHashSet(Arrays.stream(r2s).mapToInt(s -> s.point).toArray());
Assert.assertEquals(261 - masterSeq1WT.cdr3Part, p2.size());
PointSequence[] r1s = agg.toPointSequences(align.alignments.get(0));
TIntHashSet p1 = new TIntHashSet(Arrays.stream(r1s).mapToInt(s -> s.point).toArray());
Assert.assertEquals(281 - masterSeq1WT.cdr3Part, p1.size());
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments));
long uniq1 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[0]).filter(c -> c == 0xFFFFFFFF).count();
long uniq2 = StreamSupport.stream(CUtils.it(prep.createPort()).spliterator(), false).mapToInt(l -> l[1]).filter(c -> c == 0xFFFFFFFF).count();
Assert.assertEquals(40, uniq1);
Assert.assertEquals(60, uniq2);
for (Clone clone : new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones()) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
use of com.milaboratory.mixcr.assembler.CloneFactory in project mixcr by milaboratory.
the class FullSeqAssemblerTest method testLargeCloneNoMismatches.
@Test
public void testLargeCloneNoMismatches() throws Exception {
MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
// ActionExportClonesPretty.outputCompact(System.out, largeClone);
// System.exit(0);
Well44497b rnd = new Well44497b(1234567889L);
int nReads = 100_000;
int readLength = 75, readGap = 150;
// slice seq randomly
PairedRead[] slicedReads = new PairedRead[nReads];
for (int i = 0; i < nReads; ++i) {
int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
assert r2from > r1from;
slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
}
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
// params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
// System.out.println();
// System.out.println(" ================================================ ");
// System.out.println();
// }
Assert.assertEquals(1, assemble.cloneSet.size());
Clone initialClone = assemble.cloneSet.get(0);
NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
for (int i = 0; i < al.numberOfTargets(); i++) {
System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
System.out.println();
}
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
});
// System.exit(0);
System.out.println("=> Agg");
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
for (Clone clone : clones) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
Aggregations