use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.
the class ActionAlign method go.
@Override
@SuppressWarnings("unchecked")
public void go(ActionHelper helper) throws Exception {
// FIXME remove in 2.2
if (actionParameters.printNonFunctionalWarnings())
System.out.println("WARNING: -wf / --non-functional-warnings option is deprecated, will be removed in 2.2 " + "release. Use --verbose instead.");
// Saving initial timestamp
long beginTimestamp = System.currentTimeMillis();
// Getting aligner parameters
VDJCAlignerParameters alignerParameters = actionParameters.getAlignerParameters();
// FIXME remove in 2.3
if (actionParameters.getSaveOriginalReads()) {
System.out.println("WARNING: -g / --save-reads option is deprecated, will be removed in 2.3 " + "release. Use -OsaveOriginalReads=true.");
alignerParameters.setSaveOriginalReads(true);
}
// FIXME remove in 2.3
if (actionParameters.getSaveReadDescription()) {
System.out.println("WARNING: -a / --save-description option is deprecated, will be removed in 2.3 " + "release. Use -OsaveOriginalReads=true.");
alignerParameters.setSaveOriginalReads(true);
}
if (!actionParameters.overrides.isEmpty()) {
// Perform parameters overriding
alignerParameters = JsonOverrider.override(alignerParameters, VDJCAlignerParameters.class, actionParameters.overrides);
if (alignerParameters == null)
throw new ProcessException("Failed to override some parameter.");
}
// FIXME remove in 2.2
if (actionParameters.allowDifferentVJLoci != null && actionParameters.allowDifferentVJLoci) {
System.out.println("Warning: usage of --diff-loci is deprecated. Use -OallowChimeras=true instead.");
alignerParameters.setAllowChimeras(true);
}
// Creating aligner
VDJCAligner aligner = VDJCAligner.createAligner(alignerParameters, actionParameters.isInputPaired(), !actionParameters.getNoMerge());
// Detect if automatic featureToAlign correction is required
int totalV = 0, totalVErrors = 0, hasVRegion = 0;
GeneFeature correctingFeature = alignerParameters.getVAlignerParameters().getGeneFeatureToAlign().hasReversedRegions() ? GeneFeature.VRegionWithP : GeneFeature.VRegion;
VDJCLibrary library = VDJCLibraryRegistry.getDefault().getLibrary(actionParameters.library, actionParameters.species);
System.out.println("Reference library: " + library.getLibraryId());
// Printing library level warnings, if specified for the library
if (!library.getWarnings().isEmpty()) {
System.out.println("Library warnings:");
for (String l : library.getWarnings()) System.out.println(l);
}
// Printing citation notice, if specified for the library
if (!library.getCitations().isEmpty()) {
System.out.println("Please cite:");
for (String l : library.getCitations()) System.out.println(l);
}
for (VDJCGene gene : library.getGenes(actionParameters.getChains())) {
if (gene.getGeneType() == GeneType.Variable) {
totalV++;
if (!alignerParameters.containsRequiredFeature(gene)) {
totalVErrors++;
if (gene.getPartitioning().isAvailable(correctingFeature))
hasVRegion++;
}
}
}
// Performing V featureToAlign correction if needed
if (totalVErrors > totalV * 0.9 && hasVRegion > totalVErrors * 0.8) {
System.out.println("WARNING: forcing -OvParameters.geneFeatureToAlign=" + GeneFeature.encode(correctingFeature) + " since current gene feature (" + GeneFeature.encode(alignerParameters.getVAlignerParameters().getGeneFeatureToAlign()) + ") is absent in " + Util.PERCENT_FORMAT.format(100.0 * totalVErrors / totalV) + "% of V genes.");
alignerParameters.getVAlignerParameters().setGeneFeatureToAlign(correctingFeature);
}
int numberOfExcludedNFGenes = 0;
int numberOfExcludedFGenes = 0;
for (VDJCGene gene : library.getGenes(actionParameters.getChains())) {
NucleotideSequence featureSequence = alignerParameters.extractFeatureToAlign(gene);
// exclusionReason is null ==> gene is not excluded
String exclusionReason = null;
if (featureSequence == null)
exclusionReason = "absent " + GeneFeature.encode(alignerParameters.getFeatureToAlign(gene.getGeneType()));
else if (featureSequence.containsWildcards())
exclusionReason = "wildcard symbols in " + GeneFeature.encode(alignerParameters.getFeatureToAlign(gene.getGeneType()));
if (exclusionReason == null)
// If there are no reasons to exclude the gene, adding it to aligner
aligner.addGene(gene);
else {
if (gene.isFunctional()) {
++numberOfExcludedFGenes;
if (actionParameters.verbose())
System.out.println("WARNING: Functional gene " + gene.getName() + " excluded due to " + exclusionReason);
} else
++numberOfExcludedNFGenes;
}
}
if (actionParameters.printWarnings() && numberOfExcludedFGenes > 0)
System.out.println("WARNING: " + numberOfExcludedFGenes + " functional genes were excluded, re-run " + "with --verbose option to see the list of excluded genes and exclusion reason.");
if (actionParameters.verbose() && numberOfExcludedNFGenes > 0)
System.out.println("WARNING: " + numberOfExcludedNFGenes + " non-functional genes excluded.");
if (aligner.getVGenesToAlign().isEmpty())
throw new ProcessException("No V genes to align. Aborting execution. See warnings for more info " + "(turn on verbose warnings by adding --verbose option).");
if (aligner.getJGenesToAlign().isEmpty())
throw new ProcessException("No J genes to align. Aborting execution. See warnings for more info " + "(turn on verbose warnings by adding --verbose option).");
AlignerReport report = new AlignerReport();
report.setStartMillis(beginTimestamp);
report.setInputFiles(actionParameters.getInputsForReport());
report.setOutputFiles(actionParameters.getOutputsForReport());
report.setCommandLine(helper.getCommandLineArguments());
// Attaching report to aligner
aligner.setEventsListener(report);
try (SequenceReaderCloseable<? extends SequenceRead> reader = actionParameters.createReader();
VDJCAlignmentsWriter writer = actionParameters.getOutputName().equals(".") ? null : new VDJCAlignmentsWriter(actionParameters.getOutputName());
SequenceWriter notAlignedWriter = actionParameters.failedReadsR1 == null ? null : (actionParameters.isInputPaired() ? new PairedFastqWriter(actionParameters.failedReadsR1, actionParameters.failedReadsR2) : new SingleFastqWriter(actionParameters.failedReadsR1))) {
if (writer != null)
writer.header(aligner);
OutputPort<? extends SequenceRead> sReads = reader;
CanReportProgress progress = (CanReportProgress) reader;
if (actionParameters.limit != 0) {
sReads = new CountLimitingOutputPort<>(sReads, actionParameters.limit);
progress = SmartProgressReporter.extractProgress((CountLimitingOutputPort<?>) sReads);
}
final boolean writeAllResults = actionParameters.getWriteAllResults();
EnumMap<GeneType, VDJCHit[]> emptyHits = new EnumMap<>(GeneType.class);
for (GeneType gt : GeneType.values()) if (alignerParameters.getGeneAlignerParameters(gt) != null)
emptyHits.put(gt, new VDJCHit[0]);
final PairedEndReadsLayout readsLayout = alignerParameters.getReadsLayout();
SmartProgressReporter.startProgressReport("Alignment", progress);
OutputPort<Chunk<? extends SequenceRead>> mainInputReads = CUtils.buffered((OutputPort) chunked(sReads, 64), 16);
OutputPort<VDJCAlignmentResult> alignments = unchunked(new ParallelProcessor(mainInputReads, chunked(aligner), actionParameters.threads));
for (VDJCAlignmentResult result : CUtils.it(new OrderedOutputPort<>(alignments, new Indexer<VDJCAlignmentResult>() {
@Override
public long getIndex(VDJCAlignmentResult o) {
return o.read.getId();
}
}))) {
VDJCAlignments alignment = result.alignment;
SequenceRead read = result.read;
if (alignment == null) {
if (writeAllResults) // Creating empty alignment object if alignment for current read failed
{
Target target = readsLayout.createTargets(read)[0];
alignment = new VDJCAlignments(emptyHits, target.targets, SequenceHistory.RawSequence.of(read.getId(), target), alignerParameters.isSaveOriginalReads() ? new SequenceRead[] { read } : null);
} else {
if (notAlignedWriter != null)
notAlignedWriter.write(result.read);
continue;
}
}
if (alignment.isChimera())
report.onChimera();
if (writer != null)
writer.write(alignment);
}
if (writer != null)
writer.setNumberOfProcessedReads(reader.getNumberOfReads());
}
report.setFinishMillis(System.currentTimeMillis());
// Writing report to stout
System.out.println("============= Report ==============");
Util.writeReportToStdout(report);
if (actionParameters.report != null)
Util.writeReport(actionParameters.report, report);
if (actionParameters.jsonReport != null)
Util.writeJsonReport(actionParameters.jsonReport, report);
}
use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.
the class ActionExportAlignments method go0.
@Override
public void go0() throws Exception {
try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(parameters.getInputFile(), VDJCLibraryRegistry.getDefault());
InfoWriter<VDJCAlignments> writer = new InfoWriter<>(parameters.getOutputFile())) {
SmartProgressReporter.startProgressReport("Exporting alignments", reader, System.err);
writer.attachInfoProviders((List) parameters.exporters);
writer.ensureHeader();
VDJCAlignments alignments;
long count = 0;
long limit = parameters.getLimit();
OutputPort<VDJCAlignments> alignmentsPort = new FilteringPort<>(reader, parameters.getFilter());
while ((alignments = alignmentsPort.take()) != null && count < limit) {
writer.put(alignments);
++count;
}
}
}
use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.
the class FullSeqAssembler method calculateRawData.
public RawVariantsData calculateRawData(Supplier<OutputPort<VDJCAlignments>> alignments) {
if (!sequenceToVariantId.isEmpty())
throw new IllegalStateException();
for (byte letter = 0; letter < NucleotideSequence.ALPHABET.basicSize(); letter++) {
NucleotideSequence seq = new NucleotideSequence(new byte[] { letter });
sequenceToVariantId.put(seq, letter);
variantIdToSequence.put(letter, seq);
}
sequenceToVariantId.put(NucleotideSequence.EMPTY, NucleotideSequence.ALPHABET.basicSize());
variantIdToSequence.put(NucleotideSequence.ALPHABET.basicSize(), NucleotideSequence.EMPTY);
TIntIntHashMap coverage = new TIntIntHashMap();
TIntObjectHashMap<TIntObjectHashMap<VariantAggregator>> variants = new TIntObjectHashMap<>();
int nAlignments = 0;
for (VDJCAlignments al : CUtils.it(alignments.get())) {
++nAlignments;
for (PointSequence point : toPointSequences(al)) {
int seqIndex = getVariantIndex(point.sequence.getSequence());
coverage.adjustOrPutValue(point.point, 1, 1);
TIntObjectHashMap<VariantAggregator> map = variants.get(point.point);
if (map == null)
variants.put(point.point, map = new TIntObjectHashMap<>());
VariantAggregator var = map.get(seqIndex);
if (var == null)
map.put(point.point, var = new VariantAggregator());
var.count += 1;
var.sumQuality += 0x7F & point.quality;
}
}
assert nAlignments > 0;
long[] forSort = new long[coverage.size()];
TIntIntIterator iterator = coverage.iterator();
int i = 0;
while (iterator.hasNext()) {
iterator.advance();
forSort[i++] = -((((long) iterator.value()) << 32) | iterator.key());
}
Arrays.sort(forSort);
int[] pointsArray = Arrays.stream(forSort).mapToInt(l -> (int) (-l)).toArray();
TIntIntHashMap revIndex = new TIntIntHashMap();
for (int j = 0; j < pointsArray.length; j++) revIndex.put(pointsArray[j], j);
int[] coverageArray = Arrays.stream(forSort).mapToInt(l -> (int) ((-l) >> 32)).toArray();
int[][] packedData = new int[pointsArray.length][nAlignments];
for (int[] aPackedData : packedData) Arrays.fill(aPackedData, -1);
i = 0;
for (VDJCAlignments al : CUtils.it(alignments.get())) {
for (PointSequence point : toPointSequences(al)) {
int pointIndex = revIndex.get(point.point);
packedData[pointIndex][i] = (sequenceToVariantId.get(point.sequence.getSequence()) << 8) | (0xFF & point.quality);
}
i++;
}
return new RawVariantsData(nAlignments, pointsArray, coverageArray) {
@Override
OutputPort<int[]> createPort() {
return CUtils.asOutputPort(Arrays.asList(packedData));
}
};
}
use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.
the class VDJCObjectExtenderTest method testTripleRead.
@Test
public void testTripleRead() throws Exception {
final boolean print = true;
Well44497b rg = new Well44497b(12312);
final VDJCAlignerParameters rnaSeqParams = VDJCParametersPresets.getByName("rna-seq");
final PartialAlignmentsAssemblerAligner aligner = new PartialAlignmentsAssemblerAligner(rnaSeqParams);
final VDJCLibrary lib = VDJCLibraryRegistry.getDefault().getLibrary("default", "hs");
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes()) if (gene.isFunctional())
aligner.addGene(gene);
TargetBuilder.VDJCGenes genes = new TargetBuilder.VDJCGenes(lib, "TRBV12-3*00", "TRBD1*00", "TRBJ1-3*00", "TRBC2*00");
// | 310 | 338 | 438
// 250V + 60CDR3 (20V 7N 10D 3N 20J) + 28J + 100C + 100N
final NucleotideSequence baseSeq = TargetBuilder.generateSequence(genes, "{CDR3Begin(-250)}V*270 NNNNNNN {DBegin(0)}D*10 NNN {CDR3End(-20):FR4End} {CBegin}C*100 N*100", rg);
F4 goAssert = new F4() {
@Override
public VDJCAlignments go(int len, int offset1, int offset2, int offset3) {
NucleotideSequence seq1 = baseSeq.getRange(offset1, offset1 + len);
NucleotideSequence seq2 = baseSeq.getRange(offset2, offset2 + len);
NucleotideSequence seq3 = offset3 == -1 ? null : baseSeq.getRange(offset3, offset3 + len);
VDJCAlignmentResult<VDJCMultiRead> alignment = offset3 == -1 ? aligner.process(MiXCRTestUtils.createMultiRead(seq1, seq2)) : aligner.process(MiXCRTestUtils.createMultiRead(seq1, seq2, seq3));
VDJCAlignments al = alignment.alignment;
Assert.assertNotNull(al);
VDJCObjectExtender<VDJCAlignments> extender = new VDJCObjectExtender<>(Chains.TCR, (byte) 35, rnaSeqParams.getVAlignerParameters().getParameters().getScoring(), rnaSeqParams.getJAlignerParameters().getParameters().getScoring(), 100, 70, ReferencePoint.CDR3Begin, ReferencePoint.CDR3End);
MiXCRTestUtils.assertAlignments(al);
if (print) {
MiXCRTestUtils.printAlignment(al);
System.out.println();
System.out.println("-------------------------------------------");
System.out.println();
}
VDJCAlignments processed = extender.process(al);
if (print) {
MiXCRTestUtils.printAlignment(processed);
System.out.println();
System.out.println("===========================================");
System.out.println();
System.out.println();
}
MiXCRTestUtils.assertAlignments(processed);
Assert.assertEquals(al.getFeature(GeneFeature.VDJunction), processed.getFeature(GeneFeature.VDJunction));
Assert.assertEquals(al.getFeature(GeneFeature.DJJunction), processed.getFeature(GeneFeature.DJJunction));
Assert.assertEquals(al.getFeature(GeneFeature.VJJunction), processed.getFeature(GeneFeature.VJJunction));
return processed;
}
};
VDJCAlignments a1 = goAssert.go(60, 245, 307, 450);
Assert.assertEquals(2, a1.numberOfTargets());
VDJCAlignments a2 = goAssert.go(60, 245, 315, 450);
Assert.assertEquals(3, a2.numberOfTargets());
VDJCAlignments a3 = goAssert.go(60, 245, 315, -1);
Assert.assertEquals(2, a3.numberOfTargets());
VDJCAlignments a4 = goAssert.go(60, 245, 307, -1);
Assert.assertEquals(1, a4.numberOfTargets());
VDJCAlignments a5 = goAssert.go(53, 252, 307, -1);
Assert.assertEquals(1, a5.numberOfTargets());
VDJCAlignments a6 = goAssert.go(53, 252, 307, 450);
Assert.assertEquals(2, a6.numberOfTargets());
}
use of com.milaboratory.mixcr.basictypes.VDJCAlignments in project mixcr by milaboratory.
the class VDJCAlignerPVFirstTest method test2.
@Test
@Ignore
public void test2() throws Exception {
PairedRead read1 = new PairedRead(new SingleReadImpl(0, new NSequenceWithQuality(new NucleotideSequence("GCTGTGTATTACTGTGCAAGAGGGCCCCAAGAAAATAGTGGTTATTACTACGGGTTTGACTACTGGGGCCAGGGA"), SequenceQuality.GOOD_QUALITY_VALUE), "206"), new SingleReadImpl(0, new NSequenceWithQuality(new NucleotideSequence("GGCGCCAGGGGGAAGACCGATGGGCCCTTGGTGGAGGCTGAGGAGACGGTGACCAGGGTTCCCTGGCCCCAGTAG"), SequenceQuality.GOOD_QUALITY_VALUE), "206"));
PairedRead read2 = new PairedRead(new SingleReadImpl(1, new NSequenceWithQuality(new NucleotideSequence("GCTGTGTATTACTGTGCAAGAGGGCCCCAAGAAAATAGTGGTTATTACTACGGGTTTGACTACTGGGGCCAGGGA"), SequenceQuality.GOOD_QUALITY_VALUE), "11621"), new SingleReadImpl(1, new NSequenceWithQuality(new NucleotideSequence("GGCGCCAGGGGGAAGACCGATGGGCCCTTGGTGGAGGCTGAGGAGACGGTGACCAGGGTTCCCTGGCCCCAGTAG"), SequenceQuality.GOOD_QUALITY_VALUE), "11621"));
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
for (VDJCAlignments al : align.alignments) {
for (int i = 0; i < al.numberOfTargets(); i++) {
System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
System.out.println();
}
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
Aggregations