use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class TargetMerger method merge.
@SuppressWarnings("unchecked")
public AlignedTarget merge(AlignedTarget targetLeft, AlignedTarget targetRight, int offset, OverlapType overlapType, int nMismatches) {
if (offset < 0)
return merge(targetRight, targetLeft, -offset, overlapType, nMismatches);
final NSequenceWithQuality mergedTarget = merger.overlap(targetLeft.getTarget(), targetRight.getTarget(), offset);
EnumMap<GeneType, VDJCHit[]> result = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VJC_REFERENCE) {
final BatchAlignerWithBaseParameters bp = ((KGeneAlignmentParameters) alignerParameters.getGeneAlignerParameters(geneType)).getParameters();
final VDJCHit[] leftHits = targetLeft.getAlignments().getHits(geneType);
final VDJCHit[] rightHits = targetRight.getAlignments().getHits(geneType);
GeneFeature alignedFeature = leftHits.length == 0 ? rightHits.length == 0 ? null : rightHits[0].getAlignedFeature() : leftHits[0].getAlignedFeature();
Map<VDJCGeneId, HitMappingRecord> map = extractHitsMapping(targetLeft, targetRight, geneType);
ArrayList<VDJCHit> resultingHits = new ArrayList<>();
for (Map.Entry<VDJCGeneId, HitMappingRecord> mE : map.entrySet()) {
final VDJCGene gene = mE.getValue().gene;
Alignment<NucleotideSequence> mergedAl = merge(bp.getScoring(), extractBandedWidth(bp), mergedTarget.getSequence(), offset, mE.getValue().alignments[0], mE.getValue().alignments[1]);
resultingHits.add(new VDJCHit(gene, mergedAl, alignedFeature));
}
Collections.sort(resultingHits);
// final float relativeMinScore = extractRelativeMinScore(bp);
// int threshold = (int) (resultingHits.size() > 0 ? resultingHits.get(0).getScore() * relativeMinScore : 0);
// for (int i = resultingHits.size() - 1; i > 0; --i)
// if (resultingHits.get(i).getScore() < threshold)
// resultingHits.remove(i);
result.put(geneType, resultingHits.toArray(new VDJCHit[resultingHits.size()]));
}
VDJCAlignments alignments = new VDJCAlignments(result, new NSequenceWithQuality[] { mergedTarget }, new SequenceHistory[] { new SequenceHistory.Merge(overlapType, targetLeft.getHistory(), targetRight.getHistory(), offset, nMismatches) }, VDJCAlignments.mergeOriginalReads(targetLeft.getAlignments(), targetRight.getAlignments()));
AlignedTarget resultTarget = new AlignedTarget(alignments, 0);
for (BPoint bPoint : BPoint.values()) {
int leftPoint = targetLeft.getBPoint(bPoint);
int rightPoint = targetRight.getBPoint(bPoint);
if (leftPoint != -1 && rightPoint != -1)
throw new IllegalArgumentException("Same bPoint defined in both input targets.");
else if (leftPoint != -1)
resultTarget = resultTarget.setBPoint(bPoint, leftPoint);
else if (rightPoint != -1)
resultTarget = resultTarget.setBPoint(bPoint, offset + rightPoint);
}
return resultTarget;
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class RunMiXCR method align.
public static AlignResult align(RunMiXCRAnalysis parameters) throws Exception {
VDJCAlignerParameters alignerParameters = parameters.alignerParameters;
VDJCAligner aligner = VDJCAligner.createAligner(alignerParameters, parameters.isInputPaired(), alignerParameters.getMergerParameters() != null);
List<VDJCGene> genes = new ArrayList<>();
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary(parameters.library, parameters.species).getGenes(parameters.chains)) if (alignerParameters.containsRequiredFeature(gene) && (gene.isFunctional() || !parameters.isFunctionalOnly)) {
genes.add(gene);
aligner.addGene(gene);
}
AlignerReport report = new AlignerReport();
aligner.setEventsListener(report);
try (SequenceReaderCloseable<? extends SequenceRead> reader = parameters.getReader()) {
// start progress reporting
if (reader instanceof CanReportProgress)
SmartProgressReporter.startProgressReport("align", (CanReportProgress) reader);
OutputPort<Chunk<SequenceRead>> mainInputReads = CUtils.buffered((OutputPort) chunked(reader, 64), 16);
OutputPort<VDJCAlignmentResult> alignments = unchunked(new ParallelProcessor(mainInputReads, chunked(aligner), parameters.threads));
List<VDJCAlignments> als = new ArrayList<>();
int ind = 0;
for (VDJCAlignmentResult t : CUtils.it(new OrderedOutputPort<>(alignments, new Indexer<VDJCAlignmentResult>() {
@Override
public long getIndex(VDJCAlignmentResult r) {
return r.read.getId();
}
}))) {
if (t.alignment != null) {
t.alignment.setAlignmentsIndex(ind++);
als.add(t.alignment);
}
}
return new AlignResult(parameters, reader.getNumberOfReads(), report, als, genes, aligner);
}
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class PartialAlignmentsAssemblerAlignerTest method basicTest1.
@Test
public void basicTest1() throws Exception {
Well44497b rg = new Well44497b(12312);
VDJCAlignerParameters rnaSeqParams = VDJCParametersPresets.getByName("rna-seq");
PartialAlignmentsAssemblerAligner aligner = new PartialAlignmentsAssemblerAligner(rnaSeqParams);
VDJCLibrary lib = VDJCLibraryRegistry.getDefault().getLibrary("default", "hs");
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes()) if (gene.isFunctional())
aligner.addGene(gene);
TargetBuilder.VDJCGenes genes = new TargetBuilder.VDJCGenes(lib, "TRBV12-3*00", "TRBD1*00", "TRBJ1-3*00", "TRBC2*00");
// | 305
// 250V + 55CDR3 (20V 7N 10D 3N 15J) + 28J + 100C
NucleotideSequence baseSeq = TargetBuilder.generateSequence(genes, "{CDR3Begin(-250)}V*270 NNNNNNN {DBegin(0)}D*10 NNN {CDR3End(-15):FR4End} {CBegin}C*100", rg);
int len = 70;
NucleotideSequence seq1 = baseSeq.getRange(0, len);
NucleotideSequence seq2 = baseSeq.getRange(245, 245 + len);
NucleotideSequence seq3 = baseSeq.getRange(320, 320 + len);
VDJCAlignmentResult<VDJCMultiRead> alignment = aligner.process(MiXCRTestUtils.createMultiRead(seq1, seq2, seq3));
VDJCAlignments al = alignment.alignment;
Assert.assertNotNull(al);
assertInHits(genes.v, al);
assertInHits(genes.d, al);
assertInHits(genes.j, al);
assertInHits(genes.c, al);
VDJCHit bestV = al.getBestHit(GeneType.Variable);
VDJCHit bestD = al.getBestHit(GeneType.Diversity);
VDJCHit bestJ = al.getBestHit(GeneType.Joining);
VDJCHit bestC = al.getBestHit(GeneType.Constant);
Assert.assertNotNull(bestV.getAlignment(0));
Assert.assertNotNull(bestV.getAlignment(1));
Assert.assertNull(bestV.getAlignment(2));
Assert.assertNull(bestD.getAlignment(0));
Assert.assertNotNull(bestD.getAlignment(1));
Assert.assertNull(bestD.getAlignment(2));
Assert.assertNull(bestJ.getAlignment(0));
Assert.assertNotNull(bestJ.getAlignment(1));
Assert.assertNotNull(bestJ.getAlignment(2));
Assert.assertNull(bestC.getAlignment(0));
Assert.assertNull(bestC.getAlignment(1));
Assert.assertNotNull(bestC.getAlignment(2));
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class VDJCAlignerAbstract method init.
@Override
protected void init() {
DAlignerParameters dAlignerParameters = parameters.getDAlignerParameters();
List<VDJCGene> dGenes = genesToAlign.get(GeneType.Diversity);
if (dAlignerParameters != null && dGenes.size() != 0)
singleDAligner = new SingleDAligner(dAlignerParameters, genesToAlign.get(GeneType.Diversity));
vAligner = createKAligner(GeneType.Variable);
jAligner = createKAligner(GeneType.Joining);
cAligner = createKAligner(GeneType.Constant);
Chains chains = new Chains();
for (VDJCGene gene : getUsedGenes()) chains = chains.merge(gene.getChains());
filters = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VJC_REFERENCE) {
HashMap<String, BitArray> f = new HashMap<>();
for (final String chain : chains) {
BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(geneType);
if (aligner != null)
f.put(chain, aligner.createFilter(new Filter<VDJCGene>() {
@Override
public boolean accept(VDJCGene object) {
return object.getChains().contains(chain);
}
}));
}
filters.put(geneType, f);
}
}
use of io.repseq.core.VDJCGene in project repseqio by repseqio.
the class CompileAction method compile.
public static void compile(Path source, Path destination, int surroundings) throws IOException {
VDJCLibraryRegistry.resetDefaultRegistry();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(source, "lib");
List<VDJCLibraryData> result = new ArrayList<>();
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
VDJCDataUtils.FragmentsBuilder fragmentsBuilder = new VDJCDataUtils.FragmentsBuilder();
for (KnownSequenceFragmentData fragment : lib.getData().getSequenceFragments()) fragmentsBuilder.addRegion(fragment);
for (VDJCGene gene : lib.getGenes()) {
if (!gene.getData().getBaseSequence().isPureOriginalSequence())
throw new IllegalArgumentException("Don't support mutated sequences yet.");
URI uri = gene.getData().getBaseSequence().getOrigin();
Range region = gene.getPartitioning().getContainingRegion();
region = region.expand(surroundings);
NucleotideSequence seq;
try {
seq = gene.getSequenceProvider().getRegion(region);
} catch (SequenceProviderIndexOutOfBoundsException e) {
region = e.getAvailableRange();
if (region == null)
throw new IllegalArgumentException("Wrong anchor points for " + gene.getName() + " ?");
seq = gene.getSequenceProvider().getRegion(region);
}
fragmentsBuilder.addRegion(uri, region, seq);
}
result.add(new VDJCLibraryData(lib.getTaxonId(), lib.getData().getSpeciesNames(), lib.getData().getGenes(), lib.getData().getMeta(), fragmentsBuilder.getFragments()));
}
VDJCDataUtils.writeToFile(result, destination, true);
log.info("{} compiled successfully.", source);
}
Aggregations