use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class VDJCAlignerWithMergeTest method test1.
@Test
public void test1() throws Exception {
VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
ByteArrayOutputStream bos = new ByteArrayOutputStream();
List<VDJCAlignments> alignemntsList = new ArrayList<>();
int header;
int total = 0;
int leftHit = 0;
try (PairedFastqReader reader = new PairedFastqReader(VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), VDJCAlignerSTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R2.fastq"), true)) {
VDJCAlignerWithMerge aligner = new VDJCAlignerWithMerge(parameters);
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) {
if (parameters.containsRequiredFeature(gene))
aligner.addGene(gene);
}
for (PairedRead read : CUtils.it(reader)) {
++total;
VDJCAlignmentResult<PairedRead> result = aligner.process(read);
if (result.alignment != null) {
alignemntsList.add(result.alignment);
for (VDJCHit hit : result.alignment.getHits(GeneType.Variable)) if (hit.getAlignment(0) != null && hit.getAlignment(1) != null)
++leftHit;
}
}
}
// for (VDJCAlignments alignments : alignemntsList) {
// for (int i = 0; i < alignments.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(alignments, i));
// }
// }
System.out.println(alignemntsList.size());
System.out.println(total);
System.out.println(leftHit);
Assert.assertTrue(alignemntsList.size() > 10);
// System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
//
// try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()), ll)) {
// int i = 0;
// for (VDJCAlignments alignments : CUtils.it(reader))
// Assert.assertEquals(alignemntsList.get(i++), alignments);
// }
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class AlignedTarget method orderTargets.
public static List<AlignedTarget> orderTargets(List<AlignedTarget> targets) {
// Selecting best gene by total score
final EnumMap<GeneType, VDJCGene> bestGenes = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VDJC_REFERENCE) {
TObjectLongMap<VDJCGene> scores = new TObjectLongHashMap<>();
for (AlignedTarget target : targets) {
for (VDJCHit hit : target.getAlignments().getHits(geneType)) {
Alignment<NucleotideSequence> alignment = hit.getAlignment(target.getTargetId());
if (alignment != null)
scores.adjustOrPutValue(hit.getGene(), (long) alignment.getScore(), (long) alignment.getScore());
}
}
VDJCGene bestGene = null;
long bestScore = Long.MIN_VALUE;
TObjectLongIterator<VDJCGene> it = scores.iterator();
while (it.hasNext()) {
it.advance();
if (bestScore < it.value()) {
bestScore = it.value();
bestGene = it.key();
}
}
if (bestGene != null)
bestGenes.put(geneType, bestGene);
}
// Class to facilitate comparison between targets
final class Wrapper implements Comparable<Wrapper> {
final AlignedTarget target;
final EnumMap<GeneType, Alignment<NucleotideSequence>> alignments = new EnumMap<>(GeneType.class);
Wrapper(AlignedTarget target) {
this.target = target;
for (VDJCGene gene : bestGenes.values()) for (VDJCHit hit : target.getAlignments().getHits(gene.getGeneType())) if (hit.getGene() == gene) {
Alignment<NucleotideSequence> alignment = hit.getAlignment(target.targetId);
if (alignment != null) {
alignments.put(gene.getGeneType(), alignment);
break;
}
}
}
GeneType firstAlignedGeneType() {
for (GeneType geneType : GeneType.VDJC_REFERENCE) if (alignments.containsKey(geneType))
return geneType;
return null;
}
@Override
public int compareTo(Wrapper o) {
GeneType thisFirstGene = firstAlignedGeneType();
GeneType otherFirstGene = o.firstAlignedGeneType();
int cmp = Byte.compare(thisFirstGene.getOrder(), otherFirstGene.getOrder());
return cmp != 0 ? cmp : Integer.compare(alignments.get(thisFirstGene).getSequence1Range().getLower(), o.alignments.get(thisFirstGene).getSequence1Range().getLower());
}
}
// Creating wrappers and sorting list
List<Wrapper> wrappers = new ArrayList<>(targets.size());
for (AlignedTarget target : targets) {
Wrapper wrapper = new Wrapper(target);
if (wrapper.firstAlignedGeneType() == null)
continue;
wrappers.add(wrapper);
}
Collections.sort(wrappers);
// Creating result
List<AlignedTarget> result = new ArrayList<>(wrappers.size());
for (Wrapper wrapper : wrappers) result.add(wrapper.target);
return result;
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class PartialAlignmentsAssemblerAligner method process0.
@Override
@SuppressWarnings("unchecked")
protected VDJCAlignmentResult<VDJCMultiRead> process0(VDJCMultiRead input) {
final int nReads = input.numberOfReads();
EnumMap<GeneType, VDJCHit[]> vdjcHits = new EnumMap<>(GeneType.class);
NSequenceWithQuality[] targets = new NSequenceWithQuality[nReads];
Chains currentChains = Chains.ALL;
// Across all gene types
int lastAlignedTarget = 0;
int firstJTarget = -1;
int lastVTarget = -1;
for (int g = 0; g < GeneType.VJC_REFERENCE.length; g++) {
GeneType gt = GeneType.VJC_REFERENCE[g];
AlignmentHit<NucleotideSequence, VDJCGene>[][] alignmentHits = new AlignmentHit[nReads][];
Arrays.fill(alignmentHits, new AlignmentHit[0]);
for (int targetId = lastAlignedTarget; targetId < nReads; targetId++) {
targets[targetId] = input.getRead(targetId).getData();
final NucleotideSequence sequence = input.getRead(targetId).getData().getSequence();
AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> als;
final BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(gt);
if (aligner != null) {
int pointer = 0;
if (g != 0) {
// Not V gene
VDJCHit[] vdjcHits1 = vdjcHits.get(GeneType.VJC_REFERENCE[g - 1]);
Alignment<NucleotideSequence> alignment;
if (vdjcHits1.length != 0 && (alignment = vdjcHits1[0].getAlignment(targetId)) != null)
pointer = alignment.getSequence2Range().getTo();
}
als = aligner.align(sequence, pointer, sequence.size(), getFilter(gt, currentChains));
if (als != null && als.hasHits()) {
lastAlignedTarget = targetId;
if (// V
g == 0)
lastVTarget = targetId;
if (// J
g == 1)
firstJTarget = targetId;
alignmentHits[targetId] = als.getHits().toArray(new AlignmentHit[als.getHits().size()]);
}
}
}
Chains chains = Chains.EMPTY;
for (AlignmentHit<NucleotideSequence, VDJCGene>[] alignmentHit0 : alignmentHits) if (alignmentHit0 != null)
for (AlignmentHit<NucleotideSequence, VDJCGene> hit : alignmentHit0) chains = chains.merge(hit.getRecordPayload().getChains());
currentChains = currentChains.intersection(chains);
vdjcHits.put(gt, combine(parameters.getFeatureToAlign(gt), alignmentHits));
}
boolean fineVAlignmentPerformed = false, fineJAlignmentPerformed = false;
// Additional (fine) alignment step for V gene
VDJCHit[] vHits = vdjcHits.get(GeneType.Variable);
final AlignmentScoring<NucleotideSequence> vScoring = parameters.getVAlignerParameters().getParameters().getScoring();
if (// TODO implement AffineGapAlignmentScoring
vHits != null && vHits.length > 0 && !(vScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Joining) != null && vdjcHits.get(GeneType.Joining).length > 0) {
int minimalVSpace = getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()) / vScoring.getMaximalMatchScore();
// Assert
if (firstJTarget == -1)
throw new AssertionError();
for (int targetId = 1; targetId <= firstJTarget; targetId++) {
int vSpace;
final NucleotideSequence sequence2 = targets[targetId].getSequence();
if (vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId) != null && (vSpace = vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId).getSequence2Range().getFrom()) >= minimalVSpace) {
for (int vHitIndex = 0; vHitIndex < vHits.length; vHitIndex++) {
VDJCHit vHit = vHits[vHitIndex];
// Perform fine alignment only if target is not already aligned by fast aligner
if (vHit.getAlignment(targetId) != null)
continue;
Alignment<NucleotideSequence> leftAlignment = vHit.getAlignment(targetId - 1);
if (leftAlignment == null)
continue;
final NucleotideSequence sequence1 = leftAlignment.getSequence1();
final int beginFR3 = vHit.getGene().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
if (beginFR3 == -1)
continue;
final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring<NucleotideSequence>) vScoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, vSpace, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
continue;
fineVAlignmentPerformed = true;
vHits[vHitIndex] = vHit.setAlignment(targetId, alignment);
}
}
}
}
Arrays.sort(vHits);
vdjcHits.put(GeneType.Variable, cutRelativeScore(vHits, parameters.getVAlignerParameters().getRelativeMinScore(), parameters.getVAlignerParameters().getParameters().getMaxHits()));
// Additional (fine) alignment step for J gene
VDJCHit[] jHits = vdjcHits.get(GeneType.Joining);
final AlignmentScoring<NucleotideSequence> jScoring = parameters.getJAlignerParameters().getParameters().getScoring();
if (// TODO implement AffineGapAlignmentScoring
jHits != null && jHits.length > 0 && !(jScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Variable) != null && vdjcHits.get(GeneType.Variable).length > 0) {
int minimalJSpace = getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()) / jScoring.getMaximalMatchScore();
// Assert
if (lastVTarget == -1)
throw new AssertionError();
for (int targetId = lastVTarget; targetId < nReads - 1; targetId++) {
int jSpaceBegin;
final NucleotideSequence sequence2 = targets[targetId].getSequence();
if (vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId) != null && (sequence2.size() - (jSpaceBegin = vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId).getSequence2Range().getTo())) >= minimalJSpace) {
for (int jHitIndex = 0; jHitIndex < jHits.length; jHitIndex++) {
VDJCHit jHit = jHits[jHitIndex];
// Perform fine alignment only if target is not already aligned by fast aligner
if (jHit.getAlignment(targetId) != null)
continue;
Alignment<NucleotideSequence> rightAlignment = jHit.getAlignment(targetId + 1);
if (rightAlignment == null)
continue;
final NucleotideSequence sequence1 = rightAlignment.getSequence1();
final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) jScoring, sequence1, sequence2, 0, sequence1.size(), jSpaceBegin, sequence2.size() - jSpaceBegin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
continue;
fineJAlignmentPerformed = true;
jHits[jHitIndex] = jHit.setAlignment(targetId, alignment);
}
}
}
}
Arrays.sort(jHits);
vdjcHits.put(GeneType.Joining, cutRelativeScore(jHits, parameters.getJAlignerParameters().getRelativeMinScore(), parameters.getJAlignerParameters().getParameters().getMaxHits()));
int dGeneTarget = -1;
VDJCHit[] vResult = vdjcHits.get(GeneType.Variable);
VDJCHit[] jResult = vdjcHits.get(GeneType.Joining);
if (vResult.length != 0 && jResult.length != 0)
for (int i = 0; i < nReads; i++) if (vResult[0].getAlignment(i) != null && jResult[0].getAlignment(i) != null) {
dGeneTarget = i;
break;
}
// if (fineVAlignmentPerformed && fineJAlignmentPerformed)
// System.out.println("sd");
VDJCHit[] dResult;
if (dGeneTarget == -1)
dResult = new VDJCHit[0];
else {
final Alignment<NucleotideSequence> vAl = vResult[0].getAlignment(dGeneTarget);
final Alignment<NucleotideSequence> jAl = jResult[0].getAlignment(dGeneTarget);
if (vAl == null || jAl == null || singleDAligner == null)
dResult = new VDJCHit[0];
else
dResult = singleDAligner.align(targets[dGeneTarget].getSequence(), getPossibleDLoci(vResult, jResult), vAl.getSequence2Range().getTo(), jAl.getSequence2Range().getFrom(), dGeneTarget, nReads);
}
final VDJCAlignments alignment = new VDJCAlignments(vResult, dResult, jResult, cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets, input.getHistory(), input.getOriginalReads());
return new VDJCAlignmentResult<>(input, alignment);
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class IOTest method testSerialization1.
@Test
public void testSerialization1() throws Exception {
VDJCAlignerParameters parameters = VDJCParametersPresets.getByName("default");
ByteArrayOutputStream bos = new ByteArrayOutputStream();
List<VDJCAlignments> alignemntsList = new ArrayList<>();
int header;
long numberOfReads;
try (SingleFastqReader reader = new SingleFastqReader(IOTest.class.getClassLoader().getResourceAsStream("sequences/sample_IGH_R1.fastq"), true)) {
VDJCAlignerS aligner = new VDJCAlignerS(parameters);
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes(Chains.IGH)) {
if (parameters.containsRequiredFeature(gene))
aligner.addGene(gene);
}
try (VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(bos)) {
writer.header(aligner);
header = bos.size();
for (SingleRead read : CUtils.it(reader)) {
VDJCAlignmentResult<SingleRead> result = aligner.process(read);
if (result.alignment != null) {
writer.write(result.alignment);
alignemntsList.add(result.alignment);
}
}
writer.setNumberOfProcessedReads(numberOfReads = reader.getNumberOfReads());
}
}
assertTrue(alignemntsList.size() > 10);
assertTrue(numberOfReads > 10);
System.out.println("Bytes per alignment: " + (bos.size() - header) / alignemntsList.size());
try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(new ByteArrayInputStream(bos.toByteArray()))) {
int i = 0;
for (VDJCAlignments alignments : CUtils.it(reader)) assertEquals(alignemntsList.get(i++), alignments);
Assert.assertEquals(numberOfReads, reader.getNumberOfReads());
}
}
use of io.repseq.core.VDJCGene in project mixcr by milaboratory.
the class TargetMerger method extractHitsMapping.
@SuppressWarnings("unchecked")
static Map<VDJCGeneId, HitMappingRecord> extractHitsMapping(AlignedTarget targetLeft, AlignedTarget targetRight, GeneType geneType) {
Map<VDJCGeneId, HitMappingRecord> map = new HashMap<>();
for (VDJCHit l : targetLeft.getAlignments().getHits(geneType)) {
final VDJCGene gene = l.getGene();
final Alignment<NucleotideSequence> al = l.getAlignment(targetLeft.getTargetId());
if (al != null)
map.put(gene.getId(), new HitMappingRecord(gene, new Alignment[] { al, null }));
}
for (VDJCHit r : targetRight.getAlignments().getHits(geneType)) {
final VDJCGene gene = r.getGene();
final Alignment<NucleotideSequence> alignment = r.getAlignment(targetRight.getTargetId());
if (alignment == null)
continue;
final HitMappingRecord als = map.get(gene.getId());
if (als == null)
map.put(gene.getId(), new HitMappingRecord(gene, new Alignment[] { null, alignment }));
else {
assert als.alignments[1] == null;
als.alignments[1] = alignment;
}
}
return map;
}
Aggregations