use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class TargetMerger method merge.
/**
* @param targetLeft left sequence
* @param targetRight right sequence
* @param trySequenceMerge whether to try merging using sequence overlap (if alignment overlap failed)
*/
public TargetMergingResult merge(AlignedTarget targetLeft, AlignedTarget targetRight, boolean trySequenceMerge) {
for (GeneType geneType : GeneType.VJC_REFERENCE) {
if (!hasAlignments(targetLeft, geneType) || !hasAlignments(targetRight, geneType))
continue;
List<HitMappingRecord> als = extractSortedHits(targetLeft, targetRight, geneType);
Alignment<NucleotideSequence>[] topHits = als.get(0).alignments;
if (topHits[0] != null && topHits[1] != null) {
final Alignment<NucleotideSequence> left = topHits[0];
final Alignment<NucleotideSequence> right = topHits[1];
final int from = Math.max(left.getSequence1Range().getFrom(), right.getSequence1Range().getFrom());
final int to = Math.min(left.getSequence1Range().getTo(), right.getSequence1Range().getTo());
if (to <= from)
continue;
int delta = left.convertToSeq2Position(from) - right.convertToSeq2Position(from);
if (delta != left.convertToSeq2Position(to) - right.convertToSeq2Position(to))
continue;
int seq1Offset = delta > 0 ? delta : 0;
int seq2Offset = delta > 0 ? 0 : -delta;
int overlap = Math.min(targetLeft.getTarget().size() - seq1Offset, targetRight.getTarget().size() - seq2Offset);
int mismatches = SequencesUtils.mismatchCount(targetLeft.getTarget().getSequence(), seq1Offset, targetRight.getTarget().getSequence(), seq2Offset, overlap);
double identity = MismatchOnlyPairedReadMerger.identity(identityType, targetLeft.getTarget(), seq1Offset, targetRight.getTarget(), seq2Offset, overlap);
if (identity < minimalAlignmentMergeIdentity)
return new TargetMergingResult(geneType);
final AlignedTarget merge = merge(targetLeft, targetRight, delta, OverlapType.AlignmentOverlap, mismatches);
return new TargetMergingResult(true, null, merge, PairedReadMergingResult.MATCH_SCORE * (overlap - mismatches) + PairedReadMergingResult.MISMATCH_SCORE * mismatches, overlap, mismatches, delta);
}
}
if (!trySequenceMerge)
return new TargetMergingResult();
final PairedReadMergingResult merge = merger.merge(targetLeft.getTarget(), targetRight.getTarget());
if (!merge.isSuccessful())
return new TargetMergingResult();
return new TargetMergingResult(false, null, merge(targetLeft, targetRight, merge.getOffset(), OverlapType.SequenceOverlap, merge.getErrors()), merge.score(), merge.getOverlap(), merge.getErrors(), merge.getOffset());
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class GGeneTest method serializationDeserializationTest.
@Test
public void serializationDeserializationTest() throws Exception {
VDJCLibrary library = VDJCLibraryRegistry.getDefaultLibrary("hs");
VDJCGene v = library.getSafe("TRBV12-3*00");
VDJCGene d = library.getSafe("TRBD1*00");
VDJCGene j = library.getSafe("TRBJ1-2*00");
VDJCGene c = library.getSafe("TRBC1*00");
GGene rWithD = new GGene(null, new VDJCGenes(v, d, j, c), new VDJTrimming(-3, 1, new DTrimming(2, 3)), new NucleotideSequence("ATAAG"), new NucleotideSequence("GACAT"));
GGene rWithoutD = new GGene(null, new VDJCGenes(v, null, j, c), new VDJTrimming(-3, 1), new NucleotideSequence("ATAAG"), null);
TestUtil.assertJson(rWithD);
TestUtil.assertJson(rWithoutD);
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class GGeneTest method test1.
@Test
public void test1() throws Exception {
VDJCLibrary library = VDJCLibraryRegistry.getDefaultLibrary("hs");
VDJCGene v = library.getSafe("TRBV12-3*00");
VDJCGene d = library.getSafe("TRBD1*00");
VDJCGene j = library.getSafe("TRBJ1-2*00");
VDJCGene c = library.getSafe("TRBC1*00");
NucleotideSequence s0 = new NucleotideSequence("ATAAG");
NucleotideSequence s1 = new NucleotideSequence("GACAT");
GGene rg;
// Custom 1
rg = new GGene(null, new VDJCGenes(v, d, j, c), new VDJTrimming(-3, 1, 2, 3), s0, s1);
assertSequences(rg.getFeature(GeneFeature.CDR3), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, -3)), s0, d.getFeature(new GeneFeature(ReferencePoint.DBegin, 2, 0)), d.getFeature(new GeneFeature(GeneFeature.DRegion, 0, 0)), d.getFeature(new GeneFeature(ReferencePoint.DEnd, 0, -3)), s1, j.getFeature(new GeneFeature(ReferencePoint.JBegin, 1, 0)), j.getFeature(new GeneFeature(GeneFeature.GermlineJCDR3Part, 0, 0)));
assertSequences(rg.getFeature(GeneFeature.CDR3.append(GeneFeature.FR4).append(GeneFeature.CExon1)), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, -3)), s0, d.getFeature(new GeneFeature(ReferencePoint.DBegin, 2, 0)), d.getFeature(new GeneFeature(GeneFeature.DRegion, 0, 0)), d.getFeature(new GeneFeature(ReferencePoint.DEnd, 0, -3)), s1, j.getFeature(new GeneFeature(ReferencePoint.JBegin, 1, 0)), j.getFeature(new GeneFeature(GeneFeature.JRegion, 0, 0)), c.getFeature(GeneFeature.CExon1));
// All P segments 1
rg = new GGene(null, new VDJCGenes(v, d, j, c), new VDJTrimming(3, 1, 2, 3), s0, s1);
assertSequences(rg.getFeature(GeneFeature.CDR3), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, 0)), v.getFeature(new GeneFeature(ReferencePoint.VEnd, 0, -3)), s0, d.getFeature(new GeneFeature(ReferencePoint.DBegin, 2, 0)), d.getFeature(new GeneFeature(GeneFeature.DRegion, 0, 0)), d.getFeature(new GeneFeature(ReferencePoint.DEnd, 0, -3)), s1, j.getFeature(new GeneFeature(ReferencePoint.JBegin, 1, 0)), j.getFeature(new GeneFeature(GeneFeature.GermlineJCDR3Part, 0, 0)));
assertSequences(rg.getFeature(GeneFeature.CDR3.append(GeneFeature.FR4).append(GeneFeature.CExon1)), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, 0)), v.getFeature(new GeneFeature(ReferencePoint.VEnd, 0, -3)), s0, d.getFeature(new GeneFeature(ReferencePoint.DBegin, 2, 0)), d.getFeature(new GeneFeature(GeneFeature.DRegion, 0, 0)), d.getFeature(new GeneFeature(ReferencePoint.DEnd, 0, -3)), s1, j.getFeature(new GeneFeature(ReferencePoint.JBegin, 1, 0)), j.getFeature(new GeneFeature(GeneFeature.JRegion, 0, 0)), c.getFeature(GeneFeature.CExon1));
// No P segments 1
rg = new GGene(null, new VDJCGenes(v, d, j, c), new VDJTrimming(-4, -5, -2, -4), s0, s1);
assertSequences(rg.getFeature(GeneFeature.CDR3), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, -4)), s0, d.getFeature(new GeneFeature(GeneFeature.DRegion, 2, -4)), s1, j.getFeature(new GeneFeature(GeneFeature.GermlineJCDR3Part, 5, 0)));
assertSequences(rg.getFeature(GeneFeature.CDR3.append(GeneFeature.FR4).append(GeneFeature.CExon1)), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, -4)), s0, d.getFeature(new GeneFeature(GeneFeature.DRegion, 2, -4)), s1, j.getFeature(new GeneFeature(GeneFeature.JRegion, 5, 0)), c.getFeature(GeneFeature.CExon1));
// No D gene segments 1
rg = new GGene(null, new VDJCGenes(v, null, j, c), new VDJTrimming(-4, -5), s0, null);
assertSequences(rg.getFeature(GeneFeature.CDR3), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, -4)), s0, j.getFeature(new GeneFeature(GeneFeature.GermlineJCDR3Part, 5, 0)));
assertSequences(rg.getFeature(GeneFeature.CDR3.append(GeneFeature.FR4).append(GeneFeature.CExon1)), v.getFeature(new GeneFeature(GeneFeature.GermlineVCDR3Part, 0, -4)), s0, j.getFeature(new GeneFeature(GeneFeature.JRegion, 5, 0)), c.getFeature(GeneFeature.CExon1));
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class AbstractRAFastaResolver method resolveReader.
public synchronized RandomAccessFastaReader<NucleotideSequence> resolveReader(SequenceAddress address) {
for (int retry = 0; retry < 2; ++retry) {
// Getting reader key
String readerKey = resolveReaderId(address);
// Checking if reader already opened
RandomAccessFastaReader<NucleotideSequence> reader = readers.get(readerKey);
Path file = null;
try {
// Getting fasta file path
// Download occur here
file = getFASTAFile(address);
// Creating or loading index
RandomAccessFastaIndex index = RandomAccessFastaIndex.index(file, true, getReporter());
// Caching reader
readers.put(readerKey, reader = new RandomAccessFastaReader<>(file, index, NucleotideSequence.ALPHABET));
return reader;
} catch (Exception e) {
// Something went wrong with file, removing for re-download.
log.warn("Error opening {}." + (deleteOnError ? " Removing." : ""), file, e);
// removing source file and index if exists
if (deleteOnError && file != null)
try {
Files.delete(file);
Path indexFile = file.resolveSibling(file.getFileName() + RandomAccessFastaIndex.INDEX_SUFFIX);
if (Files.exists(indexFile))
Files.delete(indexFile);
} catch (IOException e1) {
throw new RuntimeException(e1);
}
// Retry
}
}
throw new RuntimeException();
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class SequenceResolverTest method rawHttpTest2.
@Test
public void rawHttpTest2() throws Exception {
Path dir = TempFileManager.getTempDir().toPath().toAbsolutePath();
Path work = dir.resolve("work");
Files.createDirectories(work);
Path cache = dir.resolve("cache");
Files.createDirectories(cache);
SequenceResolvers.initDefaultResolver(cache);
SequenceResolver defaultResolver = SequenceResolvers.getDefault();
NucleotideSequence seq = defaultResolver.resolve(new SequenceAddress("http://files.milaboratory.com/test-data/test.fa.gz#testRecord")).getRegion(new Range(10, 30).inverse());
Assert.assertEquals(new NucleotideSequence("GCTCCACCACAAGACACTCT"), seq);
}
Aggregations