use of io.repseq.gen.GGene in project repseqio by repseqio.
the class MarkovInsertModel method create.
@Override
public InsertGenerator create(RandomGenerator random, final boolean v, List<VDJCGene> vGenes, List<VDJCGene> dGenes, List<VDJCGene> jGenes, List<VDJCGene> cGenes) {
Map<Byte, List<Pair<Byte, Double>>> distParams = new HashMap<>();
for (Map.Entry<String, Double> s : distribution.entrySet()) {
String[] split = s.getKey().split(">");
if (split.length != 2 || split[0].length() != 1 || split[1].length() != 1)
throw new IllegalArgumentException("Illegal distribution key: " + s.getKey() + ". " + "Expected something like \"A>C\"");
byte codeFrom = NucleotideSequence.ALPHABET.symbolToCode(split[0].charAt(0));
byte codeTo = NucleotideSequence.ALPHABET.symbolToCode(split[1].charAt(0));
if (codeFrom == -1 || codeTo == -1)
throw new IllegalArgumentException("Illegal nucleotide in: " + s.getKey() + ".");
List<Pair<Byte, Double>> pairs = distParams.get(codeFrom);
if (pairs == null)
distParams.put(codeFrom, pairs = new ArrayList<>());
pairs.add(new Pair<>(codeTo, s.getValue()));
}
final Map<Byte, EnumeratedDistribution<Byte>> dists = new HashMap<>();
for (byte from = 0; from < NucleotideSequence.ALPHABET.basicSize(); from++) {
List<Pair<Byte, Double>> d = distParams.get(from);
if (d == null)
throw new IllegalArgumentException("No distribution for letter: " + NucleotideSequence.ALPHABET.codeToSymbol(from));
dists.put(from, new EnumeratedDistribution<>(random, d));
}
final IndependentIntGenerator lengthDist = lengthDistribution.create(random);
return new InsertGenerator() {
@Override
public NucleotideSequence generate(GGene gene) {
ReferencePoint point = beginPoint(fromLeft, v);
int pointPosition = gene.getPartitioning().getPosition(point);
if (pointPosition == -1)
throw new RuntimeException("Point " + point + " is not available for gene " + gene);
byte letter = gene.getSequence(new Range(pointPosition, pointPosition + 1)).codeAt(0);
int length = lengthDist.sample();
byte[] array = new byte[length];
for (int i = 0; i < length; i++) {
byte cLetter = dists.get(letter).sample();
array[i] = cLetter;
letter = cLetter;
}
if (!fromLeft)
ArraysUtils.reverse(array);
return NucleotideSequence.ALPHABET.createBuilder().ensureCapacity(length).append(array).createAndDestroy();
}
};
}
use of io.repseq.gen.GGene in project repseqio by repseqio.
the class GenerateClonesAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
GCloneModel model = GModels.getGCloneModelByName(params.getModelName());
GCloneGenerator generator = model.create(new Well19937c(params.getSeed()), VDJCLibraryRegistry.getDefault());
VDJCLibrary library = VDJCLibraryRegistry.getDefault().getLibrary(model.libraryId());
try (BufferedOutputStream s = new BufferedOutputStream(params.getOutput().equals(".") ? System.out : new FileOutputStream(params.getOutput()), 128 * 1024)) {
s.write(GlobalObjectMappers.toOneLine(model.libraryId()).getBytes());
s.write('\n');
ObjectWriter writer = GlobalObjectMappers.ONE_LINE.writerFor(new TypeReference<GClone>() {
}).withAttribute(VDJCGene.JSON_CURRENT_LIBRARY_ATTRIBUTE_KEY, library);
OUTER: for (int i = 0; i < params.numberOfClones; i++) {
GClone clone = generator.sample();
for (GGene g : clone.genes.values()) {
NucleotideSequence cdr3 = g.getFeature(GeneFeature.CDR3);
if (params.isInFrame())
if (cdr3.size() % 3 != 0) {
--i;
continue OUTER;
}
if (params.isNoStops())
if (AminoAcidSequence.translateFromCenter(cdr3).containStops()) {
--i;
continue OUTER;
}
}
writer.writeValue(new CloseShieldOutputStream(s), clone);
s.write('\n');
}
}
}
use of io.repseq.gen.GGene in project repseqio by repseqio.
the class BasicGCloneModel method create.
@Override
public GCloneGenerator create(RandomGenerator random, VDJCLibraryRegistry registry) {
VDJCLibrary library = registry.getLibrary(vdjcLibrary);
final IndependentRealGenerator abundanceGenerator = abundanceModel.create(random);
final Map<String, GGeneGenerator> geneGenerators = new HashMap<>();
for (Map.Entry<String, GGeneModel> e : geneModels.entrySet()) geneGenerators.put(e.getKey(), e.getValue().create(random, library));
return new GCloneGenerator() {
@Override
public GClone sample() {
double abundance = abundanceGenerator.generate();
Map<String, GGene> genes = new HashMap<>();
for (Map.Entry<String, GGeneGenerator> e : geneGenerators.entrySet()) genes.put(e.getKey(), e.getValue().generate());
return new GClone(abundance, genes);
}
};
}
use of io.repseq.gen.GGene in project repseqio by repseqio.
the class BasicGGeneModel method create.
@Override
public GGeneGenerator create(RandomGenerator random, VDJCLibrary library) {
final VDJCGenesGenerator vdjcGenesGenerator = vdjcGenesModel.create(random, library);
List<VDJCGene> vGenes = vdjcGenesGenerator.genes(GeneType.Variable);
List<VDJCGene> dGenes = vdjcGenesGenerator.genes(GeneType.Diversity);
List<VDJCGene> jGenes = vdjcGenesGenerator.genes(GeneType.Joining);
List<VDJCGene> cGenes = vdjcGenesGenerator.genes(GeneType.Constant);
final VDJTrimmingGenerator trimmingGenerator = trimmingModel.create(random, vGenes, dGenes, jGenes, cGenes);
final InsertGenerator vInsertGenerator = vInsertModel.create(random, true, vGenes, dGenes, jGenes, cGenes);
final InsertGenerator djInsertGenerator = dGenes.isEmpty() ? null : djInsertModel.create(random, false, vGenes, dGenes, jGenes, cGenes);
return new GGeneGenerator() {
@Override
public GGene generate() {
VDJCGenes vdjcGenes = vdjcGenesGenerator.sample();
VDJTrimming trimming = trimmingGenerator.sample(vdjcGenes);
assert !vdjcGenes.isDDefined() || (vdjcGenes.isDDefined() && djInsertGenerator != null);
NucleotideSequence vInsert;
NucleotideSequence djInsert;
if (vdjcGenes.isDDefined()) {
assert djInsertGenerator != null;
GGene tempGene = new GGene(null, vdjcGenes, trimming, NucleotideSequence.EMPTY, NucleotideSequence.EMPTY);
vInsert = vInsertGenerator.generate(tempGene);
djInsert = djInsertGenerator.generate(tempGene);
} else {
GGene tempGene = new GGene(null, vdjcGenes, trimming, NucleotideSequence.EMPTY, null);
vInsert = vInsertGenerator.generate(tempGene);
djInsert = null;
}
return new GGene(null, vdjcGenes, trimming, vInsert, djInsert);
}
};
}
use of io.repseq.gen.GGene in project repseqio by repseqio.
the class BasicGCloneModelTest method test1.
@Test
public void test1() throws Exception {
GGeneModel geneModel = new BasicGGeneModel(new IndependentVDJCGenesModel(b("TRBV12-2*00", 1.0).put("TRBV12-3*00", 0.0).get(), b("TRBD1*00", 1.0).put("TRBD2*00", 0.0).get(), b("TRBJ1-2*00", 1.0).put("TRBJ1-3*00", 0.0).get(), b("TRBC1*00", 1.0).put("TRBC2*00", 0.0).get()), new IndependentVDJTrimmingModel(new CommonCategoricalGeneTrimmingModel(b(-1, 1.0).get()), new CommonCategoricalDGeneTrimmingModel(b("-2|-3", 1.0).get()), new CommonCategoricalGeneTrimmingModel(b(-4, 1.0).get())), new FixedInsertModel(new NucleotideSequence("ATTA")), new FixedInsertModel(new NucleotideSequence("GACA")));
BasicGCloneModel model = new BasicGCloneModel(new VDJCLibraryId("default", 9606), new FixedRealModel(1.0), b("TRB", geneModel).get());
TestUtil.assertJson(model);
GCloneGenerator gen = model.create(new Well19937c(123), VDJCLibraryRegistry.getDefault());
GClone clone = gen.sample();
TestUtil.assertJson(clone);
GGene trb = clone.genes.get("TRB");
assertEquals(trb.vdjcGenes.v.getName(), "TRBV12-2*00");
assertEquals(trb.vdjcGenes.d.getName(), "TRBD1*00");
assertEquals(trb.vdjcGenes.j.getName(), "TRBJ1-2*00");
assertEquals(trb.vdjcGenes.c.getName(), "TRBC1*00");
assertEquals(new VDJTrimming(-1, -4, -2, -3), trb.vdjTrimming);
assertEquals(new NucleotideSequence("ATTA"), trb.vInsert);
assertEquals(new NucleotideSequence("GACA"), trb.djInsert);
}
Aggregations