use of io.repseq.core.VDJCGene in project repseqio by repseqio.
the class UTest method t1.
@Ignore
@Test
public void t1() throws Exception {
Map<String, Integer> mins = new HashMap<>();
Map<String, Integer> maxs = new HashMap<>();
for (VDJCGene gene : VDJCLibraryRegistry.getDefault().getLibrary("default", "hs").getGenes()) {
if (gene.getGeneType() != GeneType.Joining)
continue;
int vend = gene.getPartitioning().getPosition(ReferencePoint.JBegin);
int vbegin = gene.getPartitioning().getPosition(ReferencePoint.FR4End);
String key = gene.getData().getBaseSequence().getOrigin().toString();
add(maxs, key, vbegin, true);
add(maxs, key, vend, true);
add(mins, key, vbegin, false);
add(mins, key, vend, false);
}
System.out.println("MINS");
print(mins);
System.out.println("MAXS");
print(maxs);
}
use of io.repseq.core.VDJCGene in project repseqio by repseqio.
the class IndependentVDJTrimmingModel method create.
@Override
public VDJTrimmingGenerator create(RandomGenerator random, List<VDJCGene> vGenes, List<VDJCGene> dGenes, List<VDJCGene> jGenes, List<VDJCGene> cGenes) {
final Map<VDJCGene, GeneTrimmingGenerator> vGenerators = new HashMap<>();
final Map<VDJCGene, DTrimmingGenerator> dGenerators = new HashMap<>();
final Map<VDJCGene, GeneTrimmingGenerator> jGenerators = new HashMap<>();
for (VDJCGene gene : vGenes) vGenerators.put(gene, v.create(random, gene));
for (VDJCGene gene : dGenes) dGenerators.put(gene, d.create(random, gene));
for (VDJCGene gene : jGenes) jGenerators.put(gene, j.create(random, gene));
return new VDJTrimmingGenerator() {
@Override
public VDJTrimming sample(VDJCGenes genes) {
if (genes.d == null)
return new VDJTrimming(vGenerators.get(genes.v).sample(), jGenerators.get(genes.j).sample());
else
return new VDJTrimming(vGenerators.get(genes.v).sample(), jGenerators.get(genes.j).sample(), dGenerators.get(genes.d).sample());
}
};
}
use of io.repseq.core.VDJCGene in project repseqio by repseqio.
the class TsvAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
if (!"default".equals(params.getInput()))
reg.registerLibraries(params.getInput());
else
reg.loadAllLibraries("default");
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
Long taxonFilter = params.taxonId;
if (taxonFilter == null && params.species != null)
taxonFilter = reg.resolveSpecies(params.species);
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(params.getOutputStream(), StandardCharsets.UTF_8))) {
writer.write("Gene\tChains\tFeature\tStart\tStop\tSource\tSequence\n");
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
if (taxonFilter != null && taxonFilter != lib.getTaxonId())
continue;
for (VDJCGene gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
for (GeneFeatureWithOriginalName feature : params.features) {
GeneFeature geneFeature = feature.feature;
NucleotideSequence featureSequence = gene.getFeature(geneFeature);
if (featureSequence == null)
continue;
// Don't output start and end positions for composite gene features
Long start = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getFirstPoint());
Long end = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getLastPoint());
NucleotideSequence nSequence = gene.getFeature(geneFeature);
List<String> tokens = Arrays.asList(gene.getGeneName(), gene.getChains().toString(), feature.originalName, // (so essentially 1-based inclusive). Report both as 1-based.
(start == null ? "" : params.isOneBased() ? String.valueOf(start + 1) : String.valueOf(start)), (end == null ? "" : String.valueOf(end)), gene.getData().getBaseSequence().getOrigin().toString(), nSequence.toString());
String delim = "";
for (String t : tokens) {
writer.write(delim);
writer.write(t);
delim = "\t";
}
writer.write('\n');
}
}
}
}
}
use of io.repseq.core.VDJCGene in project repseqio by repseqio.
the class DebugAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(params.getInput());
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
for (VDJCGene gene : lib.getGenes()) {
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
// First generate list of warning messages
List<String> warnings = new ArrayList<>();
if (gene.isFunctional() || params.getCheckAll()) {
NucleotideSequence l3;
switch(gene.getGeneType()) {
case Variable:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3FirstTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 start");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
// Flag suspicious exon borders
// https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
ReferencePoints partitioning = gene.getPartitioning();
if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
// Iterating over all reading-frame bound anchor points of V gene
for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
continue;
// And checking that they are in the same frame as Start (L1Begin)
int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
if (// Point is defined
relativePosition >= 0 && relativePosition % 3 != 0)
warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
}
}
break;
case Joining:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3LastTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 end");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
break;
}
for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
if (aaSequence != null) {
// Flag if contains stop codon
if (aaSequence.numberOfStops() > 0)
warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
}
}
}
if (params.getProblemOnly() && warnings.isEmpty())
continue;
System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
if (!warnings.isEmpty()) {
System.out.println();
System.out.println("WARNINGS: ");
for (String warning : warnings) {
System.out.println(warning);
}
System.out.println();
}
for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
System.out.println();
System.out.println(GeneFeature.encode(geneFeature));
NucleotideSequence nSequence = gene.getFeature(geneFeature);
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
System.out.print("N: ");
if (nSequence == null)
System.out.println("Not Available");
else
System.out.println(nSequence);
if (GeneFeature.getFrameReference(geneFeature) != null) {
System.out.print("AA: ");
if (aaSequence == null)
System.out.println("Not Available");
else
System.out.println(aaSequence);
}
}
System.out.println("=========");
System.out.println();
}
}
}
use of io.repseq.core.VDJCGene in project repseqio by repseqio.
the class FastaAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
GeneFeature geneFeature = params.getGeneFeature();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
if (!"default".equals(params.getInput()))
reg.registerLibraries(params.getInput());
else
reg.loadAllLibraries("default");
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
Long taxonFilter = params.taxonId;
if (taxonFilter == null && params.species != null)
taxonFilter = reg.resolveSpecies(params.species);
try (FastaWriter<NucleotideSequence> writer = CLIUtils.createSingleFastaWriter(params.getOutput())) {
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
if (taxonFilter != null && taxonFilter != lib.getTaxonId())
continue;
for (VDJCGene gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
NucleotideSequence featureSequence = gene.getFeature(geneFeature);
if (featureSequence == null)
continue;
writer.write(gene.getName() + "|" + (gene.isFunctional() ? "F" : "P") + "|taxonId=" + gene.getParentLibrary().getTaxonId(), featureSequence);
}
}
}
}
Aggregations