use of io.repseq.core.ReferencePoint in project mixcr by milaboratory.
the class ActionAlignmentsStat method go.
@Override
public void go(ActionHelper helper) throws Exception {
long[] geneFeatureCounters = new long[targetFeatures.length];
AlignmentInfoCollector[] collectors = new AlignmentInfoCollector[targetFeatures.length + targetReferencePoints.length];
int i = 0;
for (GeneFeature targetFeature : targetFeatures) collectors[i++] = new GeneFeatureCoverageCollector(targetFeature);
for (ReferencePoint targetReferencePoint : targetReferencePoints) collectors[i++] = new ReferencePointCoverageCollector(targetReferencePoint, 40, 40);
final Collector collector = new Collector(collectors);
try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(actionParameters.getInputFileName());
PrintStream output = actionParameters.getOutputFileName().equals("-") ? System.out : new PrintStream(new BufferedOutputStream(new FileOutputStream(actionParameters.getOutputFileName()), 32768))) {
SmartProgressReporter.startProgressReport("Analysis", reader);
CUtils.processAllInParallel(reader, collector, Math.min(4, Runtime.getRuntime().availableProcessors()));
collector.end();
if (output == System.out)
output.println();
collector.write(output);
}
}
use of io.repseq.core.ReferencePoint in project mixcr by milaboratory.
the class VDJCAlignmentsFormatter method pd.
private static PointToDraw pd(ReferencePoint rp, String marker, int additionalOffset, Filter<SequencePartitioning> activator) {
int offset = marker.indexOf('>');
if (offset >= 0)
return new PointToDraw(rp.move(additionalOffset), marker, -1 - offset - additionalOffset, activator);
offset = marker.indexOf('<');
if (offset >= 0)
return new PointToDraw(rp.move(additionalOffset), marker, -offset - additionalOffset, activator);
return new PointToDraw(rp, marker, 0, activator);
}
use of io.repseq.core.ReferencePoint in project repseqio by repseqio.
the class VDJCGeneDataTest method testSerialization1.
@Test
public void testSerialization1() throws Exception {
VDJCGeneData gene = new VDJCGeneData(new BaseSequence("embedded://testseq"), "TRBV1", GeneType.Variable, true, Chains.TRB, new TreeMap<String, SortedSet<String>>(), new TreeMap<ReferencePoint, Long>());
gene.addMetaValue("key", "val");
String s1 = GlobalObjectMappers.toOneLine(gene);
JsonNode n1 = GlobalObjectMappers.ONE_LINE.readTree(s1);
assertTrue(n1.get("meta").get("key").isValueNode());
assertEquals(gene, GlobalObjectMappers.ONE_LINE.readValue(s1, VDJCGeneData.class));
gene.addMetaValue("key", "val1");
String s2 = GlobalObjectMappers.toOneLine(gene);
JsonNode n2 = GlobalObjectMappers.ONE_LINE.readTree(s2);
assertFalse(n2.get("meta").get("key").isValueNode());
assertEquals(gene, GlobalObjectMappers.ONE_LINE.readValue(s2, VDJCGeneData.class));
}
use of io.repseq.core.ReferencePoint in project repseqio by repseqio.
the class MarkovInsertModel method create.
@Override
public InsertGenerator create(RandomGenerator random, final boolean v, List<VDJCGene> vGenes, List<VDJCGene> dGenes, List<VDJCGene> jGenes, List<VDJCGene> cGenes) {
Map<Byte, List<Pair<Byte, Double>>> distParams = new HashMap<>();
for (Map.Entry<String, Double> s : distribution.entrySet()) {
String[] split = s.getKey().split(">");
if (split.length != 2 || split[0].length() != 1 || split[1].length() != 1)
throw new IllegalArgumentException("Illegal distribution key: " + s.getKey() + ". " + "Expected something like \"A>C\"");
byte codeFrom = NucleotideSequence.ALPHABET.symbolToCode(split[0].charAt(0));
byte codeTo = NucleotideSequence.ALPHABET.symbolToCode(split[1].charAt(0));
if (codeFrom == -1 || codeTo == -1)
throw new IllegalArgumentException("Illegal nucleotide in: " + s.getKey() + ".");
List<Pair<Byte, Double>> pairs = distParams.get(codeFrom);
if (pairs == null)
distParams.put(codeFrom, pairs = new ArrayList<>());
pairs.add(new Pair<>(codeTo, s.getValue()));
}
final Map<Byte, EnumeratedDistribution<Byte>> dists = new HashMap<>();
for (byte from = 0; from < NucleotideSequence.ALPHABET.basicSize(); from++) {
List<Pair<Byte, Double>> d = distParams.get(from);
if (d == null)
throw new IllegalArgumentException("No distribution for letter: " + NucleotideSequence.ALPHABET.codeToSymbol(from));
dists.put(from, new EnumeratedDistribution<>(random, d));
}
final IndependentIntGenerator lengthDist = lengthDistribution.create(random);
return new InsertGenerator() {
@Override
public NucleotideSequence generate(GGene gene) {
ReferencePoint point = beginPoint(fromLeft, v);
int pointPosition = gene.getPartitioning().getPosition(point);
if (pointPosition == -1)
throw new RuntimeException("Point " + point + " is not available for gene " + gene);
byte letter = gene.getSequence(new Range(pointPosition, pointPosition + 1)).codeAt(0);
int length = lengthDist.sample();
byte[] array = new byte[length];
for (int i = 0; i < length; i++) {
byte cLetter = dists.get(letter).sample();
array[i] = cLetter;
letter = cLetter;
}
if (!fromLeft)
ArraysUtils.reverse(array);
return NucleotideSequence.ALPHABET.createBuilder().ensureCapacity(length).append(array).createAndDestroy();
}
};
}
use of io.repseq.core.ReferencePoint in project repseqio by repseqio.
the class DebugAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(params.getInput());
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
for (VDJCGene gene : lib.getGenes()) {
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
// First generate list of warning messages
List<String> warnings = new ArrayList<>();
if (gene.isFunctional() || params.getCheckAll()) {
NucleotideSequence l3;
switch(gene.getGeneType()) {
case Variable:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3FirstTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 start");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
// Flag suspicious exon borders
// https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
ReferencePoints partitioning = gene.getPartitioning();
if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
// Iterating over all reading-frame bound anchor points of V gene
for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
continue;
// And checking that they are in the same frame as Start (L1Begin)
int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
if (// Point is defined
relativePosition >= 0 && relativePosition % 3 != 0)
warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
}
}
break;
case Joining:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3LastTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 end");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
break;
}
for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
if (aaSequence != null) {
// Flag if contains stop codon
if (aaSequence.numberOfStops() > 0)
warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
}
}
}
if (params.getProblemOnly() && warnings.isEmpty())
continue;
System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
if (!warnings.isEmpty()) {
System.out.println();
System.out.println("WARNINGS: ");
for (String warning : warnings) {
System.out.println(warning);
}
System.out.println();
}
for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
System.out.println();
System.out.println(GeneFeature.encode(geneFeature));
NucleotideSequence nSequence = gene.getFeature(geneFeature);
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
System.out.print("N: ");
if (nSequence == null)
System.out.println("Not Available");
else
System.out.println(nSequence);
if (GeneFeature.getFrameReference(geneFeature) != null) {
System.out.print("AA: ");
if (aaSequence == null)
System.out.println("Not Available");
else
System.out.println(aaSequence);
}
}
System.out.println("=========");
System.out.println();
}
}
}
Aggregations