use of io.repseq.core.GeneFeature in project repseqio by repseqio.
the class TsvAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
if (!"default".equals(params.getInput()))
reg.registerLibraries(params.getInput());
else
reg.loadAllLibraries("default");
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
Long taxonFilter = params.taxonId;
if (taxonFilter == null && params.species != null)
taxonFilter = reg.resolveSpecies(params.species);
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(params.getOutputStream(), StandardCharsets.UTF_8))) {
writer.write("Gene\tChains\tFeature\tStart\tStop\tSource\tSequence\n");
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
if (taxonFilter != null && taxonFilter != lib.getTaxonId())
continue;
for (VDJCGene gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
for (GeneFeatureWithOriginalName feature : params.features) {
GeneFeature geneFeature = feature.feature;
NucleotideSequence featureSequence = gene.getFeature(geneFeature);
if (featureSequence == null)
continue;
// Don't output start and end positions for composite gene features
Long start = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getFirstPoint());
Long end = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getLastPoint());
NucleotideSequence nSequence = gene.getFeature(geneFeature);
List<String> tokens = Arrays.asList(gene.getGeneName(), gene.getChains().toString(), feature.originalName, // (so essentially 1-based inclusive). Report both as 1-based.
(start == null ? "" : params.isOneBased() ? String.valueOf(start + 1) : String.valueOf(start)), (end == null ? "" : String.valueOf(end)), gene.getData().getBaseSequence().getOrigin().toString(), nSequence.toString());
String delim = "";
for (String t : tokens) {
writer.write(delim);
writer.write(t);
delim = "\t";
}
writer.write('\n');
}
}
}
}
}
use of io.repseq.core.GeneFeature in project repseqio by repseqio.
the class DebugAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(params.getInput());
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
for (VDJCGene gene : lib.getGenes()) {
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
// First generate list of warning messages
List<String> warnings = new ArrayList<>();
if (gene.isFunctional() || params.getCheckAll()) {
NucleotideSequence l3;
switch(gene.getGeneType()) {
case Variable:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3FirstTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 start");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
// Flag suspicious exon borders
// https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
ReferencePoints partitioning = gene.getPartitioning();
if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
// Iterating over all reading-frame bound anchor points of V gene
for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
continue;
// And checking that they are in the same frame as Start (L1Begin)
int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
if (// Point is defined
relativePosition >= 0 && relativePosition % 3 != 0)
warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
}
}
break;
case Joining:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3LastTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 end");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
break;
}
for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
if (aaSequence != null) {
// Flag if contains stop codon
if (aaSequence.numberOfStops() > 0)
warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
}
}
}
if (params.getProblemOnly() && warnings.isEmpty())
continue;
System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
if (!warnings.isEmpty()) {
System.out.println();
System.out.println("WARNINGS: ");
for (String warning : warnings) {
System.out.println(warning);
}
System.out.println();
}
for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
System.out.println();
System.out.println(GeneFeature.encode(geneFeature));
NucleotideSequence nSequence = gene.getFeature(geneFeature);
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
System.out.print("N: ");
if (nSequence == null)
System.out.println("Not Available");
else
System.out.println(nSequence);
if (GeneFeature.getFrameReference(geneFeature) != null) {
System.out.print("AA: ");
if (aaSequence == null)
System.out.println("Not Available");
else
System.out.println(aaSequence);
}
}
System.out.println("=========");
System.out.println();
}
}
}
use of io.repseq.core.GeneFeature in project repseqio by repseqio.
the class FastaAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
GeneFeature geneFeature = params.getGeneFeature();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
if (!"default".equals(params.getInput()))
reg.registerLibraries(params.getInput());
else
reg.loadAllLibraries("default");
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
Long taxonFilter = params.taxonId;
if (taxonFilter == null && params.species != null)
taxonFilter = reg.resolveSpecies(params.species);
try (FastaWriter<NucleotideSequence> writer = CLIUtils.createSingleFastaWriter(params.getOutput())) {
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
if (taxonFilter != null && taxonFilter != lib.getTaxonId())
continue;
for (VDJCGene gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
NucleotideSequence featureSequence = gene.getFeature(geneFeature);
if (featureSequence == null)
continue;
writer.write(gene.getName() + "|" + (gene.isFunctional() ? "F" : "P") + "|taxonId=" + gene.getParentLibrary().getTaxonId(), featureSequence);
}
}
}
}
use of io.repseq.core.GeneFeature in project mixcr by milaboratory.
the class FullSeqAssemblerTest method testLargeCloneNoMismatches.
@Test
public void testLargeCloneNoMismatches() throws Exception {
MasterSequence master = FullSeqAssemblerTest.masterSeq1WT;
NSequenceWithQuality seq = new NSequenceWithQuality(master.getRange(-master.vPart + 10, 80), SequenceQuality.GOOD_QUALITY_VALUE);
RunMiXCR.RunMiXCRAnalysis params0 = new RunMiXCR.RunMiXCRAnalysis(new SingleReadImpl(0, seq, ""));
params0.cloneAssemblerParameters.setAssemblingFeatures(new GeneFeature[] { GeneFeature.VDJRegion });
Clone largeClone = RunMiXCR.assemble(RunMiXCR.align(params0)).cloneSet.get(0);
// ActionExportClonesPretty.outputCompact(System.out, largeClone);
// System.exit(0);
Well44497b rnd = new Well44497b(1234567889L);
int nReads = 100_000;
int readLength = 75, readGap = 150;
// slice seq randomly
PairedRead[] slicedReads = new PairedRead[nReads];
for (int i = 0; i < nReads; ++i) {
int r1from = rnd.nextInt(seq.size() - readLength - 1), r1to = r1from + readLength, r2from = r1from + 1 + rnd.nextInt(seq.size() - r1from - readLength - 1), r2to = r2from + readLength;
assert r2from > r1from;
slicedReads[i] = new PairedRead(new SingleReadImpl(i, seq.getRange(r1from, r1to), "" + i), new SingleReadImpl(i, seq.getRange(r2from, r2to).getReverseComplement(), "" + i));
}
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(slicedReads);
// params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
RunMiXCR.AlignResult align = RunMiXCR.align(params);
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
// System.out.println();
// System.out.println(" ================================================ ");
// System.out.println();
// }
Assert.assertEquals(1, assemble.cloneSet.size());
Clone initialClone = assemble.cloneSet.get(0);
NSequenceWithQuality cdr3 = initialClone.getFeature(GeneFeature.CDR3);
List<VDJCAlignments> alignments = align.alignments.stream().filter(al -> cdr3.equals(al.getFeature(GeneFeature.CDR3))).collect(Collectors.toList());
alignments.stream().filter(al -> Arrays.stream(al.getBestHit(GeneType.Variable).getAlignments()).filter(Objects::nonNull).anyMatch(a -> !a.getAbsoluteMutations().isEmpty())).filter(al -> al.getBestHit(GeneType.Variable).getGene().getName().contains("3-74")).forEach(al -> {
for (int i = 0; i < al.numberOfTargets(); i++) {
System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
System.out.println();
}
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
});
// System.exit(0);
System.out.println("=> Agg");
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, initialClone, align.parameters.alignerParameters);
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(alignments));
List<Clone> clones = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
clones.sort(Comparator.comparingDouble(Clone::getCount).reversed());
for (Clone clone : clones) {
ActionExportClonesPretty.outputCompact(System.out, clone);
System.out.println();
System.out.println(" ================================================ ");
System.out.println();
}
}
use of io.repseq.core.GeneFeature in project mixcr by milaboratory.
the class TargetMerger method merge.
@SuppressWarnings("unchecked")
public AlignedTarget merge(AlignedTarget targetLeft, AlignedTarget targetRight, int offset, OverlapType overlapType, int nMismatches) {
if (offset < 0)
return merge(targetRight, targetLeft, -offset, overlapType, nMismatches);
final NSequenceWithQuality mergedTarget = merger.overlap(targetLeft.getTarget(), targetRight.getTarget(), offset);
EnumMap<GeneType, VDJCHit[]> result = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VJC_REFERENCE) {
final BatchAlignerWithBaseParameters bp = ((KGeneAlignmentParameters) alignerParameters.getGeneAlignerParameters(geneType)).getParameters();
final VDJCHit[] leftHits = targetLeft.getAlignments().getHits(geneType);
final VDJCHit[] rightHits = targetRight.getAlignments().getHits(geneType);
GeneFeature alignedFeature = leftHits.length == 0 ? rightHits.length == 0 ? null : rightHits[0].getAlignedFeature() : leftHits[0].getAlignedFeature();
Map<VDJCGeneId, HitMappingRecord> map = extractHitsMapping(targetLeft, targetRight, geneType);
ArrayList<VDJCHit> resultingHits = new ArrayList<>();
for (Map.Entry<VDJCGeneId, HitMappingRecord> mE : map.entrySet()) {
final VDJCGene gene = mE.getValue().gene;
Alignment<NucleotideSequence> mergedAl = merge(bp.getScoring(), extractBandedWidth(bp), mergedTarget.getSequence(), offset, mE.getValue().alignments[0], mE.getValue().alignments[1]);
resultingHits.add(new VDJCHit(gene, mergedAl, alignedFeature));
}
Collections.sort(resultingHits);
// final float relativeMinScore = extractRelativeMinScore(bp);
// int threshold = (int) (resultingHits.size() > 0 ? resultingHits.get(0).getScore() * relativeMinScore : 0);
// for (int i = resultingHits.size() - 1; i > 0; --i)
// if (resultingHits.get(i).getScore() < threshold)
// resultingHits.remove(i);
result.put(geneType, resultingHits.toArray(new VDJCHit[resultingHits.size()]));
}
VDJCAlignments alignments = new VDJCAlignments(result, new NSequenceWithQuality[] { mergedTarget }, new SequenceHistory[] { new SequenceHistory.Merge(overlapType, targetLeft.getHistory(), targetRight.getHistory(), offset, nMismatches) }, VDJCAlignments.mergeOriginalReads(targetLeft.getAlignments(), targetRight.getAlignments()));
AlignedTarget resultTarget = new AlignedTarget(alignments, 0);
for (BPoint bPoint : BPoint.values()) {
int leftPoint = targetLeft.getBPoint(bPoint);
int rightPoint = targetRight.getBPoint(bPoint);
if (leftPoint != -1 && rightPoint != -1)
throw new IllegalArgumentException("Same bPoint defined in both input targets.");
else if (leftPoint != -1)
resultTarget = resultTarget.setBPoint(bPoint, leftPoint);
else if (rightPoint != -1)
resultTarget = resultTarget.setBPoint(bPoint, offset + rightPoint);
}
return resultTarget;
}
Aggregations