use of com.milaboratory.core.sequence.AminoAcidSequence in project mixcr by milaboratory.
the class VDJCAlignmentsFormatter method drawAASequence.
public static void drawAASequence(MultiAlignmentHelper helper, SequencePartitioning partitioning, NucleotideSequence target) {
List<RangeTranslationParameters> trParams = partitioning.getTranslationParameters(target.size());
char[] line = new char[helper.size()];
Arrays.fill(line, ' ');
for (RangeTranslationParameters trParam : trParams) {
NucleotideSequence mainSequence = target.getRange(trParam.range);
NucleotideSequence leftover = trParam.codonLeftoverRange == null ? null : target.getRange(trParam.codonLeftoverRange);
NucleotideSequence bigSeq = leftover == null ? mainSequence : trParam.leftIncompleteCodonRange() != null ? leftover.concatenate(mainSequence) : mainSequence.concatenate(leftover);
AminoAcidSequence aa = AminoAcidSequence.translate(bigSeq, trParam.translationParameters);
int aaPosition = 0;
int ntPosition = trParam.range.getFrom() + AminoAcidSequence.convertAAPositionToNt(aaPosition, mainSequence.size(), trParam.translationParameters);
if (aa.codeAt(aaPosition) == AminoAcidAlphabet.INCOMPLETE_CODON) {
line[helper.subjectToAlignmentPosition(ntPosition)] = // '_'
AminoAcidSequence.ALPHABET.codeToSymbol(aa.codeAt(aaPosition));
++aaPosition;
}
while (aaPosition < aa.size() && aa.codeAt(aaPosition) != AminoAcidAlphabet.INCOMPLETE_CODON) {
ntPosition = trParam.range.getFrom() + AminoAcidSequence.convertAAPositionToNt(aaPosition, mainSequence.size(), trParam.translationParameters);
line[helper.subjectToAlignmentPosition(ntPosition + 1)] = AminoAcidSequence.ALPHABET.codeToSymbol(aa.codeAt(aaPosition));
++aaPosition;
}
if (aaPosition < aa.size() && aa.codeAt(aaPosition) == AminoAcidAlphabet.INCOMPLETE_CODON) {
ntPosition = trParam.range.getFrom() + AminoAcidSequence.convertAAPositionToNt(aaPosition, mainSequence.size(), trParam.translationParameters);
line[helper.subjectToAlignmentPosition(ntPosition)] = AminoAcidSequence.ALPHABET.codeToSymbol(aa.codeAt(aaPosition));
}
}
helper.addAnnotationString("", new String(line));
}
use of com.milaboratory.core.sequence.AminoAcidSequence in project mixcr by milaboratory.
the class BackwardCompatibilityTests method assertGoodVDJCA.
public static void assertGoodVDJCA(String resource, int size) throws IOException {
try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(BackwardCompatibilityTests.class.getResource(resource).getFile())) {
int countGood = 0;
for (VDJCAlignments vdjcAlignments : CUtils.it(reader)) {
NSequenceWithQuality cdr3NQ = vdjcAlignments.getFeature(GeneFeature.CDR3);
if (cdr3NQ == null)
continue;
AminoAcidSequence aaCDR3 = AminoAcidSequence.translateFromCenter(cdr3NQ.getSequence());
if (aaCDR3.codeAt(0) == AminoAcidAlphabet.C && aaCDR3.codeAt(aaCDR3.size() - 1) == AminoAcidAlphabet.F)
++countGood;
}
Assert.assertEquals(size, countGood);
}
}
use of com.milaboratory.core.sequence.AminoAcidSequence in project mixcr by milaboratory.
the class FieldExtractors method getFields.
public static synchronized Field[] getFields() {
if (descriptors == null) {
List<Field> descriptorsList = new ArrayList<>();
// Number of targets
descriptorsList.add(new PL_O("-targets", "Export number of targets", "Number of targets", "numberOfTargets") {
@Override
protected String extract(VDJCObject object) {
return Integer.toString(object.numberOfTargets());
}
});
// Best hits
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hit", "Export best " + l + " hit", "Best " + l + " hit", "best" + l + "Hit") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return bestHit.getGene().getName();
}
});
}
// Best gene
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Gene", "Export best " + l + " hit gene name (e.g. TRBV12-3 for TRBV12-3*00)", "Best " + l + " gene", "best" + l + "Gene") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return bestHit.getGene().getGeneName();
}
});
}
// Best family
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Family", "Export best " + l + " hit family name (e.g. TRBV12 for TRBV12-3*00)", "Best " + l + " family", "best" + l + "Family") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return bestHit.getGene().getFamilyName();
}
});
}
// Best hit score
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitScore", "Export score for best " + l + " hit", "Best " + l + " hit score", "best" + l + "HitScore") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return String.valueOf(bestHit.getScore());
}
});
}
// All hits
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitsWithScore", "Export all " + l + " hits with score", "All " + l + " hits", "all" + l + "HitsWithScore") {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(hits[i].getGene().getName()).append("(").append(SCORE_FORMAT.format(hits[i].getScore())).append(")");
if (i == hits.length - 1)
break;
sb.append(",");
}
return sb.toString();
}
});
}
// All hits without score
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hits", "Export all " + l + " hits", "All " + l + " Hits", "all" + l + "Hits") {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(hits[i].getGene().getName());
if (i == hits.length - 1)
break;
sb.append(",");
}
return sb.toString();
}
});
}
// All gene names
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Genes", "Export all " + l + " gene names (e.g. TRBV12-3 for TRBV12-3*00)", "All " + l + " genes", "all" + l + "Genes", type) {
@Override
String extractStringForHit(VDJCHit hit) {
return hit.getGene().getGeneName();
}
});
}
// All families
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Families", "Export all " + l + " gene family anmes (e.g. TRBV12 for TRBV12-3*00)", "All " + l + " families", "all" + l + "Families", type) {
@Override
String extractStringForHit(VDJCHit hit) {
return hit.getGene().getFamilyName();
}
});
}
// Best alignment
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignment", "Export best " + l + " alignment", "Best " + l + " alignment", "best" + l + "Alignment") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
Alignment<NucleotideSequence> alignment = bestHit.getAlignment(i);
if (alignment == null)
sb.append(NULL);
else
sb.append(alignment.toCompactString());
if (i == object.numberOfTargets() - 1)
break;
sb.append(",");
}
return sb.toString();
}
});
}
// All alignments
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignments", "Export all " + l + " alignments", "All " + l + " alignments", "all" + l + "Alignments") {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int j = 0; ; ++j) {
for (int i = 0; ; i++) {
Alignment<NucleotideSequence> alignment = hits[j].getAlignment(i);
if (alignment == null)
sb.append(NULL);
else
sb.append(alignment.toCompactString());
if (i == object.numberOfTargets() - 1)
break;
sb.append(',');
}
if (j == hits.length - 1)
break;
sb.append(';');
}
return sb.toString();
}
});
}
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-nFeature", "Export nucleotide sequence of specified gene feature", "N. Seq. ", "nSeq") {
@Override
public String convert(NSequenceWithQuality seq) {
return seq.getSequence().toString();
}
});
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-qFeature", "Export quality string of specified gene feature", "Qual. ", "qual") {
@Override
public String convert(NSequenceWithQuality seq) {
return seq.getQuality().toString();
}
});
descriptorsList.add(new FeatureExtractors.WithHeader("-aaFeature", "Export amino acid sequence of specified gene feature", 1, new String[] { "AA. Seq. " }, new String[] { "aaSeq" }) {
@Override
protected String extractValue(VDJCObject object, GeneFeature[] parameters) {
GeneFeature geneFeature = parameters[parameters.length - 1];
NSequenceWithQuality feature = object.getFeature(geneFeature);
if (feature == null)
return NULL;
int targetId = object.getTargetContainingFeature(geneFeature);
TranslationParameters tr = targetId == -1 ? TranslationParameters.FromLeftWithIncompleteCodon : object.getPartitionedTarget(targetId).getPartitioning().getTranslationParameters(geneFeature);
if (tr == null)
return NULL;
return AminoAcidSequence.translate(feature.getSequence(), tr).toString();
}
});
// descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromLeft", "Export amino acid sequence of " +
// "specified gene feature starting from the leftmost nucleotide (differs from -aaFeature only for " +
// "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
// @Override
// public String convert(NSequenceWithQuality seq) {
// return AminoAcidSequence.translate(seq.getSequence(), FromLeftWithoutIncompleteCodon).toString();
// }
// });
//
// descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromRight", "Export amino acid sequence of " +
// "specified gene feature starting from the rightmost nucleotide (differs from -aaFeature only for " +
// "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
// @Override
// public String convert(NSequenceWithQuality seq) {
// return AminoAcidSequence.translate(seq.getSequence(), FromRightWithoutIncompleteCodon).toString();
// }
// });
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-minFeatureQuality", "Export minimal quality of specified gene feature", "Min. qual. ", "minQual") {
@Override
public String convert(NSequenceWithQuality seq) {
return "" + seq.getQuality().minValue();
}
});
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-avrgFeatureQuality", "Export average quality of specified gene feature", "Mean. qual. ", "meanQual") {
@Override
public String convert(NSequenceWithQuality seq) {
return "" + seq.getQuality().meanValue();
}
});
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-lengthOf", "Exports length of specified gene feature.", "Length of ", "lengthOf") {
@Override
public String convert(NSequenceWithQuality seq) {
return "" + seq.size();
}
});
descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutations", "Extract nucleotide mutations for specific gene feature; relative to germline sequence.", 1, new String[] { "N. Mutations in " }, new String[] { "nMutations" }) {
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
return mutations.encode(",");
}
});
descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutationsRelative", "Extract nucleotide mutations for specific gene feature relative to another feature.", 2, new String[] { "N. Mutations in ", " relative to " }, new String[] { "nMutationsIn", "Relative" }) {
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
return mutations.encode(",");
}
});
final class AAMutations extends FeatureExtractors.MutationsExtractor {
AAMutations(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
super(command, description, nArgs, hPrefix, sPrefix);
}
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
if (tr == null)
return "-";
Mutations<AminoAcidSequence> aaMuts = MutationsUtil.nt2aa(seq1, mutations, tr);
if (aaMuts == null)
return "-";
return aaMuts.encode(",");
}
}
descriptorsList.add(new AAMutations("-aaMutations", "Extract amino acid mutations for specific gene feature", 1, new String[] { "AA. Mutations in " }, new String[] { "aaMutations" }));
descriptorsList.add(new AAMutations("-aaMutationsRelative", "Extract amino acid mutations for specific gene feature relative to another feature.", 2, new String[] { "AA. Mutations in ", " relative to " }, new String[] { "aaMutationsIn", "Relative" }));
final class MutationsDetailed extends FeatureExtractors.MutationsExtractor {
MutationsDetailed(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
super(command, description, nArgs, hPrefix, sPrefix);
}
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
if (tr == null)
return "-";
MutationsUtil.MutationNt2AADescriptor[] descriptors = MutationsUtil.nt2aaDetailed(seq1, mutations, tr, 10);
if (descriptors == null)
return "-";
StringBuilder sb = new StringBuilder();
for (int i = 0; i < descriptors.length; i++) {
sb.append(descriptors[i]);
if (i == descriptors.length - 1)
break;
sb.append(",");
}
return sb.toString();
}
}
String detailedMutationsFormat = "Format <nt_mutation>:<aa_mutation_individual>:<aa_mutation_cumulative>, where <aa_mutation_individual> is an expected amino acid " + "mutation given no other mutations have occurred, and <aa_mutation_cumulative> amino acid mutation is the observed amino acid " + "mutation combining effect from all other. WARNING: format may change in following versions.";
descriptorsList.add(new MutationsDetailed("-mutationsDetailed", "Detailed list of nucleotide and corresponding amino acid mutations. " + detailedMutationsFormat, 1, new String[] { "Detailed mutations in " }, new String[] { "mutationsDetailedIn" }));
descriptorsList.add(new MutationsDetailed("-mutationsDetailedRelative", "Detailed list of nucleotide and corresponding amino acid mutations written, positions relative to specified gene feature. " + detailedMutationsFormat, 2, new String[] { "Detailed mutations in ", " relative to " }, new String[] { "mutationsDetailedIn", "Relative" }));
descriptorsList.add(new ExtractReferencePointPosition());
descriptorsList.add(new ExtractDefaultReferencePointsPositions());
descriptorsList.add(new PL_A("-readId", "Export id of read corresponding to alignment", "Read id", "readId") {
@Override
protected String extract(VDJCAlignments object) {
return "" + object.getMinReadId();
}
@Override
public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
System.out.println("WARNING: -readId is deprecated. Use -readIds");
return super.create(outputMode, args);
}
});
descriptorsList.add(new PL_A("-readIds", "Export id of read corresponding to alignment", "Read id", "readId") {
@Override
protected String extract(VDJCAlignments object) {
long[] readIds = object.getReadIds();
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(readIds[i]);
if (i == readIds.length - 1)
return sb.toString();
sb.append(",");
}
}
});
descriptorsList.add(new ExtractSequence(VDJCAlignments.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Read(s) sequence", "readSequence"));
descriptorsList.add(new ExtractSequenceQuality(VDJCAlignments.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Read(s) sequence qualities", "readQuality"));
descriptorsList.add(new PL_C("-cloneId", "Unique clone identifier", "Clone ID", "cloneId") {
@Override
protected String extract(Clone object) {
return "" + object.getId();
}
});
descriptorsList.add(new PL_C("-count", "Export clone count", "Clone count", "cloneCount") {
@Override
protected String extract(Clone object) {
return "" + object.getCount();
}
});
descriptorsList.add(new PL_C("-fraction", "Export clone fraction", "Clone fraction", "cloneFraction") {
@Override
protected String extract(Clone object) {
return "" + object.getFraction();
}
});
descriptorsList.add(new ExtractSequence(Clone.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Clonal sequence(s)", "clonalSequence"));
descriptorsList.add(new ExtractSequenceQuality(Clone.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Clonal sequence quality(s)", "clonalSequenceQuality"));
descriptorsList.add(new PL_A("-descrR1", "Export description line from initial .fasta or .fastq file " + "of the first read (only available if --save-description was used in align command)", "Description R1", "descrR1") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
return reads.get(0).getRead(0).getDescription();
}
@Override
public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
System.out.println("WARNING: -descrR1 is deprecated. Use -descrsR1");
return super.create(outputMode, args);
}
});
descriptorsList.add(new PL_A("-descrR2", "Export description line from initial .fasta or .fastq file " + "of the second read (only available if --save-description was used in align command)", "Description R2", "descrR2") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
SequenceRead read = reads.get(0);
if (read.numberOfReads() < 2)
throw new IllegalArgumentException("Error for option \'-descrR2\':\n" + "No description available for second read: your input data was single-end");
return read.getRead(1).getDescription();
}
@Override
public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
System.out.println("WARNING: -descrR2 is deprecated. Use -descrsR2");
return super.create(outputMode, args);
}
});
descriptorsList.add(new PL_A("-descrsR1", "Export description lines from initial .fasta or .fastq file " + "of the first reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R1", "descrsR1") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads option " + "or don't use \'-descrR1\' in exportAlignments");
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(reads.get(i).getRead(0).getDescription());
if (i == reads.size() - 1)
return sb.toString();
sb.append(",");
}
}
});
descriptorsList.add(new PL_A("-descrsR2", "Export description lines from initial .fasta or .fastq file " + "of the second reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R2", "descrsR2") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads option " + "or don't use \'-descrR1\' in exportAlignments");
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
SequenceRead read = reads.get(i);
if (read.numberOfReads() < 2)
throw new IllegalArgumentException("Error for option \'-descrsR2\':\n" + "No description available for second read: your input data was single-end");
sb.append(read.getRead(1).getDescription());
if (i == reads.size() - 1)
return sb.toString();
sb.append(",");
}
}
});
descriptorsList.add(new PL_A("-readHistory", "Export read history", "Read history", "readHistory") {
@Override
protected String extract(VDJCAlignments object) {
try {
return GlobalObjectMappers.toOneLine(object.getHistory());
} catch (JsonProcessingException ex) {
throw new RuntimeException(ex);
}
}
});
for (final GeneType type : GeneType.values()) {
String c = Character.toLowerCase(type.getLetter()) + "IdentityPercents";
descriptorsList.add(new PL_O("-" + c, type.getLetter() + " alignment identity percents", type.getLetter() + " alignment identity percents", c) {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits == null)
return NULL;
StringBuilder sb = new StringBuilder();
sb.append("");
for (int i = 0; ; i++) {
sb.append(hits[i].getIdentity());
if (i == hits.length - 1)
return sb.toString();
sb.append(",");
}
}
});
}
for (final GeneType type : GeneType.values()) {
String c = Character.toLowerCase(type.getLetter()) + "BestIdentityPercent";
descriptorsList.add(new PL_O("-" + c, type.getLetter() + "best alignment identity percent", type.getLetter() + "best alignment identity percent", c) {
@Override
protected String extract(VDJCObject object) {
VDJCHit hit = object.getBestHit(type);
if (hit == null)
return NULL;
return Float.toString(hit.getIdentity());
}
});
}
descriptorsList.add(new PL_O("-chains", "Chains", "Chains", "Chains") {
@Override
protected String extract(VDJCObject object) {
return object.commonChains().toString();
}
});
descriptorsList.add(new PL_O("-topChains", "Top chains", "Top chains", "topChains") {
@Override
protected String extract(VDJCObject object) {
return object.commonTopChains().toString();
}
});
descriptors = descriptorsList.toArray(new Field[descriptorsList.size()]);
}
return descriptors;
}
use of com.milaboratory.core.sequence.AminoAcidSequence in project repseqio by repseqio.
the class InferAnchorPointsAction method go.
public void go(GeneFeature geneFeature, String inputFile, String outputFile) throws Exception {
VDJCLibraryRegistry.resetDefaultRegistry();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
// Map of library names
Map<String, String> libraryNameToAddress = new HashMap<>();
// Registering reference library
int i = 0;
for (String refAddress : params.getReference()) {
String name = REFERENCE_LIBRARY_PREFIX + (i++);
reg.registerLibraries(refAddress, name);
libraryNameToAddress.put(name, refAddress);
}
if (params.getReference().isEmpty()) {
reg.loadAllLibraries("default");
for (VDJCLibrary library : reg.getLoadedLibraries()) libraryNameToAddress.put(library.getName(), "built-in");
}
// Registering target library
reg.registerLibraries(inputFile, TARGET_LIBRARY_NAME);
// Compile gene filter
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
List<VDJCLibrary> refLibraries = reg.getLoadedLibrariesByNamePattern(REFERENCE_LIBRARY_PATTERN);
SimpleBatchAlignerParameters<AminoAcidSequence> aParams = new SimpleBatchAlignerParameters<>(5, 0.4f, params.getAbsoluteMinScore(geneFeature), true, AffineGapAlignmentScoring.getAminoAcidBLASTScoring(BLASTMatrix.BLOSUM62, -10, -1));
SimpleBatchAligner<AminoAcidSequence, Ref> aligner = new SimpleBatchAligner<>(aParams);
int dbSize = 0;
for (VDJCLibrary lib : refLibraries) {
for (VDJCGene gene : lib.getGenes()) {
NucleotideSequence nSeq = gene.getFeature(geneFeature);
if (nSeq == null)
continue;
ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
ReferencePoints partitioning = gene.getPartitioning();
if (frameReference == null)
continue;
int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
if (relativePosition < 0)
continue;
TranslationParameters frame = withIncompleteCodon(relativePosition);
AminoAcidSequence aaSequence = AminoAcidSequence.translate(nSeq, frame);
aligner.addReference(aaSequence, new Ref(gene, frame, nSeq.size()));
++dbSize;
}
}
System.out.println("DB size: " + dbSize);
System.out.println();
// Checking that db is not empty
if (dbSize == 0)
throw new RuntimeException("No reference genes.");
ArrayList<VDJCLibraryData> result = new ArrayList<>();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintStream bufferPS = new PrintStream(bos);
// Iteration over target genes
for (VDJCLibrary lib : reg.getLoadedLibrariesByName(TARGET_LIBRARY_NAME)) {
ArrayList<VDJCGeneData> genes = new ArrayList<>();
for (VDJCGene targetGene : lib.getGenes()) {
bos.reset();
PrintStream ps = params.outputOnlyModified() ? bufferPS : System.out;
if (namePattern != null && !namePattern.matcher(targetGene.getName()).matches()) {
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
ps.println("Processing: " + targetGene.getName() + " (" + (targetGene.isFunctional() ? "F" : "P") + ") " + targetGene.getChains());
// Getting gene feature sequence from target gene
NucleotideSequence nSeq = targetGene.getFeature(geneFeature);
if (nSeq == null) {
ps.println("Failed to extract " + GeneFeature.encode(geneFeature));
ps.println("================");
ps.println();
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
// Alignment result
AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> bestAResult = null;
TranslationParameters bestFrame = null;
// Searching for best alignment
for (TranslationParameters frame : TRANSLATION_PARAMETERS) {
AminoAcidSequence aaSeq = AminoAcidSequence.translate(nSeq, frame);
AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> r = aligner.align(aaSeq);
if (r != null && r.hasHits() && (bestAResult == null || bestAResult.getBestHit().getAlignment().getScore() < r.getBestHit().getAlignment().getScore())) {
bestAResult = r;
bestFrame = frame;
}
}
if (bestFrame == null) {
ps.println("No alignments found.");
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
List<AlignmentHit<AminoAcidSequence, Ref>> hits = bestAResult.getHits();
VDJCGeneData targetGeneData = targetGene.getData().clone();
boolean anyPointChanged = false;
for (int ai = 0; ai < hits.size(); ai++) {
// Accumulate output
ByteArrayOutputStream localBos = new ByteArrayOutputStream();
PrintStream localPS = new PrintStream(localBos);
Alignment<AminoAcidSequence> bestAlignment = hits.get(ai).getAlignment();
Ref bestRef = hits.get(ai).getRecordPayload();
VDJCGene bestReferenceGene = bestRef.gene;
localPS.println("Aligned with " + bestReferenceGene.getName() + " from " + libraryNameToAddress.get(bestReferenceGene.getParentLibrary().getName()) + " ; Score = " + bestAlignment.getScore());
AlignmentHelper alignmentHelper = bestAlignment.getAlignmentHelper();
for (AlignmentHelper h : alignmentHelper.split(150)) localPS.println(h + "\n");
ReferencePoints targetPartitioning = targetGene.getPartitioning();
ReferencePoints referencePartitioning = bestReferenceGene.getPartitioning();
for (GeneFeature.ReferenceRange range : geneFeature) for (ReferencePoint point : range.getIntermediatePoints()) {
localPS.print(point + ": ");
boolean isAvailable = targetPartitioning.isAvailable(point);
if (isAvailable) {
localPS.println("already set");
}
if (!referencePartitioning.isAvailable(point)) {
if (!isAvailable)
localPS.println("not set in reference gene");
continue;
}
int ntPositionInReference = referencePartitioning.getRelativePosition(geneFeature, point);
// Projecting position
AminoAcidSequence.AminoAcidSequencePosition aaPositionInReferece = AminoAcidSequence.convertNtPositionToAA(ntPositionInReference, bestRef.ntSeqLength, bestRef.frame);
if (aaPositionInReferece == null) {
if (!isAvailable)
localPS.println("failed to convert to aa position in ref");
continue;
}
int aaPositionInTarget = Alignment.aabs(bestAlignment.convertToSeq2Position(aaPositionInReferece.aminoAcidPosition));
if (aaPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed to project using alignment");
continue;
}
int ntPositionInTarget = AminoAcidSequence.convertAAPositionToNt(aaPositionInTarget, nSeq.size(), bestFrame);
if (ntPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed");
continue;
}
ntPositionInTarget += aaPositionInReferece.positionInTriplet;
ntPositionInTarget = targetPartitioning.getAbsolutePosition(geneFeature, ntPositionInTarget);
if (ntPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed");
continue;
}
if (isAvailable) {
int existingPosition = targetPartitioning.getPosition(point);
if (existingPosition != ntPositionInTarget) {
localPS.println("inferred position differs from existing value. existing: " + existingPosition + ", inferred: " + ntPositionInTarget);
}
continue;
}
localPS.println(ntPositionInTarget);
targetGeneData.getAnchorPoints().put(point, (long) ntPositionInTarget);
anyPointChanged = true;
}
if (!anyPointChanged) {
if (!params.outputOnlyModified() && ai == 0)
ps.write(localBos.toByteArray());
} else {
ps.write(localBos.toByteArray());
break;
}
}
ps.println("================");
ps.println();
if (anyPointChanged && params.outputOnlyModified())
System.out.write(bos.toByteArray());
genes.add(targetGeneData);
}
result.add(new VDJCLibraryData(lib.getData(), genes));
}
VDJCDataUtils.writeToFile(result, outputFile, false);
}
use of com.milaboratory.core.sequence.AminoAcidSequence in project repseqio by repseqio.
the class DebugAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(params.getInput());
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
for (VDJCGene gene : lib.getGenes()) {
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
// First generate list of warning messages
List<String> warnings = new ArrayList<>();
if (gene.isFunctional() || params.getCheckAll()) {
NucleotideSequence l3;
switch(gene.getGeneType()) {
case Variable:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3FirstTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 start");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
// Flag suspicious exon borders
// https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
ReferencePoints partitioning = gene.getPartitioning();
if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
// Iterating over all reading-frame bound anchor points of V gene
for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
continue;
// And checking that they are in the same frame as Start (L1Begin)
int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
if (// Point is defined
relativePosition >= 0 && relativePosition % 3 != 0)
warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
}
}
break;
case Joining:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3LastTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 end");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
break;
}
for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
if (aaSequence != null) {
// Flag if contains stop codon
if (aaSequence.numberOfStops() > 0)
warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
}
}
}
if (params.getProblemOnly() && warnings.isEmpty())
continue;
System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
if (!warnings.isEmpty()) {
System.out.println();
System.out.println("WARNINGS: ");
for (String warning : warnings) {
System.out.println(warning);
}
System.out.println();
}
for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
System.out.println();
System.out.println(GeneFeature.encode(geneFeature));
NucleotideSequence nSequence = gene.getFeature(geneFeature);
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
System.out.print("N: ");
if (nSequence == null)
System.out.println("Not Available");
else
System.out.println(nSequence);
if (GeneFeature.getFrameReference(geneFeature) != null) {
System.out.print("AA: ");
if (aaSequence == null)
System.out.println("Not Available");
else
System.out.println(aaSequence);
}
}
System.out.println("=========");
System.out.println();
}
}
}
Aggregations