use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class PartialAlignmentsAssemblerAligner method process0.
@Override
@SuppressWarnings("unchecked")
protected VDJCAlignmentResult<VDJCMultiRead> process0(VDJCMultiRead input) {
final int nReads = input.numberOfReads();
EnumMap<GeneType, VDJCHit[]> vdjcHits = new EnumMap<>(GeneType.class);
NSequenceWithQuality[] targets = new NSequenceWithQuality[nReads];
Chains currentChains = Chains.ALL;
// Across all gene types
int lastAlignedTarget = 0;
int firstJTarget = -1;
int lastVTarget = -1;
for (int g = 0; g < GeneType.VJC_REFERENCE.length; g++) {
GeneType gt = GeneType.VJC_REFERENCE[g];
AlignmentHit<NucleotideSequence, VDJCGene>[][] alignmentHits = new AlignmentHit[nReads][];
Arrays.fill(alignmentHits, new AlignmentHit[0]);
for (int targetId = lastAlignedTarget; targetId < nReads; targetId++) {
targets[targetId] = input.getRead(targetId).getData();
final NucleotideSequence sequence = input.getRead(targetId).getData().getSequence();
AlignmentResult<AlignmentHit<NucleotideSequence, VDJCGene>> als;
final BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(gt);
if (aligner != null) {
int pointer = 0;
if (g != 0) {
// Not V gene
VDJCHit[] vdjcHits1 = vdjcHits.get(GeneType.VJC_REFERENCE[g - 1]);
Alignment<NucleotideSequence> alignment;
if (vdjcHits1.length != 0 && (alignment = vdjcHits1[0].getAlignment(targetId)) != null)
pointer = alignment.getSequence2Range().getTo();
}
als = aligner.align(sequence, pointer, sequence.size(), getFilter(gt, currentChains));
if (als != null && als.hasHits()) {
lastAlignedTarget = targetId;
if (// V
g == 0)
lastVTarget = targetId;
if (// J
g == 1)
firstJTarget = targetId;
alignmentHits[targetId] = als.getHits().toArray(new AlignmentHit[als.getHits().size()]);
}
}
}
Chains chains = Chains.EMPTY;
for (AlignmentHit<NucleotideSequence, VDJCGene>[] alignmentHit0 : alignmentHits) if (alignmentHit0 != null)
for (AlignmentHit<NucleotideSequence, VDJCGene> hit : alignmentHit0) chains = chains.merge(hit.getRecordPayload().getChains());
currentChains = currentChains.intersection(chains);
vdjcHits.put(gt, combine(parameters.getFeatureToAlign(gt), alignmentHits));
}
boolean fineVAlignmentPerformed = false, fineJAlignmentPerformed = false;
// Additional (fine) alignment step for V gene
VDJCHit[] vHits = vdjcHits.get(GeneType.Variable);
final AlignmentScoring<NucleotideSequence> vScoring = parameters.getVAlignerParameters().getParameters().getScoring();
if (// TODO implement AffineGapAlignmentScoring
vHits != null && vHits.length > 0 && !(vScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Joining) != null && vdjcHits.get(GeneType.Joining).length > 0) {
int minimalVSpace = getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()) / vScoring.getMaximalMatchScore();
// Assert
if (firstJTarget == -1)
throw new AssertionError();
for (int targetId = 1; targetId <= firstJTarget; targetId++) {
int vSpace;
final NucleotideSequence sequence2 = targets[targetId].getSequence();
if (vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId) != null && (vSpace = vdjcHits.get(GeneType.Joining)[0].getAlignment(targetId).getSequence2Range().getFrom()) >= minimalVSpace) {
for (int vHitIndex = 0; vHitIndex < vHits.length; vHitIndex++) {
VDJCHit vHit = vHits[vHitIndex];
// Perform fine alignment only if target is not already aligned by fast aligner
if (vHit.getAlignment(targetId) != null)
continue;
Alignment<NucleotideSequence> leftAlignment = vHit.getAlignment(targetId - 1);
if (leftAlignment == null)
continue;
final NucleotideSequence sequence1 = leftAlignment.getSequence1();
final int beginFR3 = vHit.getGene().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin);
if (beginFR3 == -1)
continue;
final Alignment alignment = AlignerCustom.alignLinearSemiLocalLeft0((LinearGapAlignmentScoring<NucleotideSequence>) vScoring, sequence1, sequence2, beginFR3, sequence1.size() - beginFR3, 0, vSpace, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
if (alignment.getScore() < getAbsoluteMinScore(parameters.getVAlignerParameters().getParameters()))
continue;
fineVAlignmentPerformed = true;
vHits[vHitIndex] = vHit.setAlignment(targetId, alignment);
}
}
}
}
Arrays.sort(vHits);
vdjcHits.put(GeneType.Variable, cutRelativeScore(vHits, parameters.getVAlignerParameters().getRelativeMinScore(), parameters.getVAlignerParameters().getParameters().getMaxHits()));
// Additional (fine) alignment step for J gene
VDJCHit[] jHits = vdjcHits.get(GeneType.Joining);
final AlignmentScoring<NucleotideSequence> jScoring = parameters.getJAlignerParameters().getParameters().getScoring();
if (// TODO implement AffineGapAlignmentScoring
jHits != null && jHits.length > 0 && !(jScoring instanceof AffineGapAlignmentScoring) && vdjcHits.get(GeneType.Variable) != null && vdjcHits.get(GeneType.Variable).length > 0) {
int minimalJSpace = getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()) / jScoring.getMaximalMatchScore();
// Assert
if (lastVTarget == -1)
throw new AssertionError();
for (int targetId = lastVTarget; targetId < nReads - 1; targetId++) {
int jSpaceBegin;
final NucleotideSequence sequence2 = targets[targetId].getSequence();
if (vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId) != null && (sequence2.size() - (jSpaceBegin = vdjcHits.get(GeneType.Variable)[0].getAlignment(targetId).getSequence2Range().getTo())) >= minimalJSpace) {
for (int jHitIndex = 0; jHitIndex < jHits.length; jHitIndex++) {
VDJCHit jHit = jHits[jHitIndex];
// Perform fine alignment only if target is not already aligned by fast aligner
if (jHit.getAlignment(targetId) != null)
continue;
Alignment<NucleotideSequence> rightAlignment = jHit.getAlignment(targetId + 1);
if (rightAlignment == null)
continue;
final NucleotideSequence sequence1 = rightAlignment.getSequence1();
final Alignment alignment = AlignerCustom.alignLinearSemiLocalRight0((LinearGapAlignmentScoring) jScoring, sequence1, sequence2, 0, sequence1.size(), jSpaceBegin, sequence2.size() - jSpaceBegin, false, true, NucleotideSequence.ALPHABET, linearMatrixCache.get());
if (alignment.getScore() < getAbsoluteMinScore(parameters.getJAlignerParameters().getParameters()))
continue;
fineJAlignmentPerformed = true;
jHits[jHitIndex] = jHit.setAlignment(targetId, alignment);
}
}
}
}
Arrays.sort(jHits);
vdjcHits.put(GeneType.Joining, cutRelativeScore(jHits, parameters.getJAlignerParameters().getRelativeMinScore(), parameters.getJAlignerParameters().getParameters().getMaxHits()));
int dGeneTarget = -1;
VDJCHit[] vResult = vdjcHits.get(GeneType.Variable);
VDJCHit[] jResult = vdjcHits.get(GeneType.Joining);
if (vResult.length != 0 && jResult.length != 0)
for (int i = 0; i < nReads; i++) if (vResult[0].getAlignment(i) != null && jResult[0].getAlignment(i) != null) {
dGeneTarget = i;
break;
}
// if (fineVAlignmentPerformed && fineJAlignmentPerformed)
// System.out.println("sd");
VDJCHit[] dResult;
if (dGeneTarget == -1)
dResult = new VDJCHit[0];
else {
final Alignment<NucleotideSequence> vAl = vResult[0].getAlignment(dGeneTarget);
final Alignment<NucleotideSequence> jAl = jResult[0].getAlignment(dGeneTarget);
if (vAl == null || jAl == null || singleDAligner == null)
dResult = new VDJCHit[0];
else
dResult = singleDAligner.align(targets[dGeneTarget].getSequence(), getPossibleDLoci(vResult, jResult), vAl.getSequence2Range().getTo(), jAl.getSequence2Range().getFrom(), dGeneTarget, nReads);
}
final VDJCAlignments alignment = new VDJCAlignments(vResult, dResult, jResult, cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets, input.getHistory(), input.getOriginalReads());
return new VDJCAlignmentResult<>(input, alignment);
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class VDJCAlignerPVFirst method process0.
@Override
protected VDJCAlignmentResult<PairedRead> process0(final PairedRead input) {
Target[] targets = getTargets(input);
// Creates helper classes for each PTarget
PAlignmentHelper[] helpers = createInitialHelpers(targets);
VDJCAlignmentResult<PairedRead> result = parameters.getAllowPartialAlignments() ? processPartial(input, helpers) : processStrict(input, helpers);
// if sAligner == null (which means --no-merge option), no merge will be performed
if (result.alignment != null && sAligner != null) {
final VDJCAlignments alignment = result.alignment;
final TargetMerger.TargetMergingResult mergeResult = alignmentsMerger.merge(new AlignedTarget(alignment, 0), new AlignedTarget(alignment, 1), false);
if (mergeResult.failedDueInconsistentAlignments()) {
GeneType geneType = mergeResult.getFailedMergedGeneType();
int removeId = alignment.getBestHit(geneType).getAlignment(0).getScore() > alignment.getBestHit(geneType).getAlignment(1).getScore() ? 1 : 0;
if (listener != null)
listener.onTopHitSequenceConflict(input, alignment, geneType);
return new VDJCAlignmentResult<>(input, alignment.removeBestHitAlignment(geneType, removeId));
} else if (mergeResult.isSuccessful()) {
assert mergeResult.isUsingAlignments();
NSequenceWithQuality alignedTarget = mergeResult.getResult().getTarget();
SingleRead sRead = new SingleReadImpl(input.getId(), alignedTarget, "");
VDJCAlignmentResult<SingleRead> sResult = sAligner.process0(sRead);
if (sResult.alignment == null)
return result;
VDJCAlignments sAlignment = sResult.alignment.setHistory(new SequenceHistory[] { new SequenceHistory.Merge(SequenceHistory.OverlapType.AlignmentOverlap, result.alignment.getHistory(0), result.alignment.getHistory(1), mergeResult.getOffset(), mergeResult.getMismatched()) }, new SequenceRead[] { input });
if (listener != null)
listener.onSuccessfulAlignmentOverlap(input, sAlignment);
return new VDJCAlignmentResult<>(input, sAlignment);
}
}
return result;
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class FieldExtractors method getFields.
public static synchronized Field[] getFields() {
if (descriptors == null) {
List<Field> descriptorsList = new ArrayList<>();
// Number of targets
descriptorsList.add(new PL_O("-targets", "Export number of targets", "Number of targets", "numberOfTargets") {
@Override
protected String extract(VDJCObject object) {
return Integer.toString(object.numberOfTargets());
}
});
// Best hits
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hit", "Export best " + l + " hit", "Best " + l + " hit", "best" + l + "Hit") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return bestHit.getGene().getName();
}
});
}
// Best gene
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Gene", "Export best " + l + " hit gene name (e.g. TRBV12-3 for TRBV12-3*00)", "Best " + l + " gene", "best" + l + "Gene") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return bestHit.getGene().getGeneName();
}
});
}
// Best family
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Family", "Export best " + l + " hit family name (e.g. TRBV12 for TRBV12-3*00)", "Best " + l + " family", "best" + l + "Family") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return bestHit.getGene().getFamilyName();
}
});
}
// Best hit score
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitScore", "Export score for best " + l + " hit", "Best " + l + " hit score", "best" + l + "HitScore") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
return String.valueOf(bestHit.getScore());
}
});
}
// All hits
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "HitsWithScore", "Export all " + l + " hits with score", "All " + l + " hits", "all" + l + "HitsWithScore") {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(hits[i].getGene().getName()).append("(").append(SCORE_FORMAT.format(hits[i].getScore())).append(")");
if (i == hits.length - 1)
break;
sb.append(",");
}
return sb.toString();
}
});
}
// All hits without score
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Hits", "Export all " + l + " hits", "All " + l + " Hits", "all" + l + "Hits") {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(hits[i].getGene().getName());
if (i == hits.length - 1)
break;
sb.append(",");
}
return sb.toString();
}
});
}
// All gene names
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Genes", "Export all " + l + " gene names (e.g. TRBV12-3 for TRBV12-3*00)", "All " + l + " genes", "all" + l + "Genes", type) {
@Override
String extractStringForHit(VDJCHit hit) {
return hit.getGene().getGeneName();
}
});
}
// All families
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new StringExtractor("-" + Character.toLowerCase(l) + "Families", "Export all " + l + " gene family anmes (e.g. TRBV12 for TRBV12-3*00)", "All " + l + " families", "all" + l + "Families", type) {
@Override
String extractStringForHit(VDJCHit hit) {
return hit.getGene().getFamilyName();
}
});
}
// Best alignment
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignment", "Export best " + l + " alignment", "Best " + l + " alignment", "best" + l + "Alignment") {
@Override
protected String extract(VDJCObject object) {
VDJCHit bestHit = object.getBestHit(type);
if (bestHit == null)
return NULL;
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
Alignment<NucleotideSequence> alignment = bestHit.getAlignment(i);
if (alignment == null)
sb.append(NULL);
else
sb.append(alignment.toCompactString());
if (i == object.numberOfTargets() - 1)
break;
sb.append(",");
}
return sb.toString();
}
});
}
// All alignments
for (final GeneType type : GeneType.values()) {
char l = type.getLetter();
descriptorsList.add(new PL_O("-" + Character.toLowerCase(l) + "Alignments", "Export all " + l + " alignments", "All " + l + " alignments", "all" + l + "Alignments") {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits.length == 0)
return "";
StringBuilder sb = new StringBuilder();
for (int j = 0; ; ++j) {
for (int i = 0; ; i++) {
Alignment<NucleotideSequence> alignment = hits[j].getAlignment(i);
if (alignment == null)
sb.append(NULL);
else
sb.append(alignment.toCompactString());
if (i == object.numberOfTargets() - 1)
break;
sb.append(',');
}
if (j == hits.length - 1)
break;
sb.append(';');
}
return sb.toString();
}
});
}
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-nFeature", "Export nucleotide sequence of specified gene feature", "N. Seq. ", "nSeq") {
@Override
public String convert(NSequenceWithQuality seq) {
return seq.getSequence().toString();
}
});
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-qFeature", "Export quality string of specified gene feature", "Qual. ", "qual") {
@Override
public String convert(NSequenceWithQuality seq) {
return seq.getQuality().toString();
}
});
descriptorsList.add(new FeatureExtractors.WithHeader("-aaFeature", "Export amino acid sequence of specified gene feature", 1, new String[] { "AA. Seq. " }, new String[] { "aaSeq" }) {
@Override
protected String extractValue(VDJCObject object, GeneFeature[] parameters) {
GeneFeature geneFeature = parameters[parameters.length - 1];
NSequenceWithQuality feature = object.getFeature(geneFeature);
if (feature == null)
return NULL;
int targetId = object.getTargetContainingFeature(geneFeature);
TranslationParameters tr = targetId == -1 ? TranslationParameters.FromLeftWithIncompleteCodon : object.getPartitionedTarget(targetId).getPartitioning().getTranslationParameters(geneFeature);
if (tr == null)
return NULL;
return AminoAcidSequence.translate(feature.getSequence(), tr).toString();
}
});
// descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromLeft", "Export amino acid sequence of " +
// "specified gene feature starting from the leftmost nucleotide (differs from -aaFeature only for " +
// "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
// @Override
// public String convert(NSequenceWithQuality seq) {
// return AminoAcidSequence.translate(seq.getSequence(), FromLeftWithoutIncompleteCodon).toString();
// }
// });
//
// descriptorsList.add(new FeatureExtractorDescriptor("-aaFeatureFromRight", "Export amino acid sequence of " +
// "specified gene feature starting from the rightmost nucleotide (differs from -aaFeature only for " +
// "sequences which length are not multiple of 3)", "AA. Seq.", "aaSeq") {
// @Override
// public String convert(NSequenceWithQuality seq) {
// return AminoAcidSequence.translate(seq.getSequence(), FromRightWithoutIncompleteCodon).toString();
// }
// });
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-minFeatureQuality", "Export minimal quality of specified gene feature", "Min. qual. ", "minQual") {
@Override
public String convert(NSequenceWithQuality seq) {
return "" + seq.getQuality().minValue();
}
});
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-avrgFeatureQuality", "Export average quality of specified gene feature", "Mean. qual. ", "meanQual") {
@Override
public String convert(NSequenceWithQuality seq) {
return "" + seq.getQuality().meanValue();
}
});
descriptorsList.add(new FeatureExtractors.NSeqExtractor("-lengthOf", "Exports length of specified gene feature.", "Length of ", "lengthOf") {
@Override
public String convert(NSequenceWithQuality seq) {
return "" + seq.size();
}
});
descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutations", "Extract nucleotide mutations for specific gene feature; relative to germline sequence.", 1, new String[] { "N. Mutations in " }, new String[] { "nMutations" }) {
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
return mutations.encode(",");
}
});
descriptorsList.add(new FeatureExtractors.MutationsExtractor("-nMutationsRelative", "Extract nucleotide mutations for specific gene feature relative to another feature.", 2, new String[] { "N. Mutations in ", " relative to " }, new String[] { "nMutationsIn", "Relative" }) {
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
return mutations.encode(",");
}
});
final class AAMutations extends FeatureExtractors.MutationsExtractor {
AAMutations(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
super(command, description, nArgs, hPrefix, sPrefix);
}
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
if (tr == null)
return "-";
Mutations<AminoAcidSequence> aaMuts = MutationsUtil.nt2aa(seq1, mutations, tr);
if (aaMuts == null)
return "-";
return aaMuts.encode(",");
}
}
descriptorsList.add(new AAMutations("-aaMutations", "Extract amino acid mutations for specific gene feature", 1, new String[] { "AA. Mutations in " }, new String[] { "aaMutations" }));
descriptorsList.add(new AAMutations("-aaMutationsRelative", "Extract amino acid mutations for specific gene feature relative to another feature.", 2, new String[] { "AA. Mutations in ", " relative to " }, new String[] { "aaMutationsIn", "Relative" }));
final class MutationsDetailed extends FeatureExtractors.MutationsExtractor {
MutationsDetailed(String command, String description, int nArgs, String[] hPrefix, String[] sPrefix) {
super(command, description, nArgs, hPrefix, sPrefix);
}
@Override
String convert(Mutations<NucleotideSequence> mutations, NucleotideSequence seq1, NucleotideSequence seq2, TranslationParameters tr) {
if (tr == null)
return "-";
MutationsUtil.MutationNt2AADescriptor[] descriptors = MutationsUtil.nt2aaDetailed(seq1, mutations, tr, 10);
if (descriptors == null)
return "-";
StringBuilder sb = new StringBuilder();
for (int i = 0; i < descriptors.length; i++) {
sb.append(descriptors[i]);
if (i == descriptors.length - 1)
break;
sb.append(",");
}
return sb.toString();
}
}
String detailedMutationsFormat = "Format <nt_mutation>:<aa_mutation_individual>:<aa_mutation_cumulative>, where <aa_mutation_individual> is an expected amino acid " + "mutation given no other mutations have occurred, and <aa_mutation_cumulative> amino acid mutation is the observed amino acid " + "mutation combining effect from all other. WARNING: format may change in following versions.";
descriptorsList.add(new MutationsDetailed("-mutationsDetailed", "Detailed list of nucleotide and corresponding amino acid mutations. " + detailedMutationsFormat, 1, new String[] { "Detailed mutations in " }, new String[] { "mutationsDetailedIn" }));
descriptorsList.add(new MutationsDetailed("-mutationsDetailedRelative", "Detailed list of nucleotide and corresponding amino acid mutations written, positions relative to specified gene feature. " + detailedMutationsFormat, 2, new String[] { "Detailed mutations in ", " relative to " }, new String[] { "mutationsDetailedIn", "Relative" }));
descriptorsList.add(new ExtractReferencePointPosition());
descriptorsList.add(new ExtractDefaultReferencePointsPositions());
descriptorsList.add(new PL_A("-readId", "Export id of read corresponding to alignment", "Read id", "readId") {
@Override
protected String extract(VDJCAlignments object) {
return "" + object.getMinReadId();
}
@Override
public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
System.out.println("WARNING: -readId is deprecated. Use -readIds");
return super.create(outputMode, args);
}
});
descriptorsList.add(new PL_A("-readIds", "Export id of read corresponding to alignment", "Read id", "readId") {
@Override
protected String extract(VDJCAlignments object) {
long[] readIds = object.getReadIds();
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(readIds[i]);
if (i == readIds.length - 1)
return sb.toString();
sb.append(",");
}
}
});
descriptorsList.add(new ExtractSequence(VDJCAlignments.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Read(s) sequence", "readSequence"));
descriptorsList.add(new ExtractSequenceQuality(VDJCAlignments.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Read(s) sequence qualities", "readQuality"));
descriptorsList.add(new PL_C("-cloneId", "Unique clone identifier", "Clone ID", "cloneId") {
@Override
protected String extract(Clone object) {
return "" + object.getId();
}
});
descriptorsList.add(new PL_C("-count", "Export clone count", "Clone count", "cloneCount") {
@Override
protected String extract(Clone object) {
return "" + object.getCount();
}
});
descriptorsList.add(new PL_C("-fraction", "Export clone fraction", "Clone fraction", "cloneFraction") {
@Override
protected String extract(Clone object) {
return "" + object.getFraction();
}
});
descriptorsList.add(new ExtractSequence(Clone.class, "-sequence", "Export aligned sequence (initial read), or 2 sequences in case of paired-end reads", "Clonal sequence(s)", "clonalSequence"));
descriptorsList.add(new ExtractSequenceQuality(Clone.class, "-quality", "Export initial read quality, or 2 qualities in case of paired-end reads", "Clonal sequence quality(s)", "clonalSequenceQuality"));
descriptorsList.add(new PL_A("-descrR1", "Export description line from initial .fasta or .fastq file " + "of the first read (only available if --save-description was used in align command)", "Description R1", "descrR1") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
return reads.get(0).getRead(0).getDescription();
}
@Override
public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
System.out.println("WARNING: -descrR1 is deprecated. Use -descrsR1");
return super.create(outputMode, args);
}
});
descriptorsList.add(new PL_A("-descrR2", "Export description line from initial .fasta or .fastq file " + "of the second read (only available if --save-description was used in align command)", "Description R2", "descrR2") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads=true option " + "or don't use \'-descrR1\' in exportAlignments");
SequenceRead read = reads.get(0);
if (read.numberOfReads() < 2)
throw new IllegalArgumentException("Error for option \'-descrR2\':\n" + "No description available for second read: your input data was single-end");
return read.getRead(1).getDescription();
}
@Override
public FieldExtractor<VDJCAlignments> create(OutputMode outputMode, String[] args) {
System.out.println("WARNING: -descrR2 is deprecated. Use -descrsR2");
return super.create(outputMode, args);
}
});
descriptorsList.add(new PL_A("-descrsR1", "Export description lines from initial .fasta or .fastq file " + "of the first reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R1", "descrsR1") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads option " + "or don't use \'-descrR1\' in exportAlignments");
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
sb.append(reads.get(i).getRead(0).getDescription());
if (i == reads.size() - 1)
return sb.toString();
sb.append(",");
}
}
});
descriptorsList.add(new PL_A("-descrsR2", "Export description lines from initial .fasta or .fastq file " + "of the second reads (only available if -OsaveOriginalReads=true was used in align command)", "Descriptions R2", "descrsR2") {
@Override
protected String extract(VDJCAlignments object) {
List<SequenceRead> reads = object.getOriginalReads();
if (reads == null)
throw new IllegalArgumentException("Error for option \'-descrR1\':\n" + "No description available for read: either re-run align action with -OsaveOriginalReads option " + "or don't use \'-descrR1\' in exportAlignments");
StringBuilder sb = new StringBuilder();
for (int i = 0; ; i++) {
SequenceRead read = reads.get(i);
if (read.numberOfReads() < 2)
throw new IllegalArgumentException("Error for option \'-descrsR2\':\n" + "No description available for second read: your input data was single-end");
sb.append(read.getRead(1).getDescription());
if (i == reads.size() - 1)
return sb.toString();
sb.append(",");
}
}
});
descriptorsList.add(new PL_A("-readHistory", "Export read history", "Read history", "readHistory") {
@Override
protected String extract(VDJCAlignments object) {
try {
return GlobalObjectMappers.toOneLine(object.getHistory());
} catch (JsonProcessingException ex) {
throw new RuntimeException(ex);
}
}
});
for (final GeneType type : GeneType.values()) {
String c = Character.toLowerCase(type.getLetter()) + "IdentityPercents";
descriptorsList.add(new PL_O("-" + c, type.getLetter() + " alignment identity percents", type.getLetter() + " alignment identity percents", c) {
@Override
protected String extract(VDJCObject object) {
VDJCHit[] hits = object.getHits(type);
if (hits == null)
return NULL;
StringBuilder sb = new StringBuilder();
sb.append("");
for (int i = 0; ; i++) {
sb.append(hits[i].getIdentity());
if (i == hits.length - 1)
return sb.toString();
sb.append(",");
}
}
});
}
for (final GeneType type : GeneType.values()) {
String c = Character.toLowerCase(type.getLetter()) + "BestIdentityPercent";
descriptorsList.add(new PL_O("-" + c, type.getLetter() + "best alignment identity percent", type.getLetter() + "best alignment identity percent", c) {
@Override
protected String extract(VDJCObject object) {
VDJCHit hit = object.getBestHit(type);
if (hit == null)
return NULL;
return Float.toString(hit.getIdentity());
}
});
}
descriptorsList.add(new PL_O("-chains", "Chains", "Chains", "Chains") {
@Override
protected String extract(VDJCObject object) {
return object.commonChains().toString();
}
});
descriptorsList.add(new PL_O("-topChains", "Top chains", "Top chains", "topChains") {
@Override
protected String extract(VDJCObject object) {
return object.commonTopChains().toString();
}
});
descriptors = descriptorsList.toArray(new Field[descriptorsList.size()]);
}
return descriptors;
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class ClonalSequenceTest method createRandom.
private ClonalSequence createRandom(int size, RandomGenerator generator) {
NSequenceWithQuality[] data = new NSequenceWithQuality[size];
for (int i = 0; i < size; ++i) {
NucleotideSequence s = TestUtil.randomSequence(NucleotideSequence.ALPHABET, generator, 2, 10);
SequenceQuality q = SequenceQuality.getUniformQuality((byte) 0, s.size());
data[i] = new NSequenceWithQuality(s, q);
}
return new ClonalSequence(data);
}
use of com.milaboratory.core.sequence.NSequenceWithQuality in project mixcr by milaboratory.
the class FullSeqAssemblerTest method testRandom1.
@Test
public void testRandom1() throws Exception {
CloneFraction[] clones = { new CloneFraction(750, masterSeq1WT), // V: S346:G->T
new CloneFraction(1000, masterSeq1VSub1), // J: D55:A
new CloneFraction(1000, masterSeq1VDel1JDel1), // J: D62:C
new CloneFraction(500, masterSeq1VDel1JDelVSub2) };
Well19937c rand = new Well19937c();
rand.setSeed(12345);
RandomDataGenerator rdg = new RandomDataGenerator(rand);
List<SequenceRead> readsOrig = new ArrayList<>();
int readLength = 100;
int id = -1;
for (CloneFraction clone : clones) {
for (int i = 0; i < clone.count; i++) {
// Left read with CDR3
++id;
readsOrig.add(new PairedRead(new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(-rand.nextInt(readLength - clone.seq.cdr3Part), readLength)), "R1_" + id), new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3End(rdg.nextInt(-clone.seq.cdr3Part / 2, clone.seq.jPart), readLength).getReverseComplement()), "R2_" + id)));
++id;
readsOrig.add(new PairedRead(new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(rdg.nextInt(-clone.seq.vPart, clone.seq.cdr3Part / 2 - readLength), readLength)), "R1_" + id), new SingleReadImpl(id, new NSequenceWithQuality(clone.seq.getRangeFromCDR3Begin(-rand.nextInt(readLength - clone.seq.cdr3Part), readLength)).getReverseComplement(), "R2_" + id)));
}
}
// readsOrig = Arrays.asList(setReadId(0, readsOrig.get(12)), setReadId(1, readsOrig.get(13)));
int[] perm = rdg.nextPermutation(readsOrig.size(), readsOrig.size());
List<SequenceRead> reads = new ArrayList<>();
for (int i = 0; i < readsOrig.size(); i++) reads.add(readsOrig.get(perm[i]));
RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(new SequenceReaderCloseable<SequenceRead>() {
int counter = 0;
@Override
public void close() {
}
@Override
public long getNumberOfReads() {
return counter;
}
@Override
public synchronized SequenceRead take() {
if (counter == reads.size())
return null;
return reads.get(counter++);
}
}, true);
params.alignerParameters = VDJCParametersPresets.getByName("rna-seq");
params.alignerParameters.setSaveOriginalReads(true);
params.alignerParameters.setVAlignmentParameters(params.alignerParameters.getVAlignerParameters().setGeneFeatureToAlign(GeneFeature.VTranscriptWithP));
RunMiXCR.AlignResult align = RunMiXCR.align(params);
// // TODO exception for translation
// for (VDJCAlignments al : align.alignments) {
// for (int i = 0; i < al.numberOfTargets(); i++) {
// System.out.println(VDJCAlignmentsFormatter.getTargetAsMultiAlignment(al, i));
// System.out.println();
// }
// System.out.println();
// System.out.println(" ================================================ ");
// System.out.println();
// }
RunMiXCR.AssembleResult assemble = RunMiXCR.assemble(align);
Assert.assertEquals(1, assemble.cloneSet.size());
CloneFactory cloneFactory = new CloneFactory(align.parameters.cloneAssemblerParameters.getCloneFactoryParameters(), align.parameters.cloneAssemblerParameters.getAssemblingFeatures(), align.usedGenes, align.parameters.alignerParameters.getFeaturesToAlignMap());
FullSeqAssembler agg = new FullSeqAssembler(cloneFactory, DEFAULT_PARAMETERS, assemble.cloneSet.get(0), align.parameters.alignerParameters);
FullSeqAssembler.RawVariantsData prep = agg.calculateRawData(() -> CUtils.asOutputPort(align.alignments.stream().filter(a -> a.getFeature(GeneFeature.CDR3) != null).collect(Collectors.toList())));
List<Clone> clns = new ArrayList<>(new CloneSet(Arrays.asList(agg.callVariants(prep))).getClones());
clns.sort(Comparator.comparingDouble(Clone::getCount).reversed());
System.out.println("# Clones: " + clns.size());
id = 0;
for (Clone clone : clns) {
clone = clone.setId(id++);
System.out.println(clone.numberOfTargets());
System.out.println(clone.getCount());
System.out.println(clone.getFraction());
System.out.println(clone.getBestHit(GeneType.Variable).getAlignment(0).getAbsoluteMutations());
System.out.println(clone.getBestHit(GeneType.Joining).getAlignment(0).getAbsoluteMutations());
System.out.println();
// ActionExportClonesPretty.outputCompact(System.out, clone);
}
}
Aggregations