use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.
the class InferAnchorPointsAction method go.
public void go(GeneFeature geneFeature, String inputFile, String outputFile) throws Exception {
VDJCLibraryRegistry.resetDefaultRegistry();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
// Map of library names
Map<String, String> libraryNameToAddress = new HashMap<>();
// Registering reference library
int i = 0;
for (String refAddress : params.getReference()) {
String name = REFERENCE_LIBRARY_PREFIX + (i++);
reg.registerLibraries(refAddress, name);
libraryNameToAddress.put(name, refAddress);
}
if (params.getReference().isEmpty()) {
reg.loadAllLibraries("default");
for (VDJCLibrary library : reg.getLoadedLibraries()) libraryNameToAddress.put(library.getName(), "built-in");
}
// Registering target library
reg.registerLibraries(inputFile, TARGET_LIBRARY_NAME);
// Compile gene filter
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
List<VDJCLibrary> refLibraries = reg.getLoadedLibrariesByNamePattern(REFERENCE_LIBRARY_PATTERN);
SimpleBatchAlignerParameters<AminoAcidSequence> aParams = new SimpleBatchAlignerParameters<>(5, 0.4f, params.getAbsoluteMinScore(geneFeature), true, AffineGapAlignmentScoring.getAminoAcidBLASTScoring(BLASTMatrix.BLOSUM62, -10, -1));
SimpleBatchAligner<AminoAcidSequence, Ref> aligner = new SimpleBatchAligner<>(aParams);
int dbSize = 0;
for (VDJCLibrary lib : refLibraries) {
for (VDJCGene gene : lib.getGenes()) {
NucleotideSequence nSeq = gene.getFeature(geneFeature);
if (nSeq == null)
continue;
ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
ReferencePoints partitioning = gene.getPartitioning();
if (frameReference == null)
continue;
int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
if (relativePosition < 0)
continue;
TranslationParameters frame = withIncompleteCodon(relativePosition);
AminoAcidSequence aaSequence = AminoAcidSequence.translate(nSeq, frame);
aligner.addReference(aaSequence, new Ref(gene, frame, nSeq.size()));
++dbSize;
}
}
System.out.println("DB size: " + dbSize);
System.out.println();
// Checking that db is not empty
if (dbSize == 0)
throw new RuntimeException("No reference genes.");
ArrayList<VDJCLibraryData> result = new ArrayList<>();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintStream bufferPS = new PrintStream(bos);
// Iteration over target genes
for (VDJCLibrary lib : reg.getLoadedLibrariesByName(TARGET_LIBRARY_NAME)) {
ArrayList<VDJCGeneData> genes = new ArrayList<>();
for (VDJCGene targetGene : lib.getGenes()) {
bos.reset();
PrintStream ps = params.outputOnlyModified() ? bufferPS : System.out;
if (namePattern != null && !namePattern.matcher(targetGene.getName()).matches()) {
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
ps.println("Processing: " + targetGene.getName() + " (" + (targetGene.isFunctional() ? "F" : "P") + ") " + targetGene.getChains());
// Getting gene feature sequence from target gene
NucleotideSequence nSeq = targetGene.getFeature(geneFeature);
if (nSeq == null) {
ps.println("Failed to extract " + GeneFeature.encode(geneFeature));
ps.println("================");
ps.println();
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
// Alignment result
AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> bestAResult = null;
TranslationParameters bestFrame = null;
// Searching for best alignment
for (TranslationParameters frame : TRANSLATION_PARAMETERS) {
AminoAcidSequence aaSeq = AminoAcidSequence.translate(nSeq, frame);
AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> r = aligner.align(aaSeq);
if (r != null && r.hasHits() && (bestAResult == null || bestAResult.getBestHit().getAlignment().getScore() < r.getBestHit().getAlignment().getScore())) {
bestAResult = r;
bestFrame = frame;
}
}
if (bestFrame == null) {
ps.println("No alignments found.");
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
List<AlignmentHit<AminoAcidSequence, Ref>> hits = bestAResult.getHits();
VDJCGeneData targetGeneData = targetGene.getData().clone();
boolean anyPointChanged = false;
for (int ai = 0; ai < hits.size(); ai++) {
// Accumulate output
ByteArrayOutputStream localBos = new ByteArrayOutputStream();
PrintStream localPS = new PrintStream(localBos);
Alignment<AminoAcidSequence> bestAlignment = hits.get(ai).getAlignment();
Ref bestRef = hits.get(ai).getRecordPayload();
VDJCGene bestReferenceGene = bestRef.gene;
localPS.println("Aligned with " + bestReferenceGene.getName() + " from " + libraryNameToAddress.get(bestReferenceGene.getParentLibrary().getName()) + " ; Score = " + bestAlignment.getScore());
AlignmentHelper alignmentHelper = bestAlignment.getAlignmentHelper();
for (AlignmentHelper h : alignmentHelper.split(150)) localPS.println(h + "\n");
ReferencePoints targetPartitioning = targetGene.getPartitioning();
ReferencePoints referencePartitioning = bestReferenceGene.getPartitioning();
for (GeneFeature.ReferenceRange range : geneFeature) for (ReferencePoint point : range.getIntermediatePoints()) {
localPS.print(point + ": ");
boolean isAvailable = targetPartitioning.isAvailable(point);
if (isAvailable) {
localPS.println("already set");
}
if (!referencePartitioning.isAvailable(point)) {
if (!isAvailable)
localPS.println("not set in reference gene");
continue;
}
int ntPositionInReference = referencePartitioning.getRelativePosition(geneFeature, point);
// Projecting position
AminoAcidSequence.AminoAcidSequencePosition aaPositionInReferece = AminoAcidSequence.convertNtPositionToAA(ntPositionInReference, bestRef.ntSeqLength, bestRef.frame);
if (aaPositionInReferece == null) {
if (!isAvailable)
localPS.println("failed to convert to aa position in ref");
continue;
}
int aaPositionInTarget = Alignment.aabs(bestAlignment.convertToSeq2Position(aaPositionInReferece.aminoAcidPosition));
if (aaPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed to project using alignment");
continue;
}
int ntPositionInTarget = AminoAcidSequence.convertAAPositionToNt(aaPositionInTarget, nSeq.size(), bestFrame);
if (ntPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed");
continue;
}
ntPositionInTarget += aaPositionInReferece.positionInTriplet;
ntPositionInTarget = targetPartitioning.getAbsolutePosition(geneFeature, ntPositionInTarget);
if (ntPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed");
continue;
}
if (isAvailable) {
int existingPosition = targetPartitioning.getPosition(point);
if (existingPosition != ntPositionInTarget) {
localPS.println("inferred position differs from existing value. existing: " + existingPosition + ", inferred: " + ntPositionInTarget);
}
continue;
}
localPS.println(ntPositionInTarget);
targetGeneData.getAnchorPoints().put(point, (long) ntPositionInTarget);
anyPointChanged = true;
}
if (!anyPointChanged) {
if (!params.outputOnlyModified() && ai == 0)
ps.write(localBos.toByteArray());
} else {
ps.write(localBos.toByteArray());
break;
}
}
ps.println("================");
ps.println();
if (anyPointChanged && params.outputOnlyModified())
System.out.write(bos.toByteArray());
genes.add(targetGeneData);
}
result.add(new VDJCLibraryData(lib.getData(), genes));
}
VDJCDataUtils.writeToFile(result, outputFile, false);
}
use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.
the class ListAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(params.getInput());
System.out.println("TaxonId\tGeneName\tFunctional");
for (VDJCLibraryData lib : libs) for (VDJCGeneData geneData : lib.getGenes()) {
System.out.println(lib.getTaxonId() + "\t" + geneData.getName() + "\t" + (geneData.isFunctional() ? "F" : "P"));
}
}
use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.
the class FilterAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(Paths.get(params.getInput()));
List<VDJCLibraryData> filtered = new ArrayList<>();
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
boolean filterRecords = (params.chain != null);
int filteredLibraries = 0;
int filteredGenes = 0;
for (VDJCLibraryData lib : libs) {
if (params.taxonId != null && params.taxonId != lib.getTaxonId())
continue;
if (params.species != null && !lib.getSpeciesNames().contains(params.species))
continue;
++filteredLibraries;
if (!filterRecords) {
filteredGenes += lib.getGenes().size();
filtered.add(lib);
} else {
List<VDJCGeneData> genes = new ArrayList<>();
for (VDJCGeneData gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
++filteredGenes;
genes.add(gene);
}
filtered.add(new VDJCLibraryData(lib, genes));
}
}
System.out.println("Filtered libraries: " + filteredLibraries);
System.out.println("Filtered genes: " + filteredGenes);
VDJCDataUtils.writeToFile(filtered, params.getOutput(), false);
}
use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.
the class VDJCLibraryRegistry method registerLibrary.
/**
* Creates and registers single library from VDJCLibraryData
*
* @param context context to use for resolution of sequences
* @param name library name
* @param data library data
* @return created library
*/
public synchronized VDJCLibrary registerLibrary(Path context, String name, VDJCLibraryData data) {
// Creating library object
VDJCLibrary library = new VDJCLibrary(data, name, this, context);
// Getting library id
VDJCLibraryId rootId = library.getLibraryIdWithoutChecksum();
// Check if such library is already registered
if (libraries.containsKey(rootId))
throw new RuntimeException("Duplicate library: " + rootId);
// Loading known sequence fragments from VDJCLibraryData to current SequenceResolver
SequenceResolver resolver = getSequenceResolver();
for (KnownSequenceFragmentData fragment : data.getSequenceFragments()) resolver.resolve(new SequenceAddress(context, fragment.getUri())).setRegion(fragment.getRange(), fragment.getSequence());
// Adding genes
for (VDJCGeneData gene : data.getGenes()) VDJCLibrary.addGene(library, gene);
// Adding common species names
Long taxonId = data.getTaxonId();
for (String speciesName : data.getSpeciesNames()) {
String cSpeciesName = canonicalizeSpeciesName(speciesName);
if (speciesNames.containsKey(cSpeciesName) && !speciesNames.get(cSpeciesName).equals(taxonId))
throw new IllegalArgumentException("Mismatch in common species name between several libraries. " + "(Library name = " + name + "; name = " + speciesName + ").");
speciesNames.put(cSpeciesName, taxonId);
List<String> names = speciesNamesReverse.get(taxonId);
if (names == null)
speciesNamesReverse.put(taxonId, names = new ArrayList<>());
names.add(speciesName);
}
// Adding this library to collection
libraries.put(library.getLibraryIdWithoutChecksum(), library);
return library;
}
Aggregations