use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.
the class CompileLibraryMavenStage method process.
public static void process(Path libraryRepoFolder, Path cacheFolder, Path buildFolder, Path outputFolder, String tag, boolean isDefault) throws IOException, InterruptedException {
SequenceResolvers.initDefaultResolver(cacheFolder);
Files.createDirectories(buildFolder);
List<Path> compiledPaths = compileDir(libraryRepoFolder, buildFolder);
List<VDJCLibraryData> libs = new ArrayList<>();
for (Path path : compiledPaths) libs.addAll(asList(ONE_LINE.readValue(path.toFile(), VDJCLibraryData[].class)));
VDJCLibraryData[] mergeResult = VDJCDataUtils.merge(libs);
log.info("Merged successfully.");
Files.createDirectories(outputFolder);
String fullLibraryName = "repseqio." + tag;
Path resultPath = outputFolder.resolve(fullLibraryName + ".json");
log.info("Writing {}", resultPath);
VDJCDataUtils.writeToFile(mergeResult, resultPath, true);
if (isDefault) {
Path aliasPath = outputFolder.resolve("default.alias");
log.info("Writing {}", aliasPath);
try (FileOutputStream fos = new FileOutputStream(aliasPath.toFile())) {
fos.write(fullLibraryName.getBytes(StandardCharsets.UTF_8));
}
}
}
use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.
the class InferAnchorPointsAction method go.
public void go(GeneFeature geneFeature, String inputFile, String outputFile) throws Exception {
VDJCLibraryRegistry.resetDefaultRegistry();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
// Map of library names
Map<String, String> libraryNameToAddress = new HashMap<>();
// Registering reference library
int i = 0;
for (String refAddress : params.getReference()) {
String name = REFERENCE_LIBRARY_PREFIX + (i++);
reg.registerLibraries(refAddress, name);
libraryNameToAddress.put(name, refAddress);
}
if (params.getReference().isEmpty()) {
reg.loadAllLibraries("default");
for (VDJCLibrary library : reg.getLoadedLibraries()) libraryNameToAddress.put(library.getName(), "built-in");
}
// Registering target library
reg.registerLibraries(inputFile, TARGET_LIBRARY_NAME);
// Compile gene filter
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
List<VDJCLibrary> refLibraries = reg.getLoadedLibrariesByNamePattern(REFERENCE_LIBRARY_PATTERN);
SimpleBatchAlignerParameters<AminoAcidSequence> aParams = new SimpleBatchAlignerParameters<>(5, 0.4f, params.getAbsoluteMinScore(geneFeature), true, AffineGapAlignmentScoring.getAminoAcidBLASTScoring(BLASTMatrix.BLOSUM62, -10, -1));
SimpleBatchAligner<AminoAcidSequence, Ref> aligner = new SimpleBatchAligner<>(aParams);
int dbSize = 0;
for (VDJCLibrary lib : refLibraries) {
for (VDJCGene gene : lib.getGenes()) {
NucleotideSequence nSeq = gene.getFeature(geneFeature);
if (nSeq == null)
continue;
ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
ReferencePoints partitioning = gene.getPartitioning();
if (frameReference == null)
continue;
int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
if (relativePosition < 0)
continue;
TranslationParameters frame = withIncompleteCodon(relativePosition);
AminoAcidSequence aaSequence = AminoAcidSequence.translate(nSeq, frame);
aligner.addReference(aaSequence, new Ref(gene, frame, nSeq.size()));
++dbSize;
}
}
System.out.println("DB size: " + dbSize);
System.out.println();
// Checking that db is not empty
if (dbSize == 0)
throw new RuntimeException("No reference genes.");
ArrayList<VDJCLibraryData> result = new ArrayList<>();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintStream bufferPS = new PrintStream(bos);
// Iteration over target genes
for (VDJCLibrary lib : reg.getLoadedLibrariesByName(TARGET_LIBRARY_NAME)) {
ArrayList<VDJCGeneData> genes = new ArrayList<>();
for (VDJCGene targetGene : lib.getGenes()) {
bos.reset();
PrintStream ps = params.outputOnlyModified() ? bufferPS : System.out;
if (namePattern != null && !namePattern.matcher(targetGene.getName()).matches()) {
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
ps.println("Processing: " + targetGene.getName() + " (" + (targetGene.isFunctional() ? "F" : "P") + ") " + targetGene.getChains());
// Getting gene feature sequence from target gene
NucleotideSequence nSeq = targetGene.getFeature(geneFeature);
if (nSeq == null) {
ps.println("Failed to extract " + GeneFeature.encode(geneFeature));
ps.println("================");
ps.println();
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
// Alignment result
AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> bestAResult = null;
TranslationParameters bestFrame = null;
// Searching for best alignment
for (TranslationParameters frame : TRANSLATION_PARAMETERS) {
AminoAcidSequence aaSeq = AminoAcidSequence.translate(nSeq, frame);
AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> r = aligner.align(aaSeq);
if (r != null && r.hasHits() && (bestAResult == null || bestAResult.getBestHit().getAlignment().getScore() < r.getBestHit().getAlignment().getScore())) {
bestAResult = r;
bestFrame = frame;
}
}
if (bestFrame == null) {
ps.println("No alignments found.");
if (!params.outputOnlyModified())
genes.add(targetGene.getData());
continue;
}
List<AlignmentHit<AminoAcidSequence, Ref>> hits = bestAResult.getHits();
VDJCGeneData targetGeneData = targetGene.getData().clone();
boolean anyPointChanged = false;
for (int ai = 0; ai < hits.size(); ai++) {
// Accumulate output
ByteArrayOutputStream localBos = new ByteArrayOutputStream();
PrintStream localPS = new PrintStream(localBos);
Alignment<AminoAcidSequence> bestAlignment = hits.get(ai).getAlignment();
Ref bestRef = hits.get(ai).getRecordPayload();
VDJCGene bestReferenceGene = bestRef.gene;
localPS.println("Aligned with " + bestReferenceGene.getName() + " from " + libraryNameToAddress.get(bestReferenceGene.getParentLibrary().getName()) + " ; Score = " + bestAlignment.getScore());
AlignmentHelper alignmentHelper = bestAlignment.getAlignmentHelper();
for (AlignmentHelper h : alignmentHelper.split(150)) localPS.println(h + "\n");
ReferencePoints targetPartitioning = targetGene.getPartitioning();
ReferencePoints referencePartitioning = bestReferenceGene.getPartitioning();
for (GeneFeature.ReferenceRange range : geneFeature) for (ReferencePoint point : range.getIntermediatePoints()) {
localPS.print(point + ": ");
boolean isAvailable = targetPartitioning.isAvailable(point);
if (isAvailable) {
localPS.println("already set");
}
if (!referencePartitioning.isAvailable(point)) {
if (!isAvailable)
localPS.println("not set in reference gene");
continue;
}
int ntPositionInReference = referencePartitioning.getRelativePosition(geneFeature, point);
// Projecting position
AminoAcidSequence.AminoAcidSequencePosition aaPositionInReferece = AminoAcidSequence.convertNtPositionToAA(ntPositionInReference, bestRef.ntSeqLength, bestRef.frame);
if (aaPositionInReferece == null) {
if (!isAvailable)
localPS.println("failed to convert to aa position in ref");
continue;
}
int aaPositionInTarget = Alignment.aabs(bestAlignment.convertToSeq2Position(aaPositionInReferece.aminoAcidPosition));
if (aaPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed to project using alignment");
continue;
}
int ntPositionInTarget = AminoAcidSequence.convertAAPositionToNt(aaPositionInTarget, nSeq.size(), bestFrame);
if (ntPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed");
continue;
}
ntPositionInTarget += aaPositionInReferece.positionInTriplet;
ntPositionInTarget = targetPartitioning.getAbsolutePosition(geneFeature, ntPositionInTarget);
if (ntPositionInTarget == -1) {
if (!isAvailable)
localPS.println("failed");
continue;
}
if (isAvailable) {
int existingPosition = targetPartitioning.getPosition(point);
if (existingPosition != ntPositionInTarget) {
localPS.println("inferred position differs from existing value. existing: " + existingPosition + ", inferred: " + ntPositionInTarget);
}
continue;
}
localPS.println(ntPositionInTarget);
targetGeneData.getAnchorPoints().put(point, (long) ntPositionInTarget);
anyPointChanged = true;
}
if (!anyPointChanged) {
if (!params.outputOnlyModified() && ai == 0)
ps.write(localBos.toByteArray());
} else {
ps.write(localBos.toByteArray());
break;
}
}
ps.println("================");
ps.println();
if (anyPointChanged && params.outputOnlyModified())
System.out.write(bos.toByteArray());
genes.add(targetGeneData);
}
result.add(new VDJCLibraryData(lib.getData(), genes));
}
VDJCDataUtils.writeToFile(result, outputFile, false);
}
use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.
the class ListAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(params.getInput());
System.out.println("TaxonId\tGeneName\tFunctional");
for (VDJCLibraryData lib : libs) for (VDJCGeneData geneData : lib.getGenes()) {
System.out.println(lib.getTaxonId() + "\t" + geneData.getName() + "\t" + (geneData.isFunctional() ? "F" : "P"));
}
}
use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.
the class FilterAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(Paths.get(params.getInput()));
List<VDJCLibraryData> filtered = new ArrayList<>();
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
boolean filterRecords = (params.chain != null);
int filteredLibraries = 0;
int filteredGenes = 0;
for (VDJCLibraryData lib : libs) {
if (params.taxonId != null && params.taxonId != lib.getTaxonId())
continue;
if (params.species != null && !lib.getSpeciesNames().contains(params.species))
continue;
++filteredLibraries;
if (!filterRecords) {
filteredGenes += lib.getGenes().size();
filtered.add(lib);
} else {
List<VDJCGeneData> genes = new ArrayList<>();
for (VDJCGeneData gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
++filteredGenes;
genes.add(gene);
}
filtered.add(new VDJCLibraryData(lib, genes));
}
}
System.out.println("Filtered libraries: " + filteredLibraries);
System.out.println("Filtered genes: " + filteredGenes);
VDJCDataUtils.writeToFile(filtered, params.getOutput(), false);
}
use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.
the class VDJCLibraryRegistry method tryResolve.
private void tryResolve(LibraryResolver resolver, String libraryName) {
// Check if this combination of resolver and libraryName was already being processed
LibraryLoadRequest request = new LibraryLoadRequest(resolver, libraryName);
if (loadedLibraries.contains(request))
return;
// Try resolve
VDJCLibraryData[] resolved = resolver.resolve(libraryName);
// Marking this request as already processed
loadedLibraries.add(request);
// If not resolved
if (resolved == null) {
if (resolver instanceof AliasResolver) {
String newLibraryName = ((AliasResolver) resolver).resolveAlias(libraryName);
if (newLibraryName == null)
// proceed to next resolver
return;
tryResolve(resolver, newLibraryName);
if (aliases.containsKey(libraryName) && !aliases.get(libraryName).equals(newLibraryName))
throw new RuntimeException("Conflicting aliases " + libraryName + " -> " + newLibraryName + " / " + aliases.get(libraryName));
aliases.put(libraryName, newLibraryName);
}
// proceed to next resolver
return;
}
// Registering loaded library entries
for (VDJCLibraryData vdjcLibraryData : resolved) {
VDJCLibraryId sal = new VDJCLibraryId(libraryName, vdjcLibraryData.getTaxonId());
// (or using previous resolution call with the same library name)
if (// If so - ignore it
libraries.containsKey(sal))
continue;
// Registering library
registerLibrary(resolver.getContext(libraryName), libraryName, vdjcLibraryData);
}
}
Aggregations