Search in sources :

Example 1 with VDJCLibraryData

use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.

the class CompileLibraryMavenStage method process.

public static void process(Path libraryRepoFolder, Path cacheFolder, Path buildFolder, Path outputFolder, String tag, boolean isDefault) throws IOException, InterruptedException {
    SequenceResolvers.initDefaultResolver(cacheFolder);
    Files.createDirectories(buildFolder);
    List<Path> compiledPaths = compileDir(libraryRepoFolder, buildFolder);
    List<VDJCLibraryData> libs = new ArrayList<>();
    for (Path path : compiledPaths) libs.addAll(asList(ONE_LINE.readValue(path.toFile(), VDJCLibraryData[].class)));
    VDJCLibraryData[] mergeResult = VDJCDataUtils.merge(libs);
    log.info("Merged successfully.");
    Files.createDirectories(outputFolder);
    String fullLibraryName = "repseqio." + tag;
    Path resultPath = outputFolder.resolve(fullLibraryName + ".json");
    log.info("Writing {}", resultPath);
    VDJCDataUtils.writeToFile(mergeResult, resultPath, true);
    if (isDefault) {
        Path aliasPath = outputFolder.resolve("default.alias");
        log.info("Writing {}", aliasPath);
        try (FileOutputStream fos = new FileOutputStream(aliasPath.toFile())) {
            fos.write(fullLibraryName.getBytes(StandardCharsets.UTF_8));
        }
    }
}
Also used : Path(java.nio.file.Path) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList)

Example 2 with VDJCLibraryData

use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.

the class InferAnchorPointsAction method go.

public void go(GeneFeature geneFeature, String inputFile, String outputFile) throws Exception {
    VDJCLibraryRegistry.resetDefaultRegistry();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    // Map of library names
    Map<String, String> libraryNameToAddress = new HashMap<>();
    // Registering reference library
    int i = 0;
    for (String refAddress : params.getReference()) {
        String name = REFERENCE_LIBRARY_PREFIX + (i++);
        reg.registerLibraries(refAddress, name);
        libraryNameToAddress.put(name, refAddress);
    }
    if (params.getReference().isEmpty()) {
        reg.loadAllLibraries("default");
        for (VDJCLibrary library : reg.getLoadedLibraries()) libraryNameToAddress.put(library.getName(), "built-in");
    }
    // Registering target library
    reg.registerLibraries(inputFile, TARGET_LIBRARY_NAME);
    // Compile gene filter
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    List<VDJCLibrary> refLibraries = reg.getLoadedLibrariesByNamePattern(REFERENCE_LIBRARY_PATTERN);
    SimpleBatchAlignerParameters<AminoAcidSequence> aParams = new SimpleBatchAlignerParameters<>(5, 0.4f, params.getAbsoluteMinScore(geneFeature), true, AffineGapAlignmentScoring.getAminoAcidBLASTScoring(BLASTMatrix.BLOSUM62, -10, -1));
    SimpleBatchAligner<AminoAcidSequence, Ref> aligner = new SimpleBatchAligner<>(aParams);
    int dbSize = 0;
    for (VDJCLibrary lib : refLibraries) {
        for (VDJCGene gene : lib.getGenes()) {
            NucleotideSequence nSeq = gene.getFeature(geneFeature);
            if (nSeq == null)
                continue;
            ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
            ReferencePoints partitioning = gene.getPartitioning();
            if (frameReference == null)
                continue;
            int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
            if (relativePosition < 0)
                continue;
            TranslationParameters frame = withIncompleteCodon(relativePosition);
            AminoAcidSequence aaSequence = AminoAcidSequence.translate(nSeq, frame);
            aligner.addReference(aaSequence, new Ref(gene, frame, nSeq.size()));
            ++dbSize;
        }
    }
    System.out.println("DB size: " + dbSize);
    System.out.println();
    // Checking that db is not empty
    if (dbSize == 0)
        throw new RuntimeException("No reference genes.");
    ArrayList<VDJCLibraryData> result = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    PrintStream bufferPS = new PrintStream(bos);
    // Iteration over target genes
    for (VDJCLibrary lib : reg.getLoadedLibrariesByName(TARGET_LIBRARY_NAME)) {
        ArrayList<VDJCGeneData> genes = new ArrayList<>();
        for (VDJCGene targetGene : lib.getGenes()) {
            bos.reset();
            PrintStream ps = params.outputOnlyModified() ? bufferPS : System.out;
            if (namePattern != null && !namePattern.matcher(targetGene.getName()).matches()) {
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            ps.println("Processing: " + targetGene.getName() + " (" + (targetGene.isFunctional() ? "F" : "P") + ") " + targetGene.getChains());
            // Getting gene feature sequence from target gene
            NucleotideSequence nSeq = targetGene.getFeature(geneFeature);
            if (nSeq == null) {
                ps.println("Failed to extract " + GeneFeature.encode(geneFeature));
                ps.println("================");
                ps.println();
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            // Alignment result
            AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> bestAResult = null;
            TranslationParameters bestFrame = null;
            // Searching for best alignment
            for (TranslationParameters frame : TRANSLATION_PARAMETERS) {
                AminoAcidSequence aaSeq = AminoAcidSequence.translate(nSeq, frame);
                AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> r = aligner.align(aaSeq);
                if (r != null && r.hasHits() && (bestAResult == null || bestAResult.getBestHit().getAlignment().getScore() < r.getBestHit().getAlignment().getScore())) {
                    bestAResult = r;
                    bestFrame = frame;
                }
            }
            if (bestFrame == null) {
                ps.println("No alignments found.");
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            List<AlignmentHit<AminoAcidSequence, Ref>> hits = bestAResult.getHits();
            VDJCGeneData targetGeneData = targetGene.getData().clone();
            boolean anyPointChanged = false;
            for (int ai = 0; ai < hits.size(); ai++) {
                // Accumulate output
                ByteArrayOutputStream localBos = new ByteArrayOutputStream();
                PrintStream localPS = new PrintStream(localBos);
                Alignment<AminoAcidSequence> bestAlignment = hits.get(ai).getAlignment();
                Ref bestRef = hits.get(ai).getRecordPayload();
                VDJCGene bestReferenceGene = bestRef.gene;
                localPS.println("Aligned with " + bestReferenceGene.getName() + " from " + libraryNameToAddress.get(bestReferenceGene.getParentLibrary().getName()) + " ; Score = " + bestAlignment.getScore());
                AlignmentHelper alignmentHelper = bestAlignment.getAlignmentHelper();
                for (AlignmentHelper h : alignmentHelper.split(150)) localPS.println(h + "\n");
                ReferencePoints targetPartitioning = targetGene.getPartitioning();
                ReferencePoints referencePartitioning = bestReferenceGene.getPartitioning();
                for (GeneFeature.ReferenceRange range : geneFeature) for (ReferencePoint point : range.getIntermediatePoints()) {
                    localPS.print(point + ": ");
                    boolean isAvailable = targetPartitioning.isAvailable(point);
                    if (isAvailable) {
                        localPS.println("already set");
                    }
                    if (!referencePartitioning.isAvailable(point)) {
                        if (!isAvailable)
                            localPS.println("not set in reference gene");
                        continue;
                    }
                    int ntPositionInReference = referencePartitioning.getRelativePosition(geneFeature, point);
                    // Projecting position
                    AminoAcidSequence.AminoAcidSequencePosition aaPositionInReferece = AminoAcidSequence.convertNtPositionToAA(ntPositionInReference, bestRef.ntSeqLength, bestRef.frame);
                    if (aaPositionInReferece == null) {
                        if (!isAvailable)
                            localPS.println("failed to convert to aa position in ref");
                        continue;
                    }
                    int aaPositionInTarget = Alignment.aabs(bestAlignment.convertToSeq2Position(aaPositionInReferece.aminoAcidPosition));
                    if (aaPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed to project using alignment");
                        continue;
                    }
                    int ntPositionInTarget = AminoAcidSequence.convertAAPositionToNt(aaPositionInTarget, nSeq.size(), bestFrame);
                    if (ntPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed");
                        continue;
                    }
                    ntPositionInTarget += aaPositionInReferece.positionInTriplet;
                    ntPositionInTarget = targetPartitioning.getAbsolutePosition(geneFeature, ntPositionInTarget);
                    if (ntPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed");
                        continue;
                    }
                    if (isAvailable) {
                        int existingPosition = targetPartitioning.getPosition(point);
                        if (existingPosition != ntPositionInTarget) {
                            localPS.println("inferred position differs from existing value.  existing: " + existingPosition + ", inferred: " + ntPositionInTarget);
                        }
                        continue;
                    }
                    localPS.println(ntPositionInTarget);
                    targetGeneData.getAnchorPoints().put(point, (long) ntPositionInTarget);
                    anyPointChanged = true;
                }
                if (!anyPointChanged) {
                    if (!params.outputOnlyModified() && ai == 0)
                        ps.write(localBos.toByteArray());
                } else {
                    ps.write(localBos.toByteArray());
                    break;
                }
            }
            ps.println("================");
            ps.println();
            if (anyPointChanged && params.outputOnlyModified())
                System.out.write(bos.toByteArray());
            genes.add(targetGeneData);
        }
        result.add(new VDJCLibraryData(lib.getData(), genes));
    }
    VDJCDataUtils.writeToFile(result, outputFile, false);
}
Also used : SimpleBatchAlignerParameters(com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) VDJCGeneData(io.repseq.dto.VDJCGeneData) Pattern(java.util.regex.Pattern) PrintStream(java.io.PrintStream) AlignmentHelper(com.milaboratory.core.alignment.AlignmentHelper) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SimpleBatchAligner(com.milaboratory.core.alignment.batch.SimpleBatchAligner) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 3 with VDJCLibraryData

use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.

the class ListAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(params.getInput());
    System.out.println("TaxonId\tGeneName\tFunctional");
    for (VDJCLibraryData lib : libs) for (VDJCGeneData geneData : lib.getGenes()) {
        System.out.println(lib.getTaxonId() + "\t" + geneData.getName() + "\t" + (geneData.isFunctional() ? "F" : "P"));
    }
}
Also used : VDJCLibraryData(io.repseq.dto.VDJCLibraryData) VDJCGeneData(io.repseq.dto.VDJCGeneData)

Example 4 with VDJCLibraryData

use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.

the class FilterAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(Paths.get(params.getInput()));
    List<VDJCLibraryData> filtered = new ArrayList<>();
    Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
    boolean filterRecords = (params.chain != null);
    int filteredLibraries = 0;
    int filteredGenes = 0;
    for (VDJCLibraryData lib : libs) {
        if (params.taxonId != null && params.taxonId != lib.getTaxonId())
            continue;
        if (params.species != null && !lib.getSpeciesNames().contains(params.species))
            continue;
        ++filteredLibraries;
        if (!filterRecords) {
            filteredGenes += lib.getGenes().size();
            filtered.add(lib);
        } else {
            List<VDJCGeneData> genes = new ArrayList<>();
            for (VDJCGeneData gene : lib.getGenes()) {
                if (chainPattern != null) {
                    boolean y = false;
                    for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
                        break;
                    if (!y)
                        continue;
                }
                ++filteredGenes;
                genes.add(gene);
            }
            filtered.add(new VDJCLibraryData(lib, genes));
        }
    }
    System.out.println("Filtered libraries: " + filteredLibraries);
    System.out.println("Filtered genes: " + filteredGenes);
    VDJCDataUtils.writeToFile(filtered, params.getOutput(), false);
}
Also used : Pattern(java.util.regex.Pattern) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) ArrayList(java.util.ArrayList) VDJCGeneData(io.repseq.dto.VDJCGeneData)

Example 5 with VDJCLibraryData

use of io.repseq.dto.VDJCLibraryData in project repseqio by repseqio.

the class VDJCLibraryRegistry method tryResolve.

private void tryResolve(LibraryResolver resolver, String libraryName) {
    // Check if this combination of resolver and libraryName was already being processed
    LibraryLoadRequest request = new LibraryLoadRequest(resolver, libraryName);
    if (loadedLibraries.contains(request))
        return;
    // Try resolve
    VDJCLibraryData[] resolved = resolver.resolve(libraryName);
    // Marking this request as already processed
    loadedLibraries.add(request);
    // If not resolved
    if (resolved == null) {
        if (resolver instanceof AliasResolver) {
            String newLibraryName = ((AliasResolver) resolver).resolveAlias(libraryName);
            if (newLibraryName == null)
                // proceed to next resolver
                return;
            tryResolve(resolver, newLibraryName);
            if (aliases.containsKey(libraryName) && !aliases.get(libraryName).equals(newLibraryName))
                throw new RuntimeException("Conflicting aliases " + libraryName + " -> " + newLibraryName + " / " + aliases.get(libraryName));
            aliases.put(libraryName, newLibraryName);
        }
        // proceed to next resolver
        return;
    }
    // Registering loaded library entries
    for (VDJCLibraryData vdjcLibraryData : resolved) {
        VDJCLibraryId sal = new VDJCLibraryId(libraryName, vdjcLibraryData.getTaxonId());
        // (or using previous resolution call with the same library name)
        if (// If so - ignore it
        libraries.containsKey(sal))
            continue;
        // Registering library
        registerLibrary(resolver.getContext(libraryName), libraryName, vdjcLibraryData);
    }
}
Also used : VDJCLibraryData(io.repseq.dto.VDJCLibraryData)

Aggregations

VDJCLibraryData (io.repseq.dto.VDJCLibraryData)7 VDJCGeneData (io.repseq.dto.VDJCGeneData)3 ArrayList (java.util.ArrayList)3 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)2 Path (java.nio.file.Path)2 Pattern (java.util.regex.Pattern)2 Range (com.milaboratory.core.Range)1 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)1 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)1 SimpleBatchAligner (com.milaboratory.core.alignment.batch.SimpleBatchAligner)1 SimpleBatchAlignerParameters (com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters)1 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)1 TranslationParameters (com.milaboratory.core.sequence.TranslationParameters)1 SequenceProviderIndexOutOfBoundsException (com.milaboratory.core.sequence.provider.SequenceProviderIndexOutOfBoundsException)1 VDJCGene (io.repseq.core.VDJCGene)1 VDJCLibrary (io.repseq.core.VDJCLibrary)1 VDJCLibraryRegistry (io.repseq.core.VDJCLibraryRegistry)1 KnownSequenceFragmentData (io.repseq.dto.KnownSequenceFragmentData)1 VDJCDataUtils (io.repseq.dto.VDJCDataUtils)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1