Search in sources :

Example 1 with VDJCGeneData

use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.

the class InferAnchorPointsAction method go.

public void go(GeneFeature geneFeature, String inputFile, String outputFile) throws Exception {
    VDJCLibraryRegistry.resetDefaultRegistry();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    // Map of library names
    Map<String, String> libraryNameToAddress = new HashMap<>();
    // Registering reference library
    int i = 0;
    for (String refAddress : params.getReference()) {
        String name = REFERENCE_LIBRARY_PREFIX + (i++);
        reg.registerLibraries(refAddress, name);
        libraryNameToAddress.put(name, refAddress);
    }
    if (params.getReference().isEmpty()) {
        reg.loadAllLibraries("default");
        for (VDJCLibrary library : reg.getLoadedLibraries()) libraryNameToAddress.put(library.getName(), "built-in");
    }
    // Registering target library
    reg.registerLibraries(inputFile, TARGET_LIBRARY_NAME);
    // Compile gene filter
    Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
    List<VDJCLibrary> refLibraries = reg.getLoadedLibrariesByNamePattern(REFERENCE_LIBRARY_PATTERN);
    SimpleBatchAlignerParameters<AminoAcidSequence> aParams = new SimpleBatchAlignerParameters<>(5, 0.4f, params.getAbsoluteMinScore(geneFeature), true, AffineGapAlignmentScoring.getAminoAcidBLASTScoring(BLASTMatrix.BLOSUM62, -10, -1));
    SimpleBatchAligner<AminoAcidSequence, Ref> aligner = new SimpleBatchAligner<>(aParams);
    int dbSize = 0;
    for (VDJCLibrary lib : refLibraries) {
        for (VDJCGene gene : lib.getGenes()) {
            NucleotideSequence nSeq = gene.getFeature(geneFeature);
            if (nSeq == null)
                continue;
            ReferencePoint frameReference = GeneFeature.getFrameReference(geneFeature);
            ReferencePoints partitioning = gene.getPartitioning();
            if (frameReference == null)
                continue;
            int relativePosition = partitioning.getRelativePosition(geneFeature, frameReference);
            if (relativePosition < 0)
                continue;
            TranslationParameters frame = withIncompleteCodon(relativePosition);
            AminoAcidSequence aaSequence = AminoAcidSequence.translate(nSeq, frame);
            aligner.addReference(aaSequence, new Ref(gene, frame, nSeq.size()));
            ++dbSize;
        }
    }
    System.out.println("DB size: " + dbSize);
    System.out.println();
    // Checking that db is not empty
    if (dbSize == 0)
        throw new RuntimeException("No reference genes.");
    ArrayList<VDJCLibraryData> result = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    PrintStream bufferPS = new PrintStream(bos);
    // Iteration over target genes
    for (VDJCLibrary lib : reg.getLoadedLibrariesByName(TARGET_LIBRARY_NAME)) {
        ArrayList<VDJCGeneData> genes = new ArrayList<>();
        for (VDJCGene targetGene : lib.getGenes()) {
            bos.reset();
            PrintStream ps = params.outputOnlyModified() ? bufferPS : System.out;
            if (namePattern != null && !namePattern.matcher(targetGene.getName()).matches()) {
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            ps.println("Processing: " + targetGene.getName() + " (" + (targetGene.isFunctional() ? "F" : "P") + ") " + targetGene.getChains());
            // Getting gene feature sequence from target gene
            NucleotideSequence nSeq = targetGene.getFeature(geneFeature);
            if (nSeq == null) {
                ps.println("Failed to extract " + GeneFeature.encode(geneFeature));
                ps.println("================");
                ps.println();
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            // Alignment result
            AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> bestAResult = null;
            TranslationParameters bestFrame = null;
            // Searching for best alignment
            for (TranslationParameters frame : TRANSLATION_PARAMETERS) {
                AminoAcidSequence aaSeq = AminoAcidSequence.translate(nSeq, frame);
                AlignmentResult<AlignmentHit<AminoAcidSequence, Ref>> r = aligner.align(aaSeq);
                if (r != null && r.hasHits() && (bestAResult == null || bestAResult.getBestHit().getAlignment().getScore() < r.getBestHit().getAlignment().getScore())) {
                    bestAResult = r;
                    bestFrame = frame;
                }
            }
            if (bestFrame == null) {
                ps.println("No alignments found.");
                if (!params.outputOnlyModified())
                    genes.add(targetGene.getData());
                continue;
            }
            List<AlignmentHit<AminoAcidSequence, Ref>> hits = bestAResult.getHits();
            VDJCGeneData targetGeneData = targetGene.getData().clone();
            boolean anyPointChanged = false;
            for (int ai = 0; ai < hits.size(); ai++) {
                // Accumulate output
                ByteArrayOutputStream localBos = new ByteArrayOutputStream();
                PrintStream localPS = new PrintStream(localBos);
                Alignment<AminoAcidSequence> bestAlignment = hits.get(ai).getAlignment();
                Ref bestRef = hits.get(ai).getRecordPayload();
                VDJCGene bestReferenceGene = bestRef.gene;
                localPS.println("Aligned with " + bestReferenceGene.getName() + " from " + libraryNameToAddress.get(bestReferenceGene.getParentLibrary().getName()) + " ; Score = " + bestAlignment.getScore());
                AlignmentHelper alignmentHelper = bestAlignment.getAlignmentHelper();
                for (AlignmentHelper h : alignmentHelper.split(150)) localPS.println(h + "\n");
                ReferencePoints targetPartitioning = targetGene.getPartitioning();
                ReferencePoints referencePartitioning = bestReferenceGene.getPartitioning();
                for (GeneFeature.ReferenceRange range : geneFeature) for (ReferencePoint point : range.getIntermediatePoints()) {
                    localPS.print(point + ": ");
                    boolean isAvailable = targetPartitioning.isAvailable(point);
                    if (isAvailable) {
                        localPS.println("already set");
                    }
                    if (!referencePartitioning.isAvailable(point)) {
                        if (!isAvailable)
                            localPS.println("not set in reference gene");
                        continue;
                    }
                    int ntPositionInReference = referencePartitioning.getRelativePosition(geneFeature, point);
                    // Projecting position
                    AminoAcidSequence.AminoAcidSequencePosition aaPositionInReferece = AminoAcidSequence.convertNtPositionToAA(ntPositionInReference, bestRef.ntSeqLength, bestRef.frame);
                    if (aaPositionInReferece == null) {
                        if (!isAvailable)
                            localPS.println("failed to convert to aa position in ref");
                        continue;
                    }
                    int aaPositionInTarget = Alignment.aabs(bestAlignment.convertToSeq2Position(aaPositionInReferece.aminoAcidPosition));
                    if (aaPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed to project using alignment");
                        continue;
                    }
                    int ntPositionInTarget = AminoAcidSequence.convertAAPositionToNt(aaPositionInTarget, nSeq.size(), bestFrame);
                    if (ntPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed");
                        continue;
                    }
                    ntPositionInTarget += aaPositionInReferece.positionInTriplet;
                    ntPositionInTarget = targetPartitioning.getAbsolutePosition(geneFeature, ntPositionInTarget);
                    if (ntPositionInTarget == -1) {
                        if (!isAvailable)
                            localPS.println("failed");
                        continue;
                    }
                    if (isAvailable) {
                        int existingPosition = targetPartitioning.getPosition(point);
                        if (existingPosition != ntPositionInTarget) {
                            localPS.println("inferred position differs from existing value.  existing: " + existingPosition + ", inferred: " + ntPositionInTarget);
                        }
                        continue;
                    }
                    localPS.println(ntPositionInTarget);
                    targetGeneData.getAnchorPoints().put(point, (long) ntPositionInTarget);
                    anyPointChanged = true;
                }
                if (!anyPointChanged) {
                    if (!params.outputOnlyModified() && ai == 0)
                        ps.write(localBos.toByteArray());
                } else {
                    ps.write(localBos.toByteArray());
                    break;
                }
            }
            ps.println("================");
            ps.println();
            if (anyPointChanged && params.outputOnlyModified())
                System.out.write(bos.toByteArray());
            genes.add(targetGeneData);
        }
        result.add(new VDJCLibraryData(lib.getData(), genes));
    }
    VDJCDataUtils.writeToFile(result, outputFile, false);
}
Also used : SimpleBatchAlignerParameters(com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) VDJCGeneData(io.repseq.dto.VDJCGeneData) Pattern(java.util.regex.Pattern) PrintStream(java.io.PrintStream) AlignmentHelper(com.milaboratory.core.alignment.AlignmentHelper) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SimpleBatchAligner(com.milaboratory.core.alignment.batch.SimpleBatchAligner) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 2 with VDJCGeneData

use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.

the class ListAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(params.getInput());
    System.out.println("TaxonId\tGeneName\tFunctional");
    for (VDJCLibraryData lib : libs) for (VDJCGeneData geneData : lib.getGenes()) {
        System.out.println(lib.getTaxonId() + "\t" + geneData.getName() + "\t" + (geneData.isFunctional() ? "F" : "P"));
    }
}
Also used : VDJCLibraryData(io.repseq.dto.VDJCLibraryData) VDJCGeneData(io.repseq.dto.VDJCGeneData)

Example 3 with VDJCGeneData

use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.

the class FilterAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    VDJCLibraryData[] libs = VDJCDataUtils.readArrayFromFile(Paths.get(params.getInput()));
    List<VDJCLibraryData> filtered = new ArrayList<>();
    Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
    boolean filterRecords = (params.chain != null);
    int filteredLibraries = 0;
    int filteredGenes = 0;
    for (VDJCLibraryData lib : libs) {
        if (params.taxonId != null && params.taxonId != lib.getTaxonId())
            continue;
        if (params.species != null && !lib.getSpeciesNames().contains(params.species))
            continue;
        ++filteredLibraries;
        if (!filterRecords) {
            filteredGenes += lib.getGenes().size();
            filtered.add(lib);
        } else {
            List<VDJCGeneData> genes = new ArrayList<>();
            for (VDJCGeneData gene : lib.getGenes()) {
                if (chainPattern != null) {
                    boolean y = false;
                    for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
                        break;
                    if (!y)
                        continue;
                }
                ++filteredGenes;
                genes.add(gene);
            }
            filtered.add(new VDJCLibraryData(lib, genes));
        }
    }
    System.out.println("Filtered libraries: " + filteredLibraries);
    System.out.println("Filtered genes: " + filteredGenes);
    VDJCDataUtils.writeToFile(filtered, params.getOutput(), false);
}
Also used : Pattern(java.util.regex.Pattern) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) ArrayList(java.util.ArrayList) VDJCGeneData(io.repseq.dto.VDJCGeneData)

Example 4 with VDJCGeneData

use of io.repseq.dto.VDJCGeneData in project repseqio by repseqio.

the class VDJCLibraryRegistry method registerLibrary.

/**
 * Creates and registers single library from VDJCLibraryData
 *
 * @param context context to use for resolution of sequences
 * @param name    library name
 * @param data    library data
 * @return created library
 */
public synchronized VDJCLibrary registerLibrary(Path context, String name, VDJCLibraryData data) {
    // Creating library object
    VDJCLibrary library = new VDJCLibrary(data, name, this, context);
    // Getting library id
    VDJCLibraryId rootId = library.getLibraryIdWithoutChecksum();
    // Check if such library is already registered
    if (libraries.containsKey(rootId))
        throw new RuntimeException("Duplicate library: " + rootId);
    // Loading known sequence fragments from VDJCLibraryData to current SequenceResolver
    SequenceResolver resolver = getSequenceResolver();
    for (KnownSequenceFragmentData fragment : data.getSequenceFragments()) resolver.resolve(new SequenceAddress(context, fragment.getUri())).setRegion(fragment.getRange(), fragment.getSequence());
    // Adding genes
    for (VDJCGeneData gene : data.getGenes()) VDJCLibrary.addGene(library, gene);
    // Adding common species names
    Long taxonId = data.getTaxonId();
    for (String speciesName : data.getSpeciesNames()) {
        String cSpeciesName = canonicalizeSpeciesName(speciesName);
        if (speciesNames.containsKey(cSpeciesName) && !speciesNames.get(cSpeciesName).equals(taxonId))
            throw new IllegalArgumentException("Mismatch in common species name between several libraries. " + "(Library name = " + name + "; name = " + speciesName + ").");
        speciesNames.put(cSpeciesName, taxonId);
        List<String> names = speciesNamesReverse.get(taxonId);
        if (names == null)
            speciesNamesReverse.put(taxonId, names = new ArrayList<>());
        names.add(speciesName);
    }
    // Adding this library to collection
    libraries.put(library.getLibraryIdWithoutChecksum(), library);
    return library;
}
Also used : SequenceResolver(io.repseq.seqbase.SequenceResolver) KnownSequenceFragmentData(io.repseq.dto.KnownSequenceFragmentData) SequenceAddress(io.repseq.seqbase.SequenceAddress) VDJCGeneData(io.repseq.dto.VDJCGeneData)

Aggregations

VDJCGeneData (io.repseq.dto.VDJCGeneData)4 VDJCLibraryData (io.repseq.dto.VDJCLibraryData)3 Pattern (java.util.regex.Pattern)2 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)1 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)1 SimpleBatchAligner (com.milaboratory.core.alignment.batch.SimpleBatchAligner)1 SimpleBatchAlignerParameters (com.milaboratory.core.alignment.batch.SimpleBatchAlignerParameters)1 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)1 NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)1 TranslationParameters (com.milaboratory.core.sequence.TranslationParameters)1 KnownSequenceFragmentData (io.repseq.dto.KnownSequenceFragmentData)1 SequenceAddress (io.repseq.seqbase.SequenceAddress)1 SequenceResolver (io.repseq.seqbase.SequenceResolver)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 PrintStream (java.io.PrintStream)1 ArrayList (java.util.ArrayList)1