Search in sources:

Example 1 with AminoAcidCompound

use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.

the class UniProtMutationsForBindingSite method handleBindingSite.

/**
 * Maps every residue of a binding site onto its position in the UniProt sequence and records the
 * rows of the effects table that refer to that position.
 * <p>
 * The method
 * <ol>
 * <li>looks up the UniProt id of the chain in {@code EFFECTS_TSV},</li>
 * <li>globally aligns the UniProt sequence with the amino acid sequence of the original PDB chain,</li>
 * <li>pairs each binding site residue of the renumbered chain with the closest group of the same
 * three-letter code in the original chain (by centroid distance),</li>
 * <li>translates the position of that original group into a UniProt position via the alignment and</li>
 * <li>appends one line per matching effects row to {@code output}; residues that cannot be mapped
 * are reported in {@code warnings}.</li>
 * </ol>
 * NOTE(review): the renumbered structures are read from a hard-coded absolute path; this only
 * works on the machine the analysis was written for.
 *
 * @param bindingSite the binding site whose residues are mapped; its {@code pdbId}, {@code chainId},
 *                    {@code residues}, {@code clazz}, {@code aa} and {@code mode} fields are read
 * @throws java.util.NoSuchElementException if the effects table has no row for the chain, or if the
 *                                          original chain contains no group with the three-letter
 *                                          code of a binding site residue
 */
private static void handleBindingSite(BindingSite bindingSite) {
    // the UniProt id is the third column of any effects row that belongs to this chain
    String[] chainRow = AARSConstants.lines(EFFECTS_TSV)
            .map(line -> line.split(DEL))
            .filter(columns -> columns[0].equals(bindingSite.pdbId))
            .filter(columns -> columns[1].equals(bindingSite.chainId))
            .findAny()
            .orElseThrow(() -> new java.util.NoSuchElementException("no effects entry for "
                    + bindingSite.pdbId + "_" + bindingSite.chainId));
    String uniProtId = chainRow[2];

    // load original, full structure
    Chain originalChain = ProteinParser.source(bindingSite.pdbId).parse().select().chainName(bindingSite.chainId).asChain();
    String pdbSequence = originalChain.getAminoAcidSequence();
    String uniProtSequence = loadUniProtSequence(uniProtId);

    // align sequences
    SequencePair<ProteinSequence, AminoAcidCompound> alignment = needle(uniProtSequence, pdbSequence);
    System.out.println(bindingSite);
    System.out.println(alignment);

    // load renumbered, but not transformed chain
    Chain renumberedChain = ProteinParser.source(Paths.get("/home/bittrich/git/aars_analysis/data/msa/" + classToProcess + "/renumbered_structures/" + bindingSite.pdbId + "_renum.pdb")).parse().select().chainName(bindingSite.chainId).asChain();

    // numeric residue numbers of the binding site residues
    List<Integer> residueIndices = bindingSite.residues.stream()
            .map(Group::getResidueNumber)
            .map(ResidueNumber::getResidueNumber)
            .collect(Collectors.toList());

    // key: renumbered, transformed binding site group - value: original group in PDB chain
    Map<Group, Group> groupMapping = renumberedChain.aminoAcids()
            // compare the numeric residue number: the list holds Integers, so passing the
            // ResidueNumber object itself would compare unrelated types and never match
            .filter(aminoAcid -> residueIndices.contains(aminoAcid.getResidueNumber().getResidueNumber()))
            .collect(Collectors.toMap(Function.identity(),
                    // map each group to the closest entity of the same type in the not renumbered structure
                    renumberedGroup -> originalChain.select()
                            .groupName(renumberedGroup.getThreeLetterCode())
                            .asFilteredGroups()
                            .min(Comparator.comparingDouble(originalGroup -> originalGroup.calculate().centroid().distanceFast(renumberedGroup.calculate().centroid())))
                            .orElseThrow(() -> new java.util.NoSuchElementException("no "
                                    + renumberedGroup.getThreeLetterCode() + " group in original chain of "
                                    + bindingSite.pdbId + "_" + bindingSite.chainId))));

    // determine sequence position in sequence alignment - rarely these indices do not match
    groupMapping.forEach((renumberedGroup, originalGroup) -> {
        // alignment positions are 1-based; a group missing from the chain yields 0 and is
        // reported as a mapping failure below
        int residueIndex = originalChain.getGroups().indexOf(originalGroup) + 1;
        System.out.print("mapped: " + renumberedGroup.getIdentifier() + " -> " + originalGroup.getIdentifier() + " -> uniprot ");
        try {
            int indexInUniProt = alignment.getIndexInQueryForTargetAt(residueIndex);
            // Locale.ROOT keeps the upper-casing independent of the platform default locale
            System.out.println(alignment.getCompoundInQueryAt(indexInUniProt).getLongName().toUpperCase(java.util.Locale.ROOT) + "-" + indexInUniProt);
            String indexToFind = String.valueOf(indexInUniProt);
            // NOTE(review): this pass splits on "\t" while the lookup above splits on DEL -
            // presumably DEL is a tab; confirm and unify
            AARSConstants.lines(EFFECTS_TSV)
                    .map(line -> line.split("\t"))
                    .filter(columns -> columns[0].equals(bindingSite.pdbId))
                    .filter(columns -> columns[1].equals(bindingSite.chainId))
                    .filter(columns -> refersToPosition(columns, indexToFind))
                    .forEach(columns -> {
                        String outputLine = bindingSite.pdbId + DEL + bindingSite.chainId + DEL + columns[2] + DEL + bindingSite.clazz + DEL + bindingSite.aa + DEL + bindingSite.mode + DEL + renumberedGroup.getResidueNumber() + DEL + originalGroup.getResidueNumber() + DEL + columns[3] + DEL + columns[4] + DEL + columns[5] + DEL + columns[6] + DEL + columns[7] + System.lineSeparator();
                        System.out.println(outputLine);
                        output.append(outputLine);
                    });
        } catch (IndexOutOfBoundsException e) {
            // the alignment lookup is documented to throw IndexOutOfBoundsException; catching the
            // supertype also still covers the ArrayIndexOutOfBoundsException of short effects rows
            System.out.println("failed!");
            warnings.append("#could not map ").append(originalGroup.getIdentifier()).append(" in ").append(bindingSite.pdbId).append("_").append(bindingSite.chainId).append(" to UniProt sequence").append(System.lineSeparator());
        }
    });
}
Also used : Function(java.util.function.Function) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) Group(de.bioforscher.jstructure.model.structure.Group) Map(java.util.Map) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Protein(de.bioforscher.jstructure.model.structure.Protein) Path(java.nio.file.Path) ResidueNumber(de.bioforscher.jstructure.model.structure.ResidueNumber) ProteinIdentifier(de.bioforscher.jstructure.model.structure.identifier.ProteinIdentifier) ProteinParser(de.bioforscher.jstructure.parser.ProteinParser) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) Paths(java.nio.file.Paths) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Chain(de.bioforscher.jstructure.model.structure.Chain) Group(de.bioforscher.jstructure.model.structure.Group) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 2 with AminoAcidCompound

use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValues.

/**
 * Transfers the protection level of an experiment onto the amino acids of a chain.
 * <p>
 * The experiment sequence (query) is globally aligned to the amino acid sequence of the chain
 * (target) using BLOSUM62 and default gap penalties. Each experiment residue is then mapped through
 * the alignment onto an amino acid of the chain, whose {@link Start2FoldResidueAnnotation} receives
 * the protection level of the experiment. Residues that cannot be mapped are logged and skipped.
 *
 * @param experiment the experiment providing sequence, residues and protection level
 * @param chain      the chain whose amino acids are annotated
 * @throws IllegalArgumentException if one of the sequences cannot be turned into a {@link ProteinSequence}
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String targetSequence = chain.getAminoAcidSequence();
    String querySequence = experiment.getSequence();
    try {
        // align sequences to ensure correct mapping
        ProteinSequence query = new ProteinSequence(querySequence);
        ProteinSequence target = new ProteinSequence(targetSequence);
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                query,
                target,
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue experimentResidue : experiment.getResidues()) {
            int queryPosition = experimentResidue.getIndex() - 1;
            try {
                // super-russian fix for STF0017 where the alignment should match theoretically:
                // a leading proline is pinned to the first amino acid of the chain
                boolean leadingProline = experimentResidue.getCode().equals("P") && experimentResidue.getIndex() == 1;
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid mappedResidue = chainResidues.get(targetPosition);
                // ignore: STF0034 (cannot align)
                // assign experiment-specific protection level to residue
                mappedResidue.getFeature(Start2FoldResidueAnnotation.class)
                        .addProtectionLevelEntry(Stream.of(ProtectionLevel.values())
                                .filter(level -> level == experiment.getProtectionLevel())
                                .findFirst()
                                .get());
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", experimentResidue.getCode(), experimentResidue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Experiment(de.bioforscher.start2fold.model.Experiment) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Stream(java.util.stream.Stream) ProtectionLevel(de.bioforscher.start2fold.model.ProtectionLevel) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 3 with AminoAcidCompound

use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.

the class A01_SingleLinkageClusterer method computeNeedlemanWunschSimilarity.

/**
 * Computes the global (Needleman-Wunsch) alignment of the amino acid sequences of two chains,
 * using BLOSUM62 and default gap penalties, and reports their sequence identity. The identity is
 * also printed to standard output.
 *
 * @param entry1 the chain providing the reference sequence
 * @param entry2 the chain providing the query sequence
 * @return the identity of the alignment as reported by {@code getPercentageOfIdentity()}
 * @throws IllegalArgumentException if one of the sequences cannot be turned into a {@link ProteinSequence}
 */
private double computeNeedlemanWunschSimilarity(Chain entry1, Chain entry2) {
    try {
        ProteinSequence reference = new ProteinSequence(entry1.getAminoAcidSequence());
        ProteinSequence query = new ProteinSequence(entry2.getAminoAcidSequence());
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                reference,
                query,
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        System.out.println(alignment.getPercentageOfIdentity());
        return alignment.getPercentageOfIdentity();
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty)

Example 4 with AminoAcidCompound

use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValues.

/**
 * Transfers the protection level of an experiment onto the amino acids of a chain.
 * <p>
 * The experiment sequence (query) is globally aligned to the amino acid sequence of the chain
 * (target) using BLOSUM62 and default gap penalties; the alignment and every successful mapping are
 * logged at debug level. Each experiment residue is mapped through the alignment onto an amino acid
 * of the chain, whose {@link Start2FoldResidueAnnotation} receives the protection level of the
 * experiment. Residues that cannot be mapped are logged as warnings and skipped.
 *
 * @param experiment the experiment providing sequence, residues and protection level
 * @param chain      the chain whose amino acids are annotated
 * @throws IllegalArgumentException if one of the sequences cannot be turned into a {@link ProteinSequence}
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String targetSequence = chain.getAminoAcidSequence();
    String querySequence = experiment.getSequence();
    try {
        // align sequences to ensure correct mapping
        ProteinSequence query = new ProteinSequence(querySequence);
        ProteinSequence target = new ProteinSequence(targetSequence);
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                query,
                target,
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        logger.debug("alignment:{}{}", System.lineSeparator(), alignment.toString());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue experimentResidue : experiment.getResidues()) {
            int queryPosition = experimentResidue.getIndex() - 1;
            try {
                // super-russian fix for STF0017 where the alignment should match theoretically:
                // a leading proline is pinned to the first amino acid of the chain
                boolean leadingProline = experimentResidue.getCode().equals("P") && experimentResidue.getIndex() == 1;
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid mappedResidue = chainResidues.get(targetPosition);
                logger.debug("mapped experiment {}-{} onto PDB {}-{}", experimentResidue.getCode(), experimentResidue.getIndex(), mappedResidue.getOneLetterCode(), mappedResidue.getResidueIdentifier());
                // ignore: STF0034 (cannot align)
                // assign experiment-specific protection level to residue
                mappedResidue.getFeature(Start2FoldResidueAnnotation.class)
                        .addProtectionLevelEntry(Stream.of(ProtectionLevel.values())
                                .filter(level -> level == experiment.getProtectionLevel())
                                .findFirst()
                                .get());
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", experimentResidue.getCode(), experimentResidue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) ProtectionLevel(de.bioforscher.jstructure.efr.model.ProtectionLevel) Experiment(de.bioforscher.jstructure.efr.model.Experiment) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) Method(de.bioforscher.jstructure.efr.model.Method) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) Stream(java.util.stream.Stream) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.jstructure.efr.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) 
AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 5 with AminoAcidCompound

use of org.biojava.nbio.core.sequence.compound.AminoAcidCompound in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValuesForStrong.

/**
 * Marks every residue of an experiment as {@link ProtectionLevel#STRONG} on the matching amino acid
 * of a chain.
 * <p>
 * The experiment sequence (query) is globally aligned to the amino acid sequence of the chain
 * (target) using BLOSUM62 and default gap penalties. Each experiment residue is mapped through the
 * alignment onto an amino acid of the chain, whose {@link Start2FoldResidueAnnotation} receives a
 * {@code STRONG} entry. Residues that cannot be mapped are logged as warnings and skipped.
 *
 * @param experiment the experiment providing sequence and residues
 * @param chain      the chain whose amino acids are annotated
 * @throws IllegalArgumentException if one of the sequences cannot be turned into a {@link ProteinSequence}
 */
private static void assignValuesForStrong(Experiment experiment, Chain chain) {
    String targetSequence = chain.getAminoAcidSequence();
    String querySequence = experiment.getSequence();
    try {
        // align sequences to ensure correct mapping
        ProteinSequence query = new ProteinSequence(querySequence);
        ProteinSequence target = new ProteinSequence(targetSequence);
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                query,
                target,
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue experimentResidue : experiment.getResidues()) {
            int queryPosition = experimentResidue.getIndex() - 1;
            try {
                // super-russian fix for STF0017 where the alignment should match theoretically:
                // a leading proline is pinned to the first amino acid of the chain
                boolean leadingProline = experimentResidue.getCode().equals("P") && experimentResidue.getIndex() == 1;
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid mappedResidue = chainResidues.get(targetPosition);
                // assign experiment-specific protection level to residue
                mappedResidue.getFeature(Start2FoldResidueAnnotation.class)
                        .addProtectionLevelEntry(ProtectionLevel.STRONG);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", experimentResidue.getCode(), experimentResidue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.jstructure.efr.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Aggregations

SimpleGapPenalty (org.biojava.nbio.alignment.SimpleGapPenalty)7 CompoundNotFoundException (org.biojava.nbio.core.exceptions.CompoundNotFoundException)7 ProteinSequence (org.biojava.nbio.core.sequence.ProteinSequence)7 AminoAcidCompound (org.biojava.nbio.core.sequence.compound.AminoAcidCompound)7 IOException (java.io.IOException)6 UncheckedIOException (java.io.UncheckedIOException)6 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)5 Experiment (de.bioforscher.jstructure.efr.model.Experiment)3 Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation)3 Chain (de.bioforscher.jstructure.model.structure.Chain)3 Files (java.nio.file.Files)3 Path (java.nio.file.Path)3 List (java.util.List)3 Collectors (java.util.stream.Collectors)3 Alignments (org.biojava.nbio.alignment.Alignments)3 SubstitutionMatrixHelper (org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper)3 SequencePair (org.biojava.nbio.core.alignment.template.SequencePair)3 Jsoup (org.jsoup.Jsoup)3 Document (org.jsoup.nodes.Document)3 Experiment (de.bioforscher.start2fold.model.Experiment)2