Search in sources :

Example 1 with SimpleGapPenalty

use of org.biojava.nbio.alignment.SimpleGapPenalty in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValues.

/**
 * Annotates the amino acids of a chain with the protection level of an experiment.
 * <p>
 * The experiment sequence (query) is globally aligned against the chain sequence (target) using
 * BLOSUM62 and a default-constructed {@code SimpleGapPenalty}. Every experiment residue is then
 * translated through that alignment to an amino acid of the chain, which receives the experiment's
 * protection level. Residues for which this fails are logged as warnings and skipped.
 *
 * @param experiment provides the sequence, the residues and the protection level
 * @param chain the chain whose amino acids are annotated
 * @throws IllegalArgumentException wrapping a {@code CompoundNotFoundException} raised while
 *         setting up the alignment
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String targetSequence = chain.getAminoAcidSequence();
    String querySequence = experiment.getSequence();
    try {
        // query = experiment sequence, target = sequence of the PDB chain
        final SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                new ProteinSequence(querySequence),
                new ProteinSequence(targetSequence),
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue experimentResidue : experiment.getResidues()) {
            int queryPosition = experimentResidue.getIndex() - 1;
            try {
                // workaround for STF0017: a proline reported at experiment position 1 is pinned to
                // the first amino acid of the chain instead of being looked up in the alignment
                boolean leadingProline = experimentResidue.getCode().equals("P") && experimentResidue.getIndex() == 1;
                // NOTE(review): queryPosition is 0-based and the result is used as a 0-based list
                // index - confirm this matches the index base of getIndexInTargetForQueryAt
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid mappedResidue = chainResidues.get(targetPosition);

                // known problematic entry: STF0034 (cannot align)
                Start2FoldResidueAnnotation annotation = mappedResidue.getFeature(Start2FoldResidueAnnotation.class);
                // resolves the experiment's protection level; get() throws if it matches no constant
                ProtectionLevel level = Stream.of(ProtectionLevel.values())
                        .filter(candidate -> candidate == experiment.getProtectionLevel())
                        .findFirst()
                        .get();
                annotation.addProtectionLevelEntry(level);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", experimentResidue.getCode(), experimentResidue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Experiment(de.bioforscher.start2fold.model.Experiment) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Stream(java.util.stream.Stream) ProtectionLevel(de.bioforscher.start2fold.model.ProtectionLevel) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 2 with SimpleGapPenalty

use of org.biojava.nbio.alignment.SimpleGapPenalty in project jstructure by JonStargaryen.

the class A01_SingleLinkageClusterer method computeNeedlemanWunschSimilarity.

/**
 * Globally aligns the amino acid sequences of two chains (BLOSUM62, default-constructed
 * {@code SimpleGapPenalty}) and reports the identity of the resulting alignment. The value is
 * also printed to standard output.
 *
 * @param entry1 the chain providing the reference (query) sequence
 * @param entry2 the chain providing the sequence aligned against it (target)
 * @return the identity value reported by {@code getPercentageOfIdentity()} for the alignment
 * @throws IllegalArgumentException wrapping a {@code CompoundNotFoundException} raised while
 *         setting up the alignment
 */
private double computeNeedlemanWunschSimilarity(Chain entry1, Chain entry2) {
    try {
        ProteinSequence reference = new ProteinSequence(entry1.getAminoAcidSequence());
        ProteinSequence query = new ProteinSequence(entry2.getAminoAcidSequence());
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                reference,
                query,
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        // evaluate once and reuse the value for both the console output and the result
        double identity = alignment.getPercentageOfIdentity();
        System.out.println(identity);
        return identity;
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty)

Example 3 with SimpleGapPenalty

use of org.biojava.nbio.alignment.SimpleGapPenalty in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValues.

/**
 * Transfers the protection level of an experiment onto the matching amino acids of a chain.
 * <p>
 * A global alignment (BLOSUM62, default-constructed {@code SimpleGapPenalty}) between the
 * experiment sequence (query) and the chain sequence (target) is used to translate each
 * experiment residue to an amino acid of the chain. The alignment and every successful mapping
 * are logged at debug level; residues that cannot be mapped are logged as warnings and skipped.
 *
 * @param experiment provides the sequence, the residues and the protection level
 * @param chain the chain whose amino acids are annotated
 * @throws IllegalArgumentException wrapping a {@code CompoundNotFoundException} raised while
 *         setting up the alignment
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String chainSequence = chain.getAminoAcidSequence();
    String measuredSequence = experiment.getSequence();
    try {
        // query = experiment sequence, target = sequence of the PDB chain
        final SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                new ProteinSequence(measuredSequence),
                new ProteinSequence(chainSequence),
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        logger.debug("alignment:{}{}", System.lineSeparator(), alignment.toString());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue measured : experiment.getResidues()) {
            int queryPosition = measured.getIndex() - 1;
            try {
                // workaround for STF0017: a proline reported at experiment position 1 is pinned to
                // the first amino acid of the chain instead of being looked up in the alignment
                boolean leadingProline = measured.getCode().equals("P") && measured.getIndex() == 1;
                // NOTE(review): queryPosition is 0-based and the result is used as a 0-based list
                // index - confirm this matches the index base of getIndexInTargetForQueryAt
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid mapped = chainResidues.get(targetPosition);
                logger.debug("mapped experiment {}-{} onto PDB {}-{}", measured.getCode(), measured.getIndex(), mapped.getOneLetterCode(), mapped.getResidueIdentifier());

                // known problematic entry: STF0034 (cannot align)
                Start2FoldResidueAnnotation annotation = mapped.getFeature(Start2FoldResidueAnnotation.class);
                // resolves the experiment's protection level; get() throws if it matches no constant
                ProtectionLevel level = Stream.of(ProtectionLevel.values())
                        .filter(candidate -> candidate == experiment.getProtectionLevel())
                        .findFirst()
                        .get();
                annotation.addProtectionLevelEntry(level);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", measured.getCode(), measured.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) ProtectionLevel(de.bioforscher.jstructure.efr.model.ProtectionLevel) Experiment(de.bioforscher.jstructure.efr.model.Experiment) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) Method(de.bioforscher.jstructure.efr.model.Method) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) Stream(java.util.stream.Stream) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.jstructure.efr.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) 
AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 4 with SimpleGapPenalty

use of org.biojava.nbio.alignment.SimpleGapPenalty in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValuesForStrong.

/**
 * Marks every amino acid of a chain that corresponds to a residue of the experiment with
 * {@code ProtectionLevel.STRONG}.
 * <p>
 * Residues are translated from the experiment sequence (query) to the chain sequence (target)
 * through a global alignment (BLOSUM62, default-constructed {@code SimpleGapPenalty}). Residues
 * that cannot be mapped are logged as warnings and skipped.
 *
 * @param experiment provides the sequence and the residues to map
 * @param chain the chain whose amino acids are annotated
 * @throws IllegalArgumentException wrapping a {@code CompoundNotFoundException} raised while
 *         setting up the alignment
 */
private static void assignValuesForStrong(Experiment experiment, Chain chain) {
    String chainSequence = chain.getAminoAcidSequence();
    String measuredSequence = experiment.getSequence();
    try {
        // query = experiment sequence, target = sequence of the PDB chain
        final SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                new ProteinSequence(measuredSequence),
                new ProteinSequence(chainSequence),
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue measured : experiment.getResidues()) {
            int queryPosition = measured.getIndex() - 1;
            try {
                // workaround for STF0017: a proline reported at experiment position 1 is pinned to
                // the first amino acid of the chain instead of being looked up in the alignment
                boolean leadingProline = measured.getCode().equals("P") && measured.getIndex() == 1;
                // NOTE(review): queryPosition is 0-based and the result is used as a 0-based list
                // index - confirm this matches the index base of getIndexInTargetForQueryAt
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid mapped = chainResidues.get(targetPosition);
                Start2FoldResidueAnnotation annotation = mapped.getFeature(Start2FoldResidueAnnotation.class);
                annotation.addProtectionLevelEntry(ProtectionLevel.STRONG);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", measured.getCode(), measured.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.jstructure.efr.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 5 with SimpleGapPenalty

use of org.biojava.nbio.alignment.SimpleGapPenalty in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValuesForEarly.

/**
 * Marks every amino acid of a chain that corresponds to a residue of the experiment with
 * {@code ProtectionLevel.EARLY}.
 * <p>
 * Residues are translated from the experiment sequence (query) to the chain sequence (target)
 * through a global alignment (BLOSUM62, default-constructed {@code SimpleGapPenalty}). Residues
 * that cannot be mapped are logged as warnings and skipped.
 *
 * @param experiment provides the sequence and the residues to map
 * @param chain the chain whose amino acids are annotated
 * @throws IllegalArgumentException wrapping a {@code CompoundNotFoundException} raised while
 *         setting up the alignment
 */
private static void assignValuesForEarly(Experiment experiment, Chain chain) {
    String structureSequence = chain.getAminoAcidSequence();
    String reportedSequence = experiment.getSequence();
    try {
        // query = experiment sequence, target = sequence of the PDB chain
        final SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(
                new ProteinSequence(reportedSequence),
                new ProteinSequence(structureSequence),
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> structureResidues = chain.aminoAcids().collect(Collectors.toList());

        for (Experiment.Residue reported : experiment.getResidues()) {
            int queryPosition = reported.getIndex() - 1;
            try {
                // workaround for STF0017: a proline reported at experiment position 1 is pinned to
                // the first amino acid of the chain instead of being looked up in the alignment
                boolean leadingProline = reported.getCode().equals("P") && reported.getIndex() == 1;
                // NOTE(review): queryPosition is 0-based and the result is used as a 0-based list
                // index - confirm this matches the index base of getIndexInTargetForQueryAt
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid target = structureResidues.get(targetPosition);
                Start2FoldResidueAnnotation annotation = target.getFeature(Start2FoldResidueAnnotation.class);
                annotation.addProtectionLevelEntry(ProtectionLevel.EARLY);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", reported.getCode(), reported.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.jstructure.efr.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Aggregations

SimpleGapPenalty (org.biojava.nbio.alignment.SimpleGapPenalty)6 CompoundNotFoundException (org.biojava.nbio.core.exceptions.CompoundNotFoundException)6 ProteinSequence (org.biojava.nbio.core.sequence.ProteinSequence)6 AminoAcidCompound (org.biojava.nbio.core.sequence.compound.AminoAcidCompound)6 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)5 IOException (java.io.IOException)5 UncheckedIOException (java.io.UncheckedIOException)5 Experiment (de.bioforscher.jstructure.efr.model.Experiment)3 Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation)3 Chain (de.bioforscher.jstructure.model.structure.Chain)2 Experiment (de.bioforscher.start2fold.model.Experiment)2 Start2FoldResidueAnnotation (de.bioforscher.start2fold.model.Start2FoldResidueAnnotation)2 InputStream (java.io.InputStream)2 Files (java.nio.file.Files)2 Path (java.nio.file.Path)2 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 Stream (java.util.stream.Stream)2 Alignments (org.biojava.nbio.alignment.Alignments)2 SubstitutionMatrixHelper (org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper)2