Search in sources:

Example 1 with Experiment

use of de.bioforscher.start2fold.model.Experiment in project jstructure by JonStargaryen.

The method assignValues of the class Start2FoldXmlParser.

/**
 * Appends the protection level of an experiment to the residues of a chain.
 * <p>
 * The experiment sequence is aligned globally (BLOSUM62, default {@link SimpleGapPenalty}) against the amino
 * acid sequence of the chain. For each residue of the experiment the aligned position in the chain is looked
 * up and the protection level of the experiment is added to the {@link Start2FoldResidueAnnotation} of the
 * amino acid found at that position. A residue that cannot be mapped is logged as a warning and skipped; the
 * remaining residues are still processed.
 *
 * @param experiment the experiment providing the sequence, the residues and the protection level
 * @param chain the chain whose amino acids receive the protection level entries
 * @throws IllegalArgumentException wrapping a {@link CompoundNotFoundException} raised while the sequences
 * are prepared for the alignment
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String targetSequence = chain.getAminoAcidSequence();
    String querySequence = experiment.getSequence();
    try {
        // experiment is the query, the structure is the target of the alignment
        ProteinSequence query = new ProteinSequence(querySequence);
        ProteinSequence target = new ProteinSequence(targetSequence);
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(query, target, Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());
        for (Experiment.Residue residue : experiment.getResidues()) {
            int queryPosition = residue.getIndex() - 1;
            try {
                // workaround for STF0017: a proline at the first position is pinned to the first amino acid
                boolean leadingProline = residue.getCode().equals("P") && residue.getIndex() == 1;
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid aminoAcid = chainResidues.get(targetPosition);
                // known to be ignored: STF0034 (cannot align)
                Start2FoldResidueAnnotation annotation = aminoAcid.getFeature(Start2FoldResidueAnnotation.class);
                // resolve the protection level of the experiment among the known levels
                ProtectionLevel protectionLevel = Stream.of(ProtectionLevel.values())
                        .filter(level -> level == experiment.getProtectionLevel())
                        .findFirst()
                        .get();
                annotation.addProtectionLevelEntry(protectionLevel);
            } catch (Exception e) {
                // residue is not present in the structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Experiment(de.bioforscher.start2fold.model.Experiment) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Stream(java.util.stream.Stream) ProtectionLevel(de.bioforscher.start2fold.model.ProtectionLevel) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 2 with Experiment

use of de.bioforscher.start2fold.model.Experiment in project jstructure by JonStargaryen.

The method parse of the class Start2FoldXmlParser.

/**
 * Parses all experiments of a Start2Fold document and assigns their values to a chain.
 * <p>
 * First, a new {@link Start2FoldResidueAnnotation} is added to the feature container of every amino acid of
 * the chain. The stream is then parsed as UTF-8 with base URI {@code "/"}, every {@code experiment} element
 * is converted by {@link Experiment#parse} and each resulting experiment is handed to {@code assignValues}.
 *
 * @param chain the chain to annotate
 * @param inputStream the stream providing the document
 * @throws UncheckedIOException if reading the stream fails
 */
public static void parse(Chain chain, InputStream inputStream) {
    try {
        // every residue starts with an empty annotation the experiments can append to
        chain.aminoAcids().forEach(aminoAcid -> {
            aminoAcid.getFeatureContainer().addFeature(new Start2FoldResidueAnnotation());
        });
        Document document = Jsoup.parse(inputStream, "UTF-8", "/");
        // all experiments are parsed before the first one is assigned
        List<Experiment> parsedExperiments = document.getElementsByTag("experiment")
                .stream()
                .map(Experiment::parse)
                .collect(Collectors.toList());
        parsedExperiments.forEach(experiment -> assignValues(experiment, chain));
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 3 with Experiment

use of de.bioforscher.start2fold.model.Experiment in project jstructure by JonStargaryen.

The method parseSpecificExperiment of the class Start2FoldXmlParser.

/**
 * Parses selected experiments of a Start2Fold document and assigns them to a chain.
 * <p>
 * A new {@link Start2FoldResidueAnnotation} is added to the feature container of every amino acid of the
 * chain. The stream is parsed as UTF-8 with base URI {@code "/"} and only those experiments whose id is
 * contained in {@code experimentIds} are kept. The number of kept experiments must equal the number of
 * requested ids; each kept experiment is then handed to {@code assignValuesForEarly}.
 *
 * @param chain the chain to annotate
 * @param inputStream the stream providing the document
 * @param experimentIds the ids of the experiments to consider
 * @throws IllegalArgumentException if the number of matching experiments differs from the number of ids
 * @throws UncheckedIOException if reading the stream fails
 */
public static void parseSpecificExperiment(Chain chain, InputStream inputStream, List<Integer> experimentIds) {
    try {
        // every residue starts with an empty annotation the experiments can append to
        chain.aminoAcids().forEach(aminoAcid -> {
            aminoAcid.getFeatureContainer().addFeature(new Start2FoldResidueAnnotation());
        });
        Document document = Jsoup.parse(inputStream, "UTF-8", "/");
        List<Experiment> selectedExperiments = document.getElementsByTag("experiment")
                .stream()
                .map(Experiment::parse)
                .filter(experiment -> experimentIds.contains(experiment.getExperimentId()))
                .collect(Collectors.toList());
        // the check is count-based: one matching experiment is expected per requested id
        boolean allReferencedFound = experimentIds.size() == selectedExperiments.size();
        if (!allReferencedFound) {
            throw new IllegalArgumentException("did not find all referenced experiments");
        }
        selectedExperiments.forEach(experiment -> assignValuesForEarly(experiment, chain));
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Experiment(de.bioforscher.start2fold.model.Experiment) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Stream(java.util.stream.Stream) ProtectionLevel(de.bioforscher.start2fold.model.ProtectionLevel) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 4 with Experiment

use of de.bioforscher.start2fold.model.Experiment in project jstructure by JonStargaryen.

The method assignValuesForEarly of the class Start2FoldXmlParser.

/**
 * Marks the residues of an experiment as {@link ProtectionLevel#EARLY} on a chain.
 * <p>
 * The experiment sequence is aligned globally (BLOSUM62, default {@link SimpleGapPenalty}) against the amino
 * acid sequence of the chain. For each residue of the experiment the aligned position in the chain is looked
 * up and {@link ProtectionLevel#EARLY} is added to the {@link Start2FoldResidueAnnotation} of the amino acid
 * found at that position, regardless of the protection level of the experiment itself. A residue that cannot
 * be mapped is logged as a warning and skipped; the remaining residues are still processed.
 *
 * @param experiment the experiment providing the sequence and the residues
 * @param chain the chain whose amino acids receive the entries
 * @throws IllegalArgumentException wrapping a {@link CompoundNotFoundException} raised while the sequences
 * are prepared for the alignment
 */
private static void assignValuesForEarly(Experiment experiment, Chain chain) {
    String targetSequence = chain.getAminoAcidSequence();
    String querySequence = experiment.getSequence();
    try {
        // experiment is the query, the structure is the target of the alignment
        ProteinSequence query = new ProteinSequence(querySequence);
        ProteinSequence target = new ProteinSequence(targetSequence);
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(query, target, Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());
        for (Experiment.Residue residue : experiment.getResidues()) {
            int queryPosition = residue.getIndex() - 1;
            try {
                // workaround for STF0017: a proline at the first position is pinned to the first amino acid
                boolean leadingProline = residue.getCode().equals("P") && residue.getIndex() == 1;
                int targetPosition = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryPosition);
                AminoAcid aminoAcid = chainResidues.get(targetPosition);
                Start2FoldResidueAnnotation annotation = aminoAcid.getFeature(Start2FoldResidueAnnotation.class);
                annotation.addProtectionLevelEntry(ProtectionLevel.EARLY);
            } catch (Exception e) {
                // residue is not present in the structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Aggregations

Experiment (de.bioforscher.start2fold.model.Experiment): 4; Start2FoldResidueAnnotation (de.bioforscher.start2fold.model.Start2FoldResidueAnnotation): 4; IOException (java.io.IOException): 4; UncheckedIOException (java.io.UncheckedIOException): 4; AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid): 3; SimpleGapPenalty (org.biojava.nbio.alignment.SimpleGapPenalty): 3; CompoundNotFoundException (org.biojava.nbio.core.exceptions.CompoundNotFoundException): 3; ProteinSequence (org.biojava.nbio.core.sequence.ProteinSequence): 3; AminoAcidCompound (org.biojava.nbio.core.sequence.compound.AminoAcidCompound): 3; Document (org.jsoup.nodes.Document): 3; Elements (org.jsoup.select.Elements): 3; Chain (de.bioforscher.jstructure.model.structure.Chain): 2; ProtectionLevel (de.bioforscher.start2fold.model.ProtectionLevel): 2; InputStream (java.io.InputStream): 2; Files (java.nio.file.Files): 2; Path (java.nio.file.Path): 2; List (java.util.List): 2; Collectors (java.util.stream.Collectors): 2; Stream (java.util.stream.Stream): 2; Alignments (org.biojava.nbio.alignment.Alignments): 2