Search in sources :

Example 1 with ProteinSequence

Use of org.biojava.nbio.core.sequence.ProteinSequence in the project jstructure by JonStargaryen.

The method handleBindingSite of the class UniProtMutationsForBindingSite.

/**
 * Maps each residue of a binding site to a position in the associated UniProt sequence and
 * records every line of the effects file that refers to that position.
 *
 * <p>Steps: look up the UniProt id for the binding site's PDB id and chain in the effects file,
 * align the UniProt sequence against the sequence of the original PDB chain, pair each binding
 * site residue of the renumbered structure with the spatially closest residue of the same type in
 * the original chain, and finally translate the position of that original residue into an index
 * of the UniProt sequence via the alignment.
 *
 * <p>Matching lines are printed and appended to {@code output}; residues whose position cannot be
 * resolved are reported in {@code warnings}.
 *
 * @param bindingSite the binding site to process; its {@code pdbId}, {@code chainId},
 *     {@code residues}, {@code clazz}, {@code aa} and {@code mode} fields are read
 * @throws java.util.NoSuchElementException if the effects file has no line for the PDB id and
 *     chain of the binding site, or if the original chain has no group with the same three-letter
 *     code as a binding site residue
 */
private static void handleBindingSite(BindingSite bindingSite) {
    String[] effectsEntry = AARSConstants.lines(EFFECTS_TSV)
            .map(line -> line.split(DEL))
            .filter(split -> split[0].equals(bindingSite.pdbId))
            .filter(split -> split[1].equals(bindingSite.chainId))
            .findAny()
            .orElseThrow(() -> new java.util.NoSuchElementException("no line for " + bindingSite.pdbId + "_" + bindingSite.chainId + " in the effects file"));
    String uniProtId = effectsEntry[2];

    // load original, full structure
    Chain originalChain = ProteinParser.source(bindingSite.pdbId)
            .parse()
            .select()
            .chainName(bindingSite.chainId)
            .asChain();
    String pdbSequence = originalChain.getAminoAcidSequence();
    String uniProtSequence = loadUniProtSequence(uniProtId);

    // align sequences - the UniProt sequence is passed first, the PDB sequence second
    SequencePair<ProteinSequence, AminoAcidCompound> alignment = needle(uniProtSequence, pdbSequence);
    System.out.println(bindingSite);
    System.out.println(alignment);

    // load renumbered, but not transformed chain
    Chain renumberedChain = ProteinParser.source(Paths.get("/home/bittrich/git/aars_analysis/data/msa/" + classToProcess + "/renumbered_structures/" + bindingSite.pdbId + "_renum.pdb"))
            .parse()
            .select()
            .chainName(bindingSite.chainId)
            .asChain();

    // numeric residue numbers of all binding site residues
    List<Integer> residueIndices = bindingSite.residues.stream()
            .map(Group::getResidueNumber)
            .map(ResidueNumber::getResidueNumber)
            .collect(Collectors.toList());

    // key: renumbered, transformed binding site group - value: original group in PDB chain
    Map<Group, Group> groupMapping = renumberedChain.aminoAcids()
            // compare the numeric value: the list holds Integers, so passing the ResidueNumber
            // object itself to contains() would never match
            .filter(aminoAcid -> residueIndices.contains(aminoAcid.getResidueNumber().getResidueNumber()))
            .collect(Collectors.toMap(Function.identity(),
                    // map each group to the entity in the not renumbered structure: the group
                    // with the same three-letter code whose centroid is closest
                    renumberedGroup -> originalChain.select()
                            .groupName(renumberedGroup.getThreeLetterCode())
                            .asFilteredGroups()
                            .min(Comparator.comparingDouble(originalGroup -> originalGroup.calculate().centroid().distanceFast(renumberedGroup.calculate().centroid())))
                            .orElseThrow(() -> new java.util.NoSuchElementException("no group named " + renumberedGroup.getThreeLetterCode() + " in original chain of " + bindingSite.pdbId + "_" + bindingSite.chainId))));

    // determine sequence position in sequence alignment - rarely these indices do not match
    groupMapping.entrySet().forEach(entry -> {
        // position of the original group within the chain's group list, shifted by one
        // NOTE(review): presumably because the alignment expects 1-based positions - confirm
        int residueIndex = originalChain.getGroups().indexOf(entry.getValue()) + 1;
        System.out.print("mapped: " + entry.getKey().getIdentifier() + " -> " + entry.getValue().getIdentifier() + " -> uniprot ");
        try {
            int indexInUniProt = alignment.getIndexInQueryForTargetAt(residueIndex);
            System.out.println(alignment.getCompoundInQueryAt(indexInUniProt).getLongName().toUpperCase() + "-" + indexInUniProt);
            String indexToFind = String.valueOf(indexInUniProt);
            AARSConstants.lines(EFFECTS_TSV)
                    .map(line -> line.split("\t"))
                    .filter(split -> split[0].equals(bindingSite.pdbId))
                    .filter(split -> split[1].equals(bindingSite.chainId))
                    .filter(split -> refersToPosition(split, indexToFind))
                    .forEach(split -> {
                        String outputLine = bindingSite.pdbId + DEL + bindingSite.chainId + DEL + split[2] + DEL + bindingSite.clazz + DEL + bindingSite.aa + DEL + bindingSite.mode + DEL + entry.getKey().getResidueNumber() + DEL + entry.getValue().getResidueNumber() + DEL + split[3] + DEL + split[4] + DEL + split[5] + DEL + split[6] + DEL + split[7] + System.lineSeparator();
                        System.out.println(outputLine);
                        output.append(outputLine);
                    });
        } catch (ArrayIndexOutOfBoundsException e) {
            // the position could not be resolved - report it and continue with the next residue
            System.out.println("failed!");
            warnings.append("#could not map ").append(entry.getValue().getIdentifier()).append(" in ").append(bindingSite.pdbId).append("_").append(bindingSite.chainId).append(" to UniProt sequence").append(System.lineSeparator());
        }
    });
}
Also used : Function(java.util.function.Function) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) Group(de.bioforscher.jstructure.model.structure.Group) Map(java.util.Map) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Protein(de.bioforscher.jstructure.model.structure.Protein) Path(java.nio.file.Path) ResidueNumber(de.bioforscher.jstructure.model.structure.ResidueNumber) ProteinIdentifier(de.bioforscher.jstructure.model.structure.identifier.ProteinIdentifier) ProteinParser(de.bioforscher.jstructure.parser.ProteinParser) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) Paths(java.nio.file.Paths) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Chain(de.bioforscher.jstructure.model.structure.Chain) Group(de.bioforscher.jstructure.model.structure.Group) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Aggregations

Chain (de.bioforscher.jstructure.model.structure.Chain)1 Group (de.bioforscher.jstructure.model.structure.Group)1 Protein (de.bioforscher.jstructure.model.structure.Protein)1 ResidueNumber (de.bioforscher.jstructure.model.structure.ResidueNumber)1 ProteinIdentifier (de.bioforscher.jstructure.model.structure.identifier.ProteinIdentifier)1 ProteinParser (de.bioforscher.jstructure.parser.ProteinParser)1 File (java.io.File)1 IOException (java.io.IOException)1 UncheckedIOException (java.io.UncheckedIOException)1 Files (java.nio.file.Files)1 Path (java.nio.file.Path)1 Paths (java.nio.file.Paths)1 Comparator (java.util.Comparator)1 List (java.util.List)1 Map (java.util.Map)1 Function (java.util.function.Function)1 Pattern (java.util.regex.Pattern)1 Collectors (java.util.stream.Collectors)1 Alignments (org.biojava.nbio.alignment.Alignments)1 SimpleGapPenalty (org.biojava.nbio.alignment.SimpleGapPenalty)1