Search in sources :

Example 1 with Group

use of de.bioforscher.jstructure.model.structure.Group in project jstructure by JonStargaryen.

the class OrientationsOfProteinsInMembranesAnnotator method processInternally.

/**
 * Annotates the given protein with membrane information scraped from the OPM web resource.
 * <p>
 * The search page for the protein's PDB id is fetched, the structure file linked from it is downloaded and
 * parsed, superimposed onto the given protein, and its {@code DUM} groups are stored as membrane atoms. The
 * general data table and the trans-membrane subunit table of the page are copied into the {@link Membrane}
 * feature, which is finally added to the protein's feature container.
 *
 * @param protein the protein to annotate
 * @throws ComputationException if the page reports "No matches" or if fetching the data fails with an
 *                              {@link IOException}
 */
@Override
protected void processInternally(Protein protein) {
    try {
        Document document = getDocument(SEARCH_URL + protein.getPdbId().getPdbId());
        if (document.text().contains("No matches")) {
            throw new ComputationException("did not find OPM entry for " + protein.getIdentifier() + " - possibly it is no membrane protein");
        }
        // create global membrane object - 3rd link points to download
        String downloadLink = document.getElementById("caption").getElementsByTag("a").get(2).attr("href");
        // decode and re-encode with one explicit charset instead of relying on the platform default
        try (InputStreamReader inputStreamReader = new InputStreamReader(new URL(BASE_URL + downloadLink).openStream(), java.nio.charset.StandardCharsets.UTF_8);
             BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {
            byte[] bytes = bufferedReader.lines()
                    .collect(Collectors.joining(System.lineSeparator()))
                    .getBytes(java.nio.charset.StandardCharsets.UTF_8);
            // parse protein - the identifier is cut out of the download link
            String opmPdbId = downloadLink.split("=")[0].split("/")[1].substring(0, 4);
            Protein opmProtein = ProteinParser.source(new ByteArrayInputStream(bytes))
                    .forceProteinName(ProteinIdentifier.createFromPdbId(opmPdbId))
                    .parse();
            Membrane membrane = new Membrane(this);
            // superimpose opm protein onto instance of the original protein
            //TODO this alignment is by no means perfect, but works for a first glance
            SVDSuperimposer.ALPHA_CARBON_SVD_INSTANCE
                    .align(protein.select().aminoAcids().asGroupContainer(), opmProtein.select().aminoAcids().asGroupContainer())
                    .transform(opmProtein);
            // extract dummy atoms and move them to membrane object
            List<double[]> membraneAtoms = opmProtein.atoms()
                    .map(Atom::getParentGroup)
                    .filter(group -> group.getThreeLetterCode().equals("DUM"))
                    .flatMap(Group::atoms)
                    .map(Atom::getCoordinates)
                    .collect(Collectors.toList());
            membrane.setMembraneAtoms(membraneAtoms);
            // both tables of interest carry the class "data" - look them up once
            List<Element> dataTables = document.getElementsByClass("data");
            // extract general information - that is the first table
            List<Element> generalRows = dataTables.get(0).getElementsByTag("tr");
            membrane.setHydrophobicThickness(generalRows.get(1).getElementsByTag("td").get(1).text());
            membrane.setTiltAngle(generalRows.get(2).getElementsByTag("td").get(1).text());
            membrane.setDeltaGTransfer(generalRows.get(3).getElementsByTag("td").get(1).text());
            membrane.setTopology(generalRows.get(5).getElementsByTag("td").get(1).text());
            // extract trans-membrane helices - second table, first row is skipped
            List<TransMembraneHelix> helices = dataTables.get(1).getElementsByTag("tr").stream()
                    .skip(1)
                    .map(element -> element.getElementsByTag("td").get(0))
                    .map(Element::text)
                    .map(TransMembraneHelix::new)
                    .collect(Collectors.toList());
            membrane.setTransMembraneHelices(helices);
            protein.getFeatureContainer().addFeature(membrane);
        }
    } catch (IOException e) {
        throw new ComputationException("failed to fetch OPM file", e);
    }
}
Also used : AbstractFeatureProvider(de.bioforscher.jstructure.model.feature.AbstractFeatureProvider) ProteinIdentifier(de.bioforscher.jstructure.model.structure.identifier.ProteinIdentifier) ProteinParser(de.bioforscher.jstructure.parser.ProteinParser) Logger(org.slf4j.Logger) ComputationException(de.bioforscher.jstructure.feature.ComputationException) URL(java.net.URL) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) SVDSuperimposer(de.bioforscher.jstructure.alignment.SVDSuperimposer) List(java.util.List) ByteArrayInputStream(java.io.ByteArrayInputStream) FeatureProvider(de.bioforscher.jstructure.model.feature.FeatureProvider) Group(de.bioforscher.jstructure.model.structure.Group) Atom(de.bioforscher.jstructure.model.structure.Atom) Document(org.jsoup.nodes.Document) Element(org.jsoup.nodes.Element) Jsoup(org.jsoup.Jsoup) BufferedReader(java.io.BufferedReader) Protein(de.bioforscher.jstructure.model.structure.Protein) Group(de.bioforscher.jstructure.model.structure.Group) InputStreamReader(java.io.InputStreamReader) Element(org.jsoup.nodes.Element) IOException(java.io.IOException) Document(org.jsoup.nodes.Document) URL(java.net.URL) Protein(de.bioforscher.jstructure.model.structure.Protein) Atom(de.bioforscher.jstructure.model.structure.Atom) ByteArrayInputStream(java.io.ByteArrayInputStream) ComputationException(de.bioforscher.jstructure.feature.ComputationException) BufferedReader(java.io.BufferedReader)

Example 2 with Group

use of de.bioforscher.jstructure.model.structure.Group in project jstructure by JonStargaryen.

the class SecondaryStructureAnnotator method calculateTurns.

/**
 * Detects helical turn patterns of length 3, 4 and 5 and records them on the residue states.
 * <p>
 * For every pair {@code (i, i + turn)} that {@code isBonded} accepts, residue {@code i} is marked with
 * {@code '>'}, residue {@code i + turn} with {@code '<'} and every residue in between with the digit of
 * the turn length.
 *
 * @param residues the residues to inspect, addressed by their position in this list
 */
private void calculateTurns(List<AminoAcid> residues) {
    final int residueCount = residues.size();
    for (int start = 0; start < residueCount; start++) {
        Group openingResidue = residues.get(start);
        for (int turn = 3; turn <= 5; turn++) {
            int end = start + turn;
            if (end >= residueCount) {
                // larger turns would exceed the list as well
                break;
            }
            // Check for H bond from NH(i+n) to CO(i)
            if (!isBonded(residues, start, end)) {
                continue;
            }
            Group closingResidue = residues.get(end);
            logger.debug("Turn at ({}, {}, {})", start, end, turn);
            getState(openingResidue).setTurn('>', turn);
            getState(closingResidue).setTurn('<', turn);
            // residues enclosed by the turn are tagged with the turn length as character
            char turnDigit = Character.forDigit(turn, 10);
            for (int enclosed = start + 1; enclosed < end; enclosed++) {
                getState(residues.get(enclosed)).setTurn(turnDigit, turn);
            }
        }
    }
}
Also used : Group(de.bioforscher.jstructure.model.structure.Group)

Example 3 with Group

use of de.bioforscher.jstructure.model.structure.Group in project jstructure by JonStargaryen.

the class UniProtMutationsForBindingSite method handleBindingSite.

/**
 * Maps the residues of a binding site onto UniProt sequence positions and reports matching effect entries.
 * <p>
 * The UniProt id is looked up in {@code EFFECTS_TSV} by PDB id and chain id. The original chain and its
 * renumbered counterpart are loaded, every renumbered binding site residue is paired with the closest
 * (centroid distance) group of the same three-letter code in the original chain, and the position of that
 * group is translated via a sequence alignment into a UniProt index. Matching lines of {@code EFFECTS_TSV}
 * are appended to {@code output}; residues which cannot be mapped are recorded in {@code warnings}.
 *
 * @param bindingSite the binding site to process
 * @throws java.util.NoSuchElementException if {@code EFFECTS_TSV} has no line for the PDB id and chain id
 */
private static void handleBindingSite(BindingSite bindingSite) {
    String uniProtId = AARSConstants.lines(EFFECTS_TSV)
            .map(line -> line.split(DEL))
            .filter(split -> split[0].equals(bindingSite.pdbId))
            .filter(split -> split[1].equals(bindingSite.chainId))
            .findAny()
            // fail with a descriptive message instead of a bare Optional.get()
            .orElseThrow(() -> new java.util.NoSuchElementException("no UniProt id found for " + bindingSite.pdbId + "_" + bindingSite.chainId))[2];
    // load original, full structure
    Chain originalChain = ProteinParser.source(bindingSite.pdbId).parse().select().chainName(bindingSite.chainId).asChain();
    String pdbSequence = originalChain.getAminoAcidSequence();
    String uniProtSequence = loadUniProtSequence(uniProtId);
    // align sequences
    SequencePair<ProteinSequence, AminoAcidCompound> alignment = needle(uniProtSequence, pdbSequence);
    System.out.println(bindingSite);
    System.out.println(alignment);
    // load renumbered, but not transformed chain
    // NOTE(review): absolute, user-specific path - only works on the author's machine
    Chain renumberedChain = ProteinParser.source(Paths.get("/home/bittrich/git/aars_analysis/data/msa/" + classToProcess + "/renumbered_structures/" + bindingSite.pdbId + "_renum.pdb")).parse().select().chainName(bindingSite.chainId).asChain();
    // key: renumbered, transformed binding site group - value: original group in PDB chain
    List<Integer> residueIndices = bindingSite.residues.stream().map(Group::getResidueNumber).map(ResidueNumber::getResidueNumber).collect(Collectors.toList());
    // compare the numeric value: residueIndices holds integers, so testing the ResidueNumber object itself
    // against it would not match
    Map<Group, Group> groupMapping = renumberedChain.aminoAcids().filter(aminoAcid -> residueIndices.contains(aminoAcid.getResidueNumber().getResidueNumber())).collect(Collectors.toMap(Function.identity(), // map each group to the entity in the not renumbered structure
    renumberedGroup -> originalChain.select().groupName(renumberedGroup.getThreeLetterCode()).asFilteredGroups().min(Comparator.comparingDouble(originalGroup -> originalGroup.calculate().centroid().distanceFast(renumberedGroup.calculate().centroid()))).get()));
    // determine sequence position in sequence alignment - rarely these indices do not match
    groupMapping.entrySet().forEach(entry -> {
        // alignment positions are addressed 1-based, list positions 0-based
        int residueIndex = originalChain.getGroups().indexOf(entry.getValue()) + 1;
        System.out.print("mapped: " + entry.getKey().getIdentifier() + " -> " + entry.getValue().getIdentifier() + " -> uniprot ");
        try {
            int indexInUniProt = alignment.getIndexInQueryForTargetAt(residueIndex);
            System.out.println(alignment.getCompoundInQueryAt(indexInUniProt).getLongName().toUpperCase() + "-" + indexInUniProt);
            String indexToFind = String.valueOf(indexInUniProt);
            // NOTE(review): this lookup splits on "\t" while the one at the top splits on DEL - confirm both agree
            AARSConstants.lines(EFFECTS_TSV)
                    .map(line -> line.split("\t"))
                    .filter(split -> split[0].equals(bindingSite.pdbId))
                    .filter(split -> split[1].equals(bindingSite.chainId))
                    .filter(split -> refersToPosition(split, indexToFind))
                    .forEach(split -> {
                        String outputLine = bindingSite.pdbId + DEL + bindingSite.chainId + DEL + split[2] + DEL + bindingSite.clazz + DEL + bindingSite.aa + DEL + bindingSite.mode + DEL + entry.getKey().getResidueNumber() + DEL + entry.getValue().getResidueNumber() + DEL + split[3] + DEL + split[4] + DEL + split[5] + DEL + split[6] + DEL + split[7] + System.lineSeparator();
                        System.out.println(outputLine);
                        output.append(outputLine);
                    });
        } catch (ArrayIndexOutOfBoundsException e) {
            // the position could not be resolved in the alignment - record it and continue with the next residue
            System.out.println("failed!");
            warnings.append("#could not map ").append(entry.getValue().getIdentifier()).append(" in ").append(bindingSite.pdbId).append("_").append(bindingSite.chainId).append(" to UniProt sequence").append(System.lineSeparator());
        }
    });
}
Also used : Function(java.util.function.Function) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) Group(de.bioforscher.jstructure.model.structure.Group) Map(java.util.Map) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Protein(de.bioforscher.jstructure.model.structure.Protein) Path(java.nio.file.Path) ResidueNumber(de.bioforscher.jstructure.model.structure.ResidueNumber) ProteinIdentifier(de.bioforscher.jstructure.model.structure.identifier.ProteinIdentifier) ProteinParser(de.bioforscher.jstructure.parser.ProteinParser) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) Paths(java.nio.file.Paths) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Chain(de.bioforscher.jstructure.model.structure.Chain) Group(de.bioforscher.jstructure.model.structure.Group) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 4 with Group

use of de.bioforscher.jstructure.model.structure.Group in project jstructure by JonStargaryen.

the class ProteinParserTest method shouldAnnotateHetAtmsCorrectlyFor1bs2.

@Test
public void shouldAnnotateHetAtmsCorrectlyFor1bs2() {
    /*
     * 1bs2: aars structure whose binding site holds an arginine (given as ATOM record) next to some water
     * (given as HETATM)
     */
    Protein structure = ProteinParser.source("1bs2").parse();

    // every water has to be a ligand written as HETATM
    List<Group> waterGroups = structure.select().water().asFilteredGroups().collect(Collectors.toList());
    for (Group water : waterGroups) {
        Assert.assertTrue(water.isLigand());
        Assert.assertTrue("water records ought to start with HETATM", water.getPdbRepresentation().startsWith(Atom.HETATM_PREFIX));
    }

    // the ARG ligand must not show up among the regular amino acids
    Group ligandArginine = structure.select().residueNumber(900).asGroup();
    boolean ligandListedAsAminoAcid = structure.aminoAcids().anyMatch(group -> group.equals(ligandArginine));
    Assert.assertFalse("amino acid ligand ought to be not a part of the amino acid chain", ligandListedAsAminoAcid);

    // the sequence has to end with MET rather than with the ARG ligand
    Assert.assertThat(structure.getAminoAcidSequence(), endsWith("M"));

    // the hetatm selection has to cover the waters as well as the ARG ligand
    List<Group> hetatmGroups = structure.select().hetatms().asFilteredGroups().collect(Collectors.toList());
    boolean coversWaters = hetatmGroups.containsAll(waterGroups);
    boolean coversLigand = hetatmGroups.contains(ligandArginine);
    Assert.assertTrue(coversWaters && coversLigand);
}
Also used : Group(de.bioforscher.jstructure.model.structure.Group) Protein(de.bioforscher.jstructure.model.structure.Protein) Test(org.junit.Test)

Example 5 with Group

use of de.bioforscher.jstructure.model.structure.Group in project jstructure by JonStargaryen.

the class ProteinParserTest method shouldHandleModifiedResidue.

@Test
public void shouldHandleModifiedResidue() {
    // residue 1 of chain C in 1brr is the modified residue PCA
    Group modifiedResidue = ProteinParser.source("1brr")
            .parse()
            .select()
            .chainName("C")
            .residueNumber(1)
            .asGroup();

    // it has to be treated as amino acid, not as ligand
    Assert.assertTrue(modifiedResidue.isAminoAcid());
    Assert.assertFalse(modifiedResidue.isLigand());

    // the prototype of PCA has to resolve to the one-letter code of GLU
    Assert.assertEquals("incorrect mapping of PCA to GLU", "E", modifiedResidue.getGroupPrototype().getOneLetterCode().get());
}
Also used : Group(de.bioforscher.jstructure.model.structure.Group) Protein(de.bioforscher.jstructure.model.structure.Protein) Test(org.junit.Test)

Aggregations

Group (de.bioforscher.jstructure.model.structure.Group)28 Atom (de.bioforscher.jstructure.model.structure.Atom)10 Chain (de.bioforscher.jstructure.model.structure.Chain)8 Structure (de.bioforscher.jstructure.model.structure.Structure)8 IOException (java.io.IOException)8 List (java.util.List)8 Collectors (java.util.stream.Collectors)8 Document (org.jsoup.nodes.Document)7 Element (org.jsoup.nodes.Element)7 Test (org.junit.Test)7 Protein (de.bioforscher.jstructure.model.structure.Protein)6 Files (java.nio.file.Files)6 Stream (java.util.stream.Stream)6 Elements (org.jsoup.select.Elements)6 Logger (org.slf4j.Logger)6 LoggerFactory (org.slf4j.LoggerFactory)6 BufferedReader (java.io.BufferedReader)4 InputStreamReader (java.io.InputStreamReader)4 UncheckedIOException (java.io.UncheckedIOException)4 URL (java.net.URL)4