Search in sources :

Example 36 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class StructureCollectorsTest method shouldCreateDistinctContainer.

/**
 * Verifies that a deep copy of an amino acid is independent of its source:
 * changing coordinates in the copy must leave the source's PDB record as it was.
 */
@Test
public void shouldCreateDistinctContainer() {
    // remember the PDB record of the source residue before anything is copied
    AminoAcid source = protein.select().aminoAcids().asAminoAcid();
    String recordBeforeCopy = source.getPdbRepresentation();

    // move the first atom of a deep copy to the origin
    AtomContainer duplicate = source.createDeepCopy();
    double[] origin = { 0, 0, 0 };
    duplicate.getAtoms().get(0).setCoordinates(origin);

    // the copy's record is expected to differ from the snapshot ...
    String recordOfDuplicate = duplicate.getPdbRepresentation();
    Assert.assertNotEquals(recordBeforeCopy, recordOfDuplicate);
    // ... while the source still renders exactly as before
    String recordAfterCopy = source.getPdbRepresentation();
    Assert.assertEquals(recordBeforeCopy, recordAfterCopy);
}
Also used : AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) AtomContainer(de.bioforscher.jstructure.model.structure.container.AtomContainer) Test(org.junit.Test)

Example 37 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class StructureParserTest method shouldHandleProteinWithOligopeptide.

/**
 * Parses PDB entry 1lyb and checks that its "STA" group is treated as an
 * amino acid (peptide-like polymer type, not a ligand) and is part of the
 * structure's amino acid stream.
 */
@Test
public void shouldHandleProteinWithOligopeptide() {
    Structure structure = StructureParser.fromPdbId("1lyb").parse();
    // the group named STA is peptide-like, so the cast to AminoAcid is expected to succeed
    AminoAcid sta = (AminoAcid) structure.select().groupName("STA").asGroup();

    boolean peptideLike = sta.getPolymerType() == GroupPrototype.PolymerType.PEPTIDE_LIKE;
    Assert.assertTrue(peptideLike);
    Assert.assertTrue(sta.isAminoAcid());
    Assert.assertFalse(sta.isLigand());

    // membership is decided by sta.equals(candidate), as List.contains would do
    boolean listedAmongAminoAcids = structure.aminoAcids().anyMatch(sta::equals);
    Assert.assertTrue(listedAmongAminoAcids);
}
Also used : AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Test(org.junit.Test)

Example 38 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A03_WriteDatasetCsv method handleLine.

/**
 * Turns one semicolon-separated input line into a block of CSV rows, one row
 * per amino acid of the first chain of the referenced structure.
 * <p>
 * Field 0 of the line is used as the entry id, field 1 as the PDB id and
 * field 2 as a bracketed, comma-separated list of experiment ids. The
 * complete {@code split} array is additionally handed to
 * {@code Start2FoldConstants.extractFunctioanlResidueNumbers}.
 *
 * @param line the raw input line; it is echoed to standard out before processing
 * @return the CSV rows joined by the system line separator, or an empty
 *         Optional when any exception occurs while processing the line
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        // strip the surrounding square brackets, then parse the comma-separated integers
        List<Integer> experimentIds = Pattern.compile(",").splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", "")).map(Integer::valueOf).collect(Collectors.toList());
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // only the first chain of the structure is processed; a structure without
        // chains makes get() throw, which ends up in the catch block below
        Chain chain = structure.chains().findFirst().get();
        // annotate the chain from the per-entry files (XML, hot spot HTML, eQuant text)
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
        EQuantParser.parseEQuantFile(chain, Start2FoldConstants.EQUANT_DIRECTORY.resolve(entryId.toLowerCase() + ".equant-small.txt"));
        // residues whose Start2FoldResidueAnnotation reports isEarly()
        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());
        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional()).forEach(functionalResidues::add);
        }
        // materialized once so the position of each residue can be looked up via indexOf
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        ProteinGraph conventionalProteinGraph = ProteinGraphFactory.createProteinGraph(chain, ProteinGraphFactory.InteractionScheme.CALPHA8);
        return Optional.of(chain.aminoAcids().map(aminoAcid -> {
            GenericSecondaryStructure sse = aminoAcid.getFeature(GenericSecondaryStructure.class);
            HotSpotScoring hotSpotScoring = aminoAcid.getFeature(HotSpotScoring.class);
            PLIPInteractionContainer plipInteractionContainer = aminoAcid.getFeature(PLIPInteractionContainer.class);
            // non-local: interactions whose partners are more than 5 residue indices apart
            PLIPInteractionContainer nonLocalPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> Math.abs(inter.getPartner1().getResidueIndex() - inter.getPartner2().getResidueIndex()) > 5).collect(Collectors.toList()));
            // local: every interaction that did not end up in the non-local container
            PLIPInteractionContainer localPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> !nonLocalPlipInteractionContainer.getInteractions().contains(inter)).collect(Collectors.toList()));
            // the eQuant column stays "NA" when retrieving the score raises a ComputationException
            String equantScore = "NA";
            try {
                equantScore = StandardFormat.format(aminoAcid.getFeature(EQuantScore.class).getEvaluation());
            } catch (ComputationException e) {
                logger.warn("missing equant scoring for {}", aminoAcid);
            }
            // the functional column stays "NA" for every residue when no functional residue was collected
            String functionalAnnotation = "NA";
            if (functionalResidues.size() > 0) {
                functionalAnnotation = functionalResidues.contains(aminoAcid) ? "functional" : "non-functional";
            }
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);
            // list position, folded to the smaller of (index, size - index) and divided by the list size
            // NOTE(review): the first residue yields 0 but the last yields 1/size - confirm the asymmetry is intended
            double terminusDistance = aminoAcids.indexOf(aminoAcid);
            terminusDistance = Math.min(terminusDistance, aminoAcids.size() - terminusDistance);
            terminusDistance /= (double) aminoAcids.size();
            // one CSV row; the second column (chain id) is the constant "A", and the
            // order of the concatenated values defines the column order of the output
            return pdbId + "," + "A" + "," + aminoAcid.getResidueIdentifier() + "," + aminoAcid.getOneLetterCode() + "," + sse.getSecondaryStructure().getReducedRepresentation() + "," + sse.getSecondaryStructure().getOneLetterRepresentation() + "," + sse.getSurroundingSecondaryStructureElement(aminoAcid).getSize() + "," + (aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed() ? "exposed" : "buried") + "," + StandardFormat.format(aminoAcid.getFeature(GeometricProperties.class).getDistanceToCentroid()) + "," + StandardFormat.format(terminusDistance) + "," + plipInteractionContainer.getHydrogenBonds().size() + "," + plipInteractionContainer.getHydrophobicInteractions().size() + "," + plipInteractionContainer.getBackboneInteractions().size() + "," + plipInteractionContainer.getInteractions().size() + "," + localPlipInteractionContainer.getHydrogenBonds().size() + "," + localPlipInteractionContainer.getHydrophobicInteractions().size() + "," + localPlipInteractionContainer.getBackboneInteractions().size() + "," + localPlipInteractionContainer.getInteractions().size() + "," + nonLocalPlipInteractionContainer.getHydrogenBonds().size() + "," + nonLocalPlipInteractionContainer.getHydrophobicInteractions().size() + "," + nonLocalPlipInteractionContainer.getBackboneInteractions().size() + "," + nonLocalPlipInteractionContainer.getInteractions().size() + "," + StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()) + "," + StandardFormat.format(aminoAcid.getFeature(EgorAgreement.class).getEgorPrediction()) + "," + equantScore + "," + StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + "," + StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()) + "," + hotSpotScoring.getEcCount() + "," + StandardFormat.format(hotSpotScoring.getCumStrength()) + "," + StandardFormat.format(hotSpotScoring.getEcStrength()) + "," + hotSpotScoring.getConservation() + "," + 
StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getClusteringCoefficient()) + "," + conventionalProteinGraph.getContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getLocalContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getNonLocalContactsOf(aminoAcid).size() + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getDistinctNeighborhoodCount()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getDistinctNeighborhoodCount()) + "," + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + "," + functionalAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        // best effort: a failing line is logged (with the exception) and contributes no rows
        logger.info("calculation failed for {}", line, e);
        return Optional.empty();
    }
}
Also used : LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) EgorAgreement(de.bioforscher.jstructure.feature.energyprofile.EgorAgreement) HotSpotScoring(de.bioforscher.start2fold.model.HotSpotScoring) FunctionalResidueParser(de.bioforscher.start2fold.parser.FunctionalResidueParser) ArrayList(java.util.ArrayList) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) EQuantScore(de.bioforscher.start2fold.model.EQuantScore) ProteinGraphFactory(de.bioforscher.jstructure.feature.graphs.ProteinGraphFactory) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Logger(org.slf4j.Logger) GeometricProperties(de.bioforscher.jstructure.feature.geometry.GeometricProperties) Files(java.nio.file.Files) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.feature.graphs.ResidueTopologicPropertiesContainer) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interactions.PLIPInteractionContainer) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) EQuantParser(de.bioforscher.start2fold.parser.EQuantParser) 
ProteinGraph(de.bioforscher.jstructure.feature.graphs.ProteinGraph) Optional(java.util.Optional) EvolutionaryCouplingParser(de.bioforscher.start2fold.parser.EvolutionaryCouplingParser) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) ArrayList(java.util.ArrayList) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) HotSpotScoring(de.bioforscher.start2fold.model.HotSpotScoring) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) GeometricProperties(de.bioforscher.jstructure.feature.geometry.GeometricProperties) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ProteinGraph(de.bioforscher.jstructure.feature.graphs.ProteinGraph) EgorAgreement(de.bioforscher.jstructure.feature.energyprofile.EgorAgreement) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.feature.graphs.ResidueTopologicPropertiesContainer) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) EQuantScore(de.bioforscher.start2fold.model.EQuantScore) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) IOException(java.io.IOException) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interactions.PLIPInteractionContainer)

Example 39 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A07_WriteCouplingRangeCsv method handleLine.

/**
 * Builds two CSV rows ("local" and "long-range") per amino acid of the first
 * chain of the structure referenced by the given semicolon-separated line.
 * Each row carries the average of the coupling scores collected for that
 * residue from the entry's {@code _ec.html} table, or 0.0 when none exist.
 *
 * @param line the raw input line (entry id; PDB id; bracketed experiment id list);
 *             it is echoed to standard out before processing
 * @return all rows joined by the system line separator, or an empty Optional
 *         when any exception occurs (its stack trace is printed)
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] fields = line.split(";");
        String entryId = fields[0];
        String pdbId = fields[1];
        // drop the square brackets around the id list before splitting on commas
        String idList = fields[2].replaceAll("\\[", "").replaceAll("]", "");
        List<Integer> experimentIds = Pattern.compile(",").splitAsStream(idList).map(Integer::valueOf).collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        Chain chain = structure.chains().findFirst().get();
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());

        Map<Integer, List<Double>> localPlmScores = new HashMap<>();
        Map<Integer, List<Double>> longRangePlmScores = new HashMap<>();
        // NOTE(review): absolute, machine-specific location of the coupling table
        List<String> htmlLines = Files.readAllLines(Paths.get("/home/bittrich/git/phd_sb_repo/data/start2fold/coupling/" + entryId + "_ec.html"));
        Document couplingDocument = Jsoup.parse(String.join(System.lineSeparator(), htmlLines));

        // one empty score bucket per list position of the chain's amino acids
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        int residueCount = aminoAcids.size();
        for (int position = 0; position < residueCount; position++) {
            localPlmScores.put(position, new ArrayList<>());
            longRangePlmScores.put(position, new ArrayList<>());
        }

        // the first table row is skipped; every other row contributes its score to both residues
        Elements tableRows = couplingDocument.getElementsByTag("tr");
        for (int rowIndex = 1; rowIndex < tableRows.size(); rowIndex++) {
            Elements cells = tableRows.get(rowIndex).getElementsByTag("td");
            int firstIndex = Integer.parseInt(cells.get(2).text()) - 1;
            int secondIndex = Integer.parseInt(cells.get(4).text()) - 1;
            double score = Double.parseDouble(cells.get(6).text());
            // fewer than 6 positions apart counts as a local contact
            boolean local = Math.abs(firstIndex - secondIndex) < 6;
            Map<Integer, List<Double>> bucket = local ? localPlmScores : longRangePlmScores;
            System.out.println((local ? "local contact: " : "long-range contact: ") + tableRows.get(rowIndex).text());
            bucket.get(firstIndex).add(score);
            bucket.get(secondIndex).add(score);
        }

        List<String> csvRows = new ArrayList<>();
        for (AminoAcid aminoAcid : aminoAcids) {
            // columns shared by the local and the long-range row of this residue
            String prefix = pdbId + ",A," + aminoAcid.getOneLetterCode() + "," + aminoAcid.getResidueIdentifier().getResidueNumber() + "," + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + ",";
            double localAverage = localPlmScores.get(aminoAcid.getResidueIndex()).stream().mapToDouble(Double::doubleValue).average().orElse(0.0);
            csvRows.add(prefix + "local," + StandardFormat.format(localAverage));
            double longRangeAverage = longRangePlmScores.get(aminoAcid.getResidueIndex()).stream().mapToDouble(Double::doubleValue).average().orElse(0.0);
            csvRows.add(prefix + "long-range," + StandardFormat.format(longRangeAverage));
        }
        return Optional.of(String.join(System.lineSeparator(), csvRows));
    } catch (Exception e) {
        e.printStackTrace();
        return Optional.empty();
    }
}
Also used : java.util(java.util) Files(java.nio.file.Files) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) Paths(java.nio.file.Paths) Document(org.jsoup.nodes.Document) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) EvolutionaryCouplingParser(de.bioforscher.start2fold.parser.EvolutionaryCouplingParser) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) IOException(java.io.IOException) Structure(de.bioforscher.jstructure.model.structure.Structure)

Example 40 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class EvolutionaryCouplingParser method parseHotSpotFile.

/**
 * Reads a hot spot HTML table from the given stream and attaches a
 * {@link HotSpotScoring} feature to the amino acids of the chain.
 * <p>
 * The first table row is skipped. For each remaining row, cell 0 is read as
 * a 1-based position into the chain's amino acid list, and cells 2 to 5 as
 * EC count, cumulative strength, EC strength and conservation. Amino acids
 * without a table row receive a default-constructed {@link HotSpotScoring}.
 * <p>
 * The stream is decoded as UTF-8 and is not closed by this method.
 *
 * @param chain       the chain whose amino acids are annotated
 * @param inputStream source of the HTML document
 * @throws NumberFormatException     if a numeric cell cannot be parsed
 * @throws IndexOutOfBoundsException if a row has too few cells or its position
 *                                   lies outside the chain's amino acid list
 */
static void parseHotSpotFile(Chain chain, InputStream inputStream) {
    // decode with an explicit charset instead of the platform default, so the
    // result does not depend on the machine the parser runs on
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8));
    Document hotSpotDocument = Jsoup.parse(reader.lines().collect(Collectors.joining(System.lineSeparator())));
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
    hotSpotDocument.getElementsByTag("tr").stream().skip(1).forEach(element -> {
        Elements tds = element.getElementsByTag("td");
        // cell 1 is not needed for the scoring and is therefore not read
        int residueNumber = Integer.parseInt(tds.get(0).text());
        int ecCount = Integer.parseInt(tds.get(2).text());
        double cumStrength = Double.parseDouble(tds.get(3).text());
        double ecStrength = Double.parseDouble(tds.get(4).text());
        int conservation = Integer.parseInt(tds.get(5).text());
        // positions in the table are 1-based, the list is 0-based
        AminoAcid aminoAcid = aminoAcids.get(residueNumber - 1);
        aminoAcid.getFeatureContainer().addFeature(new HotSpotScoring(ecCount, cumStrength, ecStrength, conservation));
    });
    // assign baseline
    aminoAcids.stream().map(AminoAcid::getFeatureContainer).filter(featureContainer -> !featureContainer.getFeatureOptional(HotSpotScoring.class).isPresent()).forEach(featureContainer -> featureContainer.addFeature(new HotSpotScoring()));
}
Also used : InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) HotSpotScoring(de.bioforscher.start2fold.model.HotSpotScoring) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) Document(org.jsoup.nodes.Document) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) BufferedReader(java.io.BufferedReader) Path(java.nio.file.Path) InputStream(java.io.InputStream) HotSpotScoring(de.bioforscher.start2fold.model.HotSpotScoring) InputStreamReader(java.io.InputStreamReader) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) BufferedReader(java.io.BufferedReader) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Aggregations

AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid): 66, Chain (de.bioforscher.jstructure.model.structure.Chain): 40, Collectors (java.util.stream.Collectors): 40, IOException (java.io.IOException): 36, Files (java.nio.file.Files): 35, List (java.util.List): 31, StandardFormat (de.bioforscher.jstructure.StandardFormat): 26, StructureParser (de.bioforscher.jstructure.model.structure.StructureParser): 26, Path (java.nio.file.Path): 25, Structure (de.bioforscher.jstructure.model.structure.Structure): 23, Pattern (java.util.regex.Pattern): 17, Logger (org.slf4j.Logger): 16, LoggerFactory (org.slf4j.LoggerFactory): 16, Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation): 15, UncheckedIOException (java.io.UncheckedIOException): 14, ArrayList (java.util.ArrayList): 14, Stream (java.util.stream.Stream): 14, Start2FoldResidueAnnotation (de.bioforscher.start2fold.model.Start2FoldResidueAnnotation): 13, Optional (java.util.Optional): 13, Pair (de.bioforscher.jstructure.mathematics.Pair): 11