Search in sources :

Example 1 with ProteinGraph

use of de.bioforscher.jstructure.feature.graphs.ProteinGraph in project jstructure by JonStargaryen.

the class A03_WriteDatasetCsv method handleLine.

private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",").splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", "")).map(Integer::valueOf).collect(Collectors.toList());
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        Chain chain = structure.chains().findFirst().get();
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
        EQuantParser.parseEQuantFile(chain, Start2FoldConstants.EQUANT_DIRECTORY.resolve(entryId.toLowerCase() + ".equant-small.txt"));
        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());
        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional()).forEach(functionalResidues::add);
        }
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        ProteinGraph conventionalProteinGraph = ProteinGraphFactory.createProteinGraph(chain, ProteinGraphFactory.InteractionScheme.CALPHA8);
        return Optional.of(chain.aminoAcids().map(aminoAcid -> {
            GenericSecondaryStructure sse = aminoAcid.getFeature(GenericSecondaryStructure.class);
            HotSpotScoring hotSpotScoring = aminoAcid.getFeature(HotSpotScoring.class);
            PLIPInteractionContainer plipInteractionContainer = aminoAcid.getFeature(PLIPInteractionContainer.class);
            PLIPInteractionContainer nonLocalPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> Math.abs(inter.getPartner1().getResidueIndex() - inter.getPartner2().getResidueIndex()) > 5).collect(Collectors.toList()));
            PLIPInteractionContainer localPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> !nonLocalPlipInteractionContainer.getInteractions().contains(inter)).collect(Collectors.toList()));
            String equantScore = "NA";
            try {
                equantScore = StandardFormat.format(aminoAcid.getFeature(EQuantScore.class).getEvaluation());
            } catch (ComputationException e) {
                logger.warn("missing equant scoring for {}", aminoAcid);
            }
            String functionalAnnotation = "NA";
            if (functionalResidues.size() > 0) {
                functionalAnnotation = functionalResidues.contains(aminoAcid) ? "functional" : "non-functional";
            }
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);
            double terminusDistance = aminoAcids.indexOf(aminoAcid);
            terminusDistance = Math.min(terminusDistance, aminoAcids.size() - terminusDistance);
            terminusDistance /= (double) aminoAcids.size();
            return pdbId + "," + "A" + "," + aminoAcid.getResidueIdentifier() + "," + aminoAcid.getOneLetterCode() + "," + sse.getSecondaryStructure().getReducedRepresentation() + "," + sse.getSecondaryStructure().getOneLetterRepresentation() + "," + sse.getSurroundingSecondaryStructureElement(aminoAcid).getSize() + "," + (aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed() ? "exposed" : "buried") + "," + StandardFormat.format(aminoAcid.getFeature(GeometricProperties.class).getDistanceToCentroid()) + "," + StandardFormat.format(terminusDistance) + "," + plipInteractionContainer.getHydrogenBonds().size() + "," + plipInteractionContainer.getHydrophobicInteractions().size() + "," + plipInteractionContainer.getBackboneInteractions().size() + "," + plipInteractionContainer.getInteractions().size() + "," + localPlipInteractionContainer.getHydrogenBonds().size() + "," + localPlipInteractionContainer.getHydrophobicInteractions().size() + "," + localPlipInteractionContainer.getBackboneInteractions().size() + "," + localPlipInteractionContainer.getInteractions().size() + "," + nonLocalPlipInteractionContainer.getHydrogenBonds().size() + "," + nonLocalPlipInteractionContainer.getHydrophobicInteractions().size() + "," + nonLocalPlipInteractionContainer.getBackboneInteractions().size() + "," + nonLocalPlipInteractionContainer.getInteractions().size() + "," + StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()) + "," + StandardFormat.format(aminoAcid.getFeature(EgorAgreement.class).getEgorPrediction()) + "," + equantScore + "," + StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + "," + StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()) + "," + hotSpotScoring.getEcCount() + "," + StandardFormat.format(hotSpotScoring.getCumStrength()) + "," + StandardFormat.format(hotSpotScoring.getEcStrength()) + "," + hotSpotScoring.getConservation() + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getClusteringCoefficient()) + "," + conventionalProteinGraph.getContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getLocalContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getNonLocalContactsOf(aminoAcid).size() + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getDistinctNeighborhoodCount()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getDistinctNeighborhoodCount()) + "," + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + "," + functionalAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        logger.info("calculation failed for {}", line, e);
        return Optional.empty();
    }
}
Also used : LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) EgorAgreement(de.bioforscher.jstructure.feature.energyprofile.EgorAgreement) HotSpotScoring(de.bioforscher.start2fold.model.HotSpotScoring) FunctionalResidueParser(de.bioforscher.start2fold.parser.FunctionalResidueParser) ArrayList(java.util.ArrayList) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) EQuantScore(de.bioforscher.start2fold.model.EQuantScore) ProteinGraphFactory(de.bioforscher.jstructure.feature.graphs.ProteinGraphFactory) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Logger(org.slf4j.Logger) GeometricProperties(de.bioforscher.jstructure.feature.geometry.GeometricProperties) Files(java.nio.file.Files) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.feature.graphs.ResidueTopologicPropertiesContainer) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interactions.PLIPInteractionContainer) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) EQuantParser(de.bioforscher.start2fold.parser.EQuantParser) ProteinGraph(de.bioforscher.jstructure.feature.graphs.ProteinGraph) Optional(java.util.Optional) EvolutionaryCouplingParser(de.bioforscher.start2fold.parser.EvolutionaryCouplingParser) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) ArrayList(java.util.ArrayList) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) HotSpotScoring(de.bioforscher.start2fold.model.HotSpotScoring) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) GeometricProperties(de.bioforscher.jstructure.feature.geometry.GeometricProperties) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ProteinGraph(de.bioforscher.jstructure.feature.graphs.ProteinGraph) EgorAgreement(de.bioforscher.jstructure.feature.energyprofile.EgorAgreement) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.feature.graphs.ResidueTopologicPropertiesContainer) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) EQuantScore(de.bioforscher.start2fold.model.EQuantScore) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) IOException(java.io.IOException) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interactions.PLIPInteractionContainer)

Aggregations

StandardFormat (de.bioforscher.jstructure.StandardFormat)1 AccessibleSurfaceArea (de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea)1 EgorAgreement (de.bioforscher.jstructure.feature.energyprofile.EgorAgreement)1 EnergyProfile (de.bioforscher.jstructure.feature.energyprofile.EnergyProfile)1 GeometricProperties (de.bioforscher.jstructure.feature.geometry.GeometricProperties)1 ProteinGraph (de.bioforscher.jstructure.feature.graphs.ProteinGraph)1 ProteinGraphFactory (de.bioforscher.jstructure.feature.graphs.ProteinGraphFactory)1 ResidueTopologicPropertiesContainer (de.bioforscher.jstructure.feature.graphs.ResidueTopologicPropertiesContainer)1 PLIPInteractionContainer (de.bioforscher.jstructure.feature.interactions.PLIPInteractionContainer)1 LoopFraction (de.bioforscher.jstructure.feature.loopfraction.LoopFraction)1 GenericSecondaryStructure (de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure)1 ComputationException (de.bioforscher.jstructure.model.feature.ComputationException)1 Chain (de.bioforscher.jstructure.model.structure.Chain)1 Structure (de.bioforscher.jstructure.model.structure.Structure)1 StructureParser (de.bioforscher.jstructure.model.structure.StructureParser)1 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)1 Start2FoldConstants (de.bioforscher.start2fold.Start2FoldConstants)1 EQuantScore (de.bioforscher.start2fold.model.EQuantScore)1 FunctionalResidueAnnotation (de.bioforscher.start2fold.model.FunctionalResidueAnnotation)1 HotSpotScoring (de.bioforscher.start2fold.model.HotSpotScoring)1