use of de.bioforscher.jstructure.graph.ResidueGraph in project jstructure by JonStargaryen.
the class A06_WriteStructuralInformationByResidueCsv method handleLine.
/**
 * Composes the per-residue CSV rows for one dataset entry.
 * <p>
 * The line is split at {@code ;}: field 0 is the entry id (used to locate the XML, coupling, EQuant and
 * structural information files), field 1 is the PDB id and field 2 is a bracketed, comma-separated list of
 * experiment ids. The complete split line is additionally handed to
 * {@code Start2FoldConstants.extractFunctionalResidueNumbers}. Only the first chain of the parsed structure
 * is processed and the chain column is always written as the literal {@code A}.
 * <p>
 * The evolutionary coupling hot spot file is optional: a failure to parse it is logged and processing
 * continues. Any other failure aborts the entry.
 *
 * @param line one semicolon-separated entry description
 * @return the rows of all standard amino acids joined by the system line separator, or an empty
 *         {@code Optional} if any step of the computation throws
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // fail with a descriptive message instead of the generic "No value present" of Optional.get()
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("no chain found in structure " + pdbId));

        Path start2foldXml = Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml");
        Start2FoldXmlParser.parseStability(chain, start2foldXml);
        Start2FoldXmlParser.parseSpecificExperiment(chain, start2foldXml, experimentIds);

        // coupling data is optional - keep going without it, but leave a trace of why it is missing
        try {
            EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
        } catch (Exception e) {
            logger.warn("could not parse evolutionary coupling hot spots for {}: {}", entryId, e.getMessage());
        }

        boolean ecAnnotation = chain.aminoAcids().anyMatch(residue -> residue.getFeature(HotSpotScoring.class).getEcCount() > 0);
        List<AminoAcid> topScoringResidues;
        if (ecAnnotation) {
            // select the 40% of residues with the highest cumulative coupling strength
            double fraction = 0.4;
            int residuesToSelect = (int) (fraction * chain.aminoAcids().count());
            topScoringResidues = chain.aminoAcids()
                    .sorted(Comparator.comparingDouble((AminoAcid aminoAcid) -> aminoAcid.getFeature(HotSpotScoring.class).getCumStrength()).reversed())
                    .limit(residuesToSelect)
                    .collect(Collectors.toList());
        } else {
            topScoringResidues = new ArrayList<>();
        }

        EQuantParser.parseEQuantFile(chain, Start2FoldConstants.EQUANT_DIRECTORY.resolve(entryId.toLowerCase() + ".equant-small.txt"));

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());
        List<AminoAcid> strongResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isStrong())
                .collect(Collectors.toList());

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids()
                    .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .forEach(functionalResidues::add);
        }

        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance().parseContactStructuralInformation(Start2FoldConstants.DATA_DIRECTORY.resolve("si").resolve("raw").resolve(entryId.toUpperCase() + ".out"), chain, earlyFoldingResidues);
        List<ResidueStructuralInformation> residueStructuralInformation = StructuralInformationParserService.getInstance().composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contactStructuralInformation);
        ResidueGraph conventionalProteinGraph = ResidueGraph.createResidueGraph(chain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));

        // non-coil residues whose surrounding secondary structure element contains at least one early folding residue
        List<AminoAcid> residuesInEarlyFoldingSecondaryStructureElements = chain.aminoAcids()
                .filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType())
                .filter(aminoAcid -> {
                    GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement = aminoAcid.getFeature(GenericSecondaryStructure.class).getSurroundingSecondaryStructureElement(aminoAcid);
                    List<AminoAcid> surroundingAminoAcids = chain.getAminoAcids().subList(surroundingSecondaryStructureElement.getStart(), surroundingSecondaryStructureElement.getEnd() + 1);
                    return surroundingAminoAcids.stream().anyMatch(earlyFoldingResidues::contains);
                })
                .collect(Collectors.toList());

        System.out.println("efr: " + (earlyFoldingResidues.size() > 0) + " strong: " + (strongResidues.size() > 0) + " functional: " + (functionalResidues.size() > 0));

        return Optional.of(chain.aminoAcids().filter(AminoAcid::isStandardAminoAcid).map(aminoAcid -> {
            GenericSecondaryStructure sse = aminoAcid.getFeature(GenericSecondaryStructure.class);
            HotSpotScoring hotSpotScoring = aminoAcid.getFeature(HotSpotScoring.class);
            PLIPInteractionContainer plipInteractionContainer = aminoAcid.getFeature(PLIPInteractionContainer.class);
            // interactions whose partners are more than 5 residue indices apart count as non-local, all others as local
            PLIPInteractionContainer nonLocalPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> Math.abs(inter.getPartner1().getResidueIndex() - inter.getPartner2().getResidueIndex()) > 5).collect(Collectors.toList()));
            PLIPInteractionContainer localPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> !nonLocalPlipInteractionContainer.getInteractions().contains(inter)).collect(Collectors.toList()));

            // a missing EQuant score is reported as "NA" rather than failing the whole entry
            String equantScore = "NA";
            try {
                equantScore = StandardFormat.format(aminoAcid.getFeature(EQuantScore.class).getEvaluation());
            } catch (ComputationException e) {
                logger.warn("missing equant scoring for {}", aminoAcid);
            }

            // "NA" when the entry has no functional annotation at all
            String functionalAnnotation = "NA";
            if (functionalResidues.size() > 0) {
                functionalAnnotation = functionalResidues.contains(aminoAcid) ? "functional" : "non-functional";
            }

            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);

            // relative distance to the closer terminus: min(i, n - i) / n for list index i and n residues
            // NOTE(review): the first residue yields 0 but the last one yields 1/n - confirm whether n - 1 - i was intended
            double terminusDistance = aminoAcids.indexOf(aminoAcid);
            terminusDistance = Math.min(terminusDistance, aminoAcids.size() - terminusDistance);
            terminusDistance /= (double) aminoAcids.size();

            ResidueStructuralInformation residueStructuralInformationEntry = residueStructuralInformation.get(aminoAcid.getAminoAcidIndex());

            // coil residues do not report element properties: size 0 and terminus distance -1
            GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement = sse.getSurroundingSecondaryStructureElement(aminoAcid);
            boolean coil = sse.getSecondaryStructure().isCoilType();
            int sseSize = coil ? 0 : surroundingSecondaryStructureElement.getSize();
            int sseTerminusDistance = coil ? -1 : surroundingSecondaryStructureElement.getTerminusDistance();

            // one column per line - the order defines the CSV layout and must not be changed
            return pdbId + "," + "A" + ","
                    + aminoAcid.getResidueIdentifier() + ","
                    + aminoAcid.getOneLetterCode() + ","
                    + sse.getSecondaryStructure().getReducedRepresentation() + ","
                    + sse.getSecondaryStructure().getOneLetterRepresentation() + ","
                    + (sse.getSecondaryStructure().isHelixType() ? "true" : "false") + ","
                    + (sse.getSecondaryStructure().isStrandType() ? "true" : "false") + ","
                    + (sse.getSecondaryStructure().isCoilType() ? "true" : "false") + ","
                    + sseSize + ","
                    + sseTerminusDistance + ","
                    + (aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed() ? "exposed" : "buried") + ","
                    + StandardFormat.format(aminoAcid.getFeature(GeometricProperties.class).getDistanceToCentroid()) + ","
                    + StandardFormat.format(terminusDistance) + ","
                    + plipInteractionContainer.getHydrogenBonds().size() + ","
                    + plipInteractionContainer.getHydrophobicInteractions().size() + ","
                    + plipInteractionContainer.getBackboneInteractions().size() + ","
                    + plipInteractionContainer.getInteractions().size() + ","
                    + localPlipInteractionContainer.getHydrogenBonds().size() + ","
                    + localPlipInteractionContainer.getHydrophobicInteractions().size() + ","
                    + localPlipInteractionContainer.getBackboneInteractions().size() + ","
                    + localPlipInteractionContainer.getInteractions().size() + ","
                    + nonLocalPlipInteractionContainer.getHydrogenBonds().size() + ","
                    + nonLocalPlipInteractionContainer.getHydrophobicInteractions().size() + ","
                    + nonLocalPlipInteractionContainer.getBackboneInteractions().size() + ","
                    + nonLocalPlipInteractionContainer.getInteractions().size() + ","
                    + StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()) + ","
                    + StandardFormat.format(aminoAcid.getFeature(EgorAgreement.class).getEgorPrediction()) + ","
                    + equantScore + ","
                    + StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + ","
                    + StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()) + ","
                    + hotSpotScoring.getEcCount() + ","
                    + StandardFormat.format(hotSpotScoring.getCumStrength()) + ","
                    + StandardFormat.format(hotSpotScoring.getEcStrength()) + ","
                    + hotSpotScoring.getConservation() + ","
                    + topScoringResidues.contains(aminoAcid) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getBetweenness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getCloseness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getClusteringCoefficient()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getBetweenness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getCloseness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getClusteringCoefficient()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getBetweenness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getCloseness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getClusteringCoefficient()) + ","
                    + conventionalProteinGraph.getContactsOf(aminoAcid).size() + ","
                    + conventionalProteinGraph.getLocalContactsOf(aminoAcid).size() + ","
                    + conventionalProteinGraph.getNonLocalContactsOf(aminoAcid).size() + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getBetweenness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getCloseness()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getClusteringCoefficient()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getDistinctNeighborhoodCount()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getDistinctNeighborhoodCount()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getAverageRmsdIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getAverageTmScoreIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getAverageQIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getMaximumRmsdIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getMaximumTmScoreIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getMaximumQIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getAverageRmsdIncreaseZScore()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getFractionOfTopScoringContacts()) + ","
                    + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + ","
                    + (residuesInEarlyFoldingSecondaryStructureElements.contains(aminoAcid) ? "true" : "false") + ","
                    + functionalAnnotation + ","
                    + (strongResidues.contains(aminoAcid) ? "strong" : "weak") + ","
                    + (earlyFoldingResidues.size() > 0) + ","
                    + (strongResidues.size() > 0) + ","
                    + (functionalResidues.size() > 0) + ","
                    + ecAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        logger.info("calculation failed for {}\nby: {}", line, e.getMessage());
        return Optional.empty();
    }
}
use of de.bioforscher.jstructure.graph.ResidueGraph in project jstructure by JonStargaryen.
the class A02_WriteDatasetCsv method handleLine.
/**
 * Builds the per-residue CSV rows of the dataset for a single entry.
 * <p>
 * The line is split at {@code ;}: field 0 is the entry id (used to locate the XML, coupling and EQuant
 * files), field 1 is the PDB id and field 2 is a bracketed, comma-separated list of experiment ids. The
 * complete split line is additionally handed to {@code Start2FoldConstants.extractFunctionalResidueNumbers}.
 * Only the first chain of the parsed structure is processed and the chain column is always written as the
 * literal {@code A}.
 *
 * @param line one semicolon-separated entry description
 * @return the rows of all amino acids joined by the system line separator, or an empty {@code Optional}
 *         if any step of the computation throws
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);

        String[] fields = line.split(";");
        String entryId = fields[0];
        String pdbId = fields[1];
        String rawExperimentIds = fields[2].replaceAll("\\[", "").replaceAll("]", "");
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(rawExperimentIds)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        Chain chain = structure.chains().findFirst().get();

        // attach all external annotations to the chain before any feature is read
        Start2FoldXmlParser.parseStability(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"));
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
        EQuantParser.parseEQuantFile(chain, Start2FoldConstants.EQUANT_DIRECTORY.resolve(entryId.toLowerCase() + ".equant-small.txt"));

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(candidate -> candidate.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());
        List<AminoAcid> stableResidues = chain.aminoAcids()
                .filter(candidate -> candidate.getFeature(Start2FoldResidueAnnotation.class).isStrong())
                .collect(Collectors.toList());

        // functional residues stay empty when the entry carries no functional annotation
        List<Integer> functionalNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(fields);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        if (!functionalNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalNumbers);
            functionalResidues.addAll(chain.aminoAcids()
                    .filter(candidate -> candidate.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .collect(Collectors.toList()));
        }

        List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());
        ResidueGraph contactGraph = ResidueGraph.createResidueGraph(chain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8));

        String csv = chain.aminoAcids().map(residue -> {
            GenericSecondaryStructure secondaryStructure = residue.getFeature(GenericSecondaryStructure.class);
            HotSpotScoring hotSpots = residue.getFeature(HotSpotScoring.class);

            // split the interactions by sequence separation: more than 5 residue indices apart is non-local
            PLIPInteractionContainer allInteractions = residue.getFeature(PLIPInteractionContainer.class);
            PLIPInteractionContainer nonLocalInteractions = new PLIPInteractionContainer(null,
                    allInteractions.getInteractions().stream()
                            .filter(inter -> Math.abs(inter.getPartner1().getResidueIndex() - inter.getPartner2().getResidueIndex()) > 5)
                            .collect(Collectors.toList()));
            PLIPInteractionContainer localInteractions = new PLIPInteractionContainer(null,
                    allInteractions.getInteractions().stream()
                            .filter(inter -> !nonLocalInteractions.getInteractions().contains(inter))
                            .collect(Collectors.toList()));

            // residues without an EQuant score are written as "NA"
            String equantScore;
            try {
                equantScore = StandardFormat.format(residue.getFeature(EQuantScore.class).getEvaluation());
            } catch (ComputationException e) {
                logger.warn("missing equant scoring for {}", residue);
                equantScore = "NA";
            }

            String functionalAnnotation;
            if (functionalResidues.isEmpty()) {
                functionalAnnotation = "NA";
            } else if (functionalResidues.contains(residue)) {
                functionalAnnotation = "functional";
            } else {
                functionalAnnotation = "non-functional";
            }

            ResidueTopologicPropertiesContainer topology = residue.getFeature(ResidueTopologicPropertiesContainer.class);

            // relative distance to the closer terminus: min(i, n - i) / n
            int position = residues.indexOf(residue);
            int total = residues.size();
            double terminusDistance = Math.min(position, total - position) / (double) total;

            // one column per line - the order defines the CSV layout
            return pdbId + "," + "A" + ","
                    + residue.getResidueIdentifier() + ","
                    + residue.getOneLetterCode() + ","
                    + secondaryStructure.getSecondaryStructure().getReducedRepresentation() + ","
                    + secondaryStructure.getSecondaryStructure().getOneLetterRepresentation() + ","
                    + secondaryStructure.getSurroundingSecondaryStructureElement(residue).getSize() + ","
                    + (residue.getFeature(AccessibleSurfaceArea.class).isExposed() ? "exposed" : "buried") + ","
                    + StandardFormat.format(residue.getFeature(GeometricProperties.class).getDistanceToCentroid()) + ","
                    + StandardFormat.format(terminusDistance) + ","
                    + allInteractions.getHydrogenBonds().size() + ","
                    + allInteractions.getHydrophobicInteractions().size() + ","
                    + allInteractions.getBackboneInteractions().size() + ","
                    + allInteractions.getInteractions().size() + ","
                    + localInteractions.getHydrogenBonds().size() + ","
                    + localInteractions.getHydrophobicInteractions().size() + ","
                    + localInteractions.getBackboneInteractions().size() + ","
                    + localInteractions.getInteractions().size() + ","
                    + nonLocalInteractions.getHydrogenBonds().size() + ","
                    + nonLocalInteractions.getHydrophobicInteractions().size() + ","
                    + nonLocalInteractions.getBackboneInteractions().size() + ","
                    + nonLocalInteractions.getInteractions().size() + ","
                    + StandardFormat.format(residue.getFeature(EnergyProfile.class).getSolvationEnergy()) + ","
                    + StandardFormat.format(residue.getFeature(EgorAgreement.class).getEgorPrediction()) + ","
                    + equantScore + ","
                    + StandardFormat.format(residue.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + ","
                    + StandardFormat.format(residue.getFeature(LoopFraction.class).getLoopFraction()) + ","
                    + hotSpots.getEcCount() + ","
                    + StandardFormat.format(hotSpots.getCumStrength()) + ","
                    + StandardFormat.format(hotSpots.getEcStrength()) + ","
                    + hotSpots.getConservation() + ","
                    + StandardFormat.format(topology.getFullPlip().getBetweenness()) + ","
                    + StandardFormat.format(topology.getFullPlip().getCloseness()) + ","
                    + StandardFormat.format(topology.getFullPlip().getClusteringCoefficient()) + ","
                    + StandardFormat.format(topology.getHydrogenPlip().getBetweenness()) + ","
                    + StandardFormat.format(topology.getHydrogenPlip().getCloseness()) + ","
                    + StandardFormat.format(topology.getHydrogenPlip().getClusteringCoefficient()) + ","
                    + StandardFormat.format(topology.getHydrophobicPlip().getBetweenness()) + ","
                    + StandardFormat.format(topology.getHydrophobicPlip().getCloseness()) + ","
                    + StandardFormat.format(topology.getHydrophobicPlip().getClusteringCoefficient()) + ","
                    + contactGraph.getContactsOf(residue).size() + ","
                    + contactGraph.getLocalContactsOf(residue).size() + ","
                    + contactGraph.getNonLocalContactsOf(residue).size() + ","
                    + StandardFormat.format(topology.getConventional().getBetweenness()) + ","
                    + StandardFormat.format(topology.getConventional().getCloseness()) + ","
                    + StandardFormat.format(topology.getConventional().getClusteringCoefficient()) + ","
                    + StandardFormat.format(topology.getFullPlip().getDistinctNeighborhoodCount()) + ","
                    + StandardFormat.format(topology.getConventional().getDistinctNeighborhoodCount()) + ","
                    + (earlyFoldingResidues.contains(residue) ? "early" : "late") + ","
                    + functionalAnnotation + ","
                    + (stableResidues.contains(residue) ? "stable" : "unstable");
        }).collect(Collectors.joining(System.lineSeparator()));

        return Optional.of(csv);
    } catch (Exception e) {
        logger.info("calculation failed for {}", line, e);
        return Optional.empty();
    }
}
use of de.bioforscher.jstructure.graph.ResidueGraph in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByResidue.
/**
 * Prints one CSV row per residue-level structural information entry of chain 1 of {@code 1bdd}: the six
 * average/maximum increase values followed by betweenness, closeness, clustering coefficient and degree
 * of the residue in the distance-based residue graph.
 * <p>
 * The test makes no assertions; it only fails if one of the steps throws.
 */
@Test
public void shouldPrintStructuralInformationByResidue() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    // annotate early folding residues from experiment 185 of the bundled Start2Fold entry
    List<Integer> experimentIds = Stream.of(185).collect(Collectors.toList());
    Start2FoldXmlParser.parseSpecificExperiment(chain, TestUtils.getResourceAsInputStream("efr/STF0045.xml"), experimentIds);
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(candidate -> candidate.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    StructuralInformationParserService parserService = StructuralInformationParserService.getInstance();
    List<ContactStructuralInformation> contactStructuralInformation = parserService.parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);
    List<ResidueStructuralInformation> residueStructuralInformation = StructuralInformationParserService.getInstance().composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contactStructuralInformation);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ResidueStructuralInformation si : residueStructuralInformation) {
        AminoAcid aminoAcid = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier()))
                .asAminoAcid();
        double betweenness = residueGraphCalculations.betweenness(aminoAcid);
        double closeness = residueGraphCalculations.closeness(aminoAcid);
        double cc = residueGraphCalculations.clusteringCoefficient(aminoAcid);
        int degree = residueGraph.degreeOf(aminoAcid);

        String row = StandardFormat.format(si.getAverageRmsdIncrease()) + ","
                + StandardFormat.format(si.getAverageTmScoreIncrease()) + ","
                + StandardFormat.format(si.getAverageQIncrease()) + ","
                + StandardFormat.format(si.getMaximumRmsdIncrease()) + ","
                + StandardFormat.format(si.getMaximumTmScoreIncrease()) + ","
                + StandardFormat.format(si.getMaximumQIncrease()) + ","
                + StandardFormat.format(betweenness) + ","
                + StandardFormat.format(closeness) + ","
                + StandardFormat.format(cc) + ","
                + StandardFormat.format(degree);
        System.out.println(row);
    }
}
use of de.bioforscher.jstructure.graph.ResidueGraph in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByContact.
/**
 * Prints one CSV row per contact-level structural information entry of chain 1 of {@code 1bdd}: the six
 * average/maximum increase values followed by the betweenness of the contact (as a residue pair) in the
 * distance-based residue graph.
 * <p>
 * The test makes no assertions; it only fails if one of the steps throws.
 */
@Test
public void shouldPrintStructuralInformationByContact() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();

    // annotate early folding residues from experiment 185 of the bundled Start2Fold entry
    Start2FoldXmlParser.parseSpecificExperiment(chain, TestUtils.getResourceAsInputStream("efr/STF0045.xml"), Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance().parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);
    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    contactStructuralInformation.stream().map(si -> {
        // resolve both contact partners in the chain to look the pair up in the graph
        AminoAcid aminoAcid1 = chain.select().residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier1())).asAminoAcid();
        AminoAcid aminoAcid2 = chain.select().residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier2())).asAminoAcid();
        Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);
        double betweenness = residueGraphCalculations.betweenness(pair);
        return StandardFormat.format(si.getAverageRmsdIncrease()) + ","
                + StandardFormat.format(si.getAverageTmScoreIncrease()) + ","
                + StandardFormat.format(si.getAverageQIncrease()) + ","
                + StandardFormat.format(si.getMaximumRmsdIncrease()) + ","
                + StandardFormat.format(si.getMaximumTmScoreIncrease()) + ","
                + StandardFormat.format(si.getMaximumQIncrease()) + ","
                + StandardFormat.format(betweenness);
    }).forEach(System.out::println);
}
use of de.bioforscher.jstructure.graph.ResidueGraph in project jstructure by JonStargaryen.
the class A07_WriteStructuralInformationByContactCsv method handleLine.
/**
 * Composes the per-contact CSV rows for one dataset entry.
 * <p>
 * The line is split at {@code ;}: field 0 is the entry id (used to locate the XML, coupling and
 * structural information files), field 1 is the PDB id and field 2 is a bracketed, comma-separated list of
 * experiment ids. The complete split line is additionally handed to
 * {@code Start2FoldConstants.extractFunctionalResidueNumbers}. Only the first chain of the parsed structure
 * is processed and the chain column is always written as the literal {@code A}.
 * <p>
 * The evolutionary coupling file is optional: a failure to parse it is logged and processing continues.
 * Any other failure aborts the entry.
 *
 * @param line one semicolon-separated entry description
 * @return the rows of all contacts joined by the system line separator, or an empty {@code Optional} if
 *         any step of the computation throws
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // fail with a descriptive message instead of the generic "No value present" of Optional.get()
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("no chain found in structure " + pdbId));
        LinearAlgebra.PrimitiveDoubleArrayLinearAlgebra centroid = chain.calculate().centroid();

        Path start2foldXml = Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml");
        Start2FoldXmlParser.parseStability(chain, start2foldXml);
        Start2FoldXmlParser.parseSpecificExperiment(chain, start2foldXml, experimentIds);

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids()
                    .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .forEach(functionalResidues::add);
        }

        // residue groups the contacts are tested against further below
        List<AminoAcid> strongResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isStrong())
                .collect(Collectors.toList());
        List<AminoAcid> orderedResidues = chain.aminoAcids()
                .filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType())
                .collect(Collectors.toList());
        List<AminoAcid> buriedResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isBuried())
                .collect(Collectors.toList());
        // non-coil residues whose surrounding secondary structure element contains at least one early folding residue
        List<AminoAcid> residuesInEarlyFoldingSecondaryStructureElements = chain.aminoAcids()
                .filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType())
                .filter(aminoAcid -> {
                    GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement = aminoAcid.getFeature(GenericSecondaryStructure.class).getSurroundingSecondaryStructureElement(aminoAcid);
                    List<AminoAcid> surroundingAminoAcids = chain.getAminoAcids().subList(surroundingSecondaryStructureElement.getStart(), surroundingSecondaryStructureElement.getEnd() + 1);
                    return surroundingAminoAcids.stream().anyMatch(earlyFoldingResidues::contains);
                })
                .collect(Collectors.toList());
        List<AminoAcid> aromaticResidues = chain.aminoAcids().filter(AminoAcid.Filter.AROMATIC).collect(Collectors.toList());

        List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance().parseContactStructuralInformation(Start2FoldConstants.DATA_DIRECTORY.resolve("si").resolve("raw").resolve(entryId.toUpperCase() + ".out"), chain, earlyFoldingResidues);
        ResidueGraph conventionalProteinGraph = ResidueGraph.createResidueGraph(chain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
        ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(conventionalProteinGraph);

        // coupling data is optional - keep going without it, but leave a trace of why it is missing
        try {
            EvolutionaryCouplingParser.parsePlmScore(contactStructuralInformation, Jsoup.parse(Start2FoldConstants.newInputStream(Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId + "_ec.html")), "UTF-8", ""), chain.getAminoAcids().size());
        } catch (Exception e) {
            logger.warn("could not parse evolutionary couplings for {}: {}", entryId, e.getMessage());
        }
        // couplings count as annotated as soon as any contact carries a non-zero plm score
        boolean ecAnnotation = contactStructuralInformation.stream().anyMatch(csi -> csi.getPlmScore() != 0.0);

        PLIPInteractionContainer plipInteractionContainer = chain.getFeature(PLIPInteractionContainer.class);
        System.out.println("efr: " + (earlyFoldingResidues.size() > 0) + " strong: " + (strongResidues.size() > 0) + " functional: " + (functionalResidues.size() > 0) + " couplings: " + ecAnnotation);

        return Optional.of(contactStructuralInformation.stream().map(contact -> {
            AminoAcid aminoAcid1 = chain.select().residueNumber(contact.getResidueIdentifier1()).asAminoAcid();
            AminoAcid aminoAcid2 = chain.select().residueNumber(contact.getResidueIdentifier2()).asAminoAcid();
            Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer1 = aminoAcid1.getFeature(ResidueTopologicPropertiesContainer.class);
            // the second container must come from the second residue, otherwise the averaged columns below
            // merely repeat the values of the first residue
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer2 = aminoAcid2.getFeature(ResidueTopologicPropertiesContainer.class);
            // midpoint between the centroids of both residues
            LinearAlgebra.PrimitiveDoubleArrayLinearAlgebra contactCentroid = aminoAcid1.calculate().centroid().add(aminoAcid2.calculate().centroid()).divide(2);

            // one column per line - the order defines the CSV layout and must not be changed
            return pdbId + "," + "A" + ","
                    + contact.getResidueIdentifier1() + ","
                    + contact.getAa1() + ","
                    + contact.getResidueIdentifier2() + ","
                    + contact.getAa2() + ","
                    + contact.getContactDistanceBin() + ","
                    + (contact.getContactDistanceBin() == ContactDistanceBin.LONG) + ","
                    + (contact.getContactDistanceBin() == ContactDistanceBin.MEDIUM) + ","
                    + (contact.getContactDistanceBin() == ContactDistanceBin.SHORT) + ","
                    + StandardFormat.format(contactCentroid.distance(centroid)) + ","
                    + StandardFormat.format(contact.getAverageRmsdIncrease()) + ","
                    + StandardFormat.format(contact.getAverageTmScoreIncrease()) + ","
                    + StandardFormat.format(contact.getAverageQIncrease()) + ","
                    + StandardFormat.format(contact.getMaximumRmsdIncrease()) + ","
                    + StandardFormat.format(contact.getMaximumTmScoreIncrease()) + ","
                    + StandardFormat.format(contact.getMaximumQIncrease()) + ","
                    + StandardFormat.format(contact.getAverageRmsdIncreaseZScore()) + ","
                    + contact.getFractionOfTopScoringContacts() + ","
                    + StandardFormat.format(contact.getPlmScore()) + ","
                    + contact.getCouplingRank() + ","
                    + contact.istop02() + ","
                    + contact.isTop04() + ","
                    + contact.isTop06() + ","
                    + contact.isTop08() + ","
                    + contact.isTop10() + ","
                    + contact.isTop12() + ","
                    + contact.isTop14() + ","
                    + contact.isTop16() + ","
                    + StandardFormat.format(residueGraphCalculations.betweenness(pair)) + ","
                    + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getBetweenness() + 0.5 * residueTopologicPropertiesContainer2.getConventional().getBetweenness()) + ","
                    + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getCloseness() + 0.5 * residueTopologicPropertiesContainer2.getConventional().getCloseness()) + ","
                    + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getClusteringCoefficient() + 0.5 * residueTopologicPropertiesContainer2.getConventional().getClusteringCoefficient()) + ","
                    + plipInteractionContainer.getHydrogenBonds().stream().anyMatch(hydrogenBond -> isContact(hydrogenBond, aminoAcid1, aminoAcid2)) + ","
                    + plipInteractionContainer.getHydrophobicInteractions().stream().anyMatch(hydrophobicInteraction -> isContact(hydrophobicInteraction, aminoAcid1, aminoAcid2)) + ","
                    + contact.isEarlyFoldingResidue() + ","
                    + contact.isEarlyFoldingContact() + ","
                    + residueIsInCollection(functionalResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(functionalResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(strongResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(strongResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(buriedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(buriedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(orderedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(orderedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(residuesInEarlyFoldingSecondaryStructureElements, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(residuesInEarlyFoldingSecondaryStructureElements, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(aromaticResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(aromaticResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + (earlyFoldingResidues.size() > 0) + ","
                    + (strongResidues.size() > 0) + ","
                    + (functionalResidues.size() > 0) + ","
                    + ecAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        logger.info("calculation failed for {}\nby: {}", line, e.getMessage());
        return Optional.empty();
    }
}
Aggregations