use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.
the class StructuralInformationParserService method parseContactStructuralInformationFile.
/**
 * Parses a tab-separated structural information file and aggregates all reconstructions of the same
 * residue pair into one {@link ContactStructuralInformation}.
 *
 * <p>Each line is expected to start with a residue pair of the form {@code (i, j)}, followed by a
 * column that is compared to the literal {@code true} and nine numeric columns which are handed to the
 * {@link ReconstructionStructuralInformation} constructor in file order.
 *
 * <p>The stream is decoded as UTF-8, read to its end and closed by this method.
 *
 * @param inputStream the raw file content; closed before this method returns
 * @param chain the chain used to look up the one-letter codes of both residues of a contact
 * @param earlyFoldingResidues residues forwarded to {@code residueIsInCollection} and
 *     {@code contactIsInCollection} to flag each contact
 * @return one entry per distinct residue pair found in the file (iteration order of a {@link HashMap})
 * @throws java.io.UncheckedIOException if reading or closing the stream fails
 */
public List<ContactStructuralInformation> parseContactStructuralInformationFile(InputStream inputStream, Chain chain, List<AminoAcid> earlyFoldingResidues) {
    // group the raw lines by the residue pair encoded in the first column
    Map<Pair<Integer, Integer>, List<String>> parsingMap = new HashMap<>();
    // manage the reader itself: closing the Stream returned by lines() would not close the reader,
    // and an explicit charset keeps parsing independent of the platform default
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
        reader.lines().forEach(line -> {
            String[] split = line.split("\t");
            String[] idSplit = split[0].split(",");
            Pair<Integer, Integer> idPair = new Pair<>(
                    Integer.valueOf(idSplit[0].split("\\(")[1].trim()),
                    Integer.valueOf(idSplit[1].split("\\)")[0].trim()));
            parsingMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(line);
        });
    } catch (java.io.IOException e) {
        // only close() can raise a checked IOException here; read errors already surface unchecked
        throw new java.io.UncheckedIOException(e);
    }

    // turn every raw line into a reconstruction, still grouped by residue pair
    Map<Pair<Integer, Integer>, List<ReconstructionStructuralInformation>> reconstructionMap = new HashMap<>();
    parsingMap.entrySet().stream().flatMap(entry -> {
        // everything below depends on the residue pair only, so resolve it once per pair
        Integer residueNumber1 = entry.getKey().getLeft();
        Integer residueNumber2 = entry.getKey().getRight();
        String aa1 = chain.select().residueNumber(residueNumber1).asAminoAcid().getOneLetterCode();
        String aa2 = chain.select().residueNumber(residueNumber2).asAminoAcid().getOneLetterCode();
        ContactDistanceBin contactDistanceBin = ContactDistanceBin.resolve(new Pair<>(
                IdentifierFactory.createResidueIdentifier(residueNumber1),
                IdentifierFactory.createResidueIdentifier(residueNumber2))).orElse(null);
        return entry.getValue().stream()
                .map(line -> line.split("\t"))
                .map(split -> new ReconstructionStructuralInformation(residueNumber1,
                        aa1,
                        residueNumber2,
                        aa2,
                        contactDistanceBin,
                        split[1].equals("true"),
                        Double.valueOf(split[2]),
                        Double.valueOf(split[3]),
                        Double.valueOf(split[4]),
                        Double.valueOf(split[5]),
                        Double.valueOf(split[6]),
                        Double.valueOf(split[7]),
                        Double.valueOf(split[8]),
                        Double.valueOf(split[9]),
                        Double.valueOf(split[10])));
    }).forEach(rsi -> {
        Pair<Integer, Integer> idPair = new Pair<>(rsi.getResidueIdentifier1(), rsi.getResidueIdentifier2());
        reconstructionMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(rsi);
    });

    // global statistics over all reconstructions of the file
    List<ReconstructionStructuralInformation> reconstructionStructuralInformation = reconstructionMap.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.toList());
    int numberOfReconstructions = reconstructionStructuralInformation.size();
    double averageRmsd = reconstructionStructuralInformation.stream()
            .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
            .average()
            .orElse(0.0);
    double standardDeviationRmsd = new StandardDeviation().evaluate(reconstructionStructuralInformation.stream()
            .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
            .toArray());
    // statistics over the per-contact maximum of the RMSD increase
    double averageMaximumRmsd = reconstructionMap.entrySet().stream()
            .mapToDouble(entry -> entry.getValue().stream()
                    .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
                    .max()
                    .orElse(0.0))
            .average()
            .orElse(0.0);
    double standardDeviationMaximumRmsd = new StandardDeviation().evaluate(reconstructionMap.entrySet().stream()
            .mapToDouble(entry -> entry.getValue().stream()
                    .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
                    .max()
                    .orElse(0.0))
            .toArray());
    // the 10% of all reconstructions with the highest RMSD increase
    List<ReconstructionStructuralInformation> topScoringReconstructions = reconstructionMap.values().stream()
            .flatMap(Collection::stream)
            .sorted(Comparator.comparingDouble(ReconstructionStructuralInformation::getRmsdIncrease).reversed())
            .limit((int) (0.1 * numberOfReconstructions))
            .collect(Collectors.toList());

    return reconstructionMap.entrySet().stream().map(entry -> {
        List<ReconstructionStructuralInformation> values = entry.getValue();
        // all reconstructions of one pair share residue numbers, amino acids and distance bin
        ReconstructionStructuralInformation reference = values.get(0);
        return new ContactStructuralInformation(reference.getResidueIdentifier1(),
                reference.getAa1(),
                reference.getResidueIdentifier2(),
                reference.getAa2(),
                reference.getContactDistanceBin(),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineRmsd),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineTmScore),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineQ),
                computeAverage(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getqIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getqIncrease),
                residueIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                contactIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                averageRmsd,
                standardDeviationRmsd,
                averageMaximumRmsd,
                standardDeviationMaximumRmsd,
                reconstructionStructuralInformation,
                topScoringReconstructions,
                values.stream().map(ReconstructionStructuralInformation::getRmsdIncrease).collect(Collectors.toList()));
    }).collect(Collectors.toList());
}
use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByResidue.
/**
 * Prints one comma-separated line per residue of chain 1bdd: the averaged and maximum structural
 * information scores followed by betweenness, closeness, clustering coefficient and degree of the
 * residue in the distance-based residue graph.
 */
@Test
public void shouldPrintStructuralInformationByResidue() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain with experiment 185 of the Start2Fold entry before selecting early folders
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    StructuralInformationParserService parserService = StructuralInformationParserService.getInstance();
    List<ContactStructuralInformation> contactStructuralInformation = parserService.parseContactStructuralInformationFile(
            TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);
    List<ResidueStructuralInformation> residueStructuralInformation = StructuralInformationParserService.getInstance()
            .composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contactStructuralInformation);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ResidueStructuralInformation si : residueStructuralInformation) {
        AminoAcid aminoAcid = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier()))
                .asAminoAcid();
        // topological descriptors are computed before any formatting takes place
        double betweenness = residueGraphCalculations.betweenness(aminoAcid);
        double closeness = residueGraphCalculations.closeness(aminoAcid);
        double cc = residueGraphCalculations.clusteringCoefficient(aminoAcid);
        int degree = residueGraph.degreeOf(aminoAcid);

        StringBuilder row = new StringBuilder();
        row.append(StandardFormat.format(si.getAverageRmsdIncrease())).append(",");
        row.append(StandardFormat.format(si.getAverageTmScoreIncrease())).append(",");
        row.append(StandardFormat.format(si.getAverageQIncrease())).append(",");
        row.append(StandardFormat.format(si.getMaximumRmsdIncrease())).append(",");
        row.append(StandardFormat.format(si.getMaximumTmScoreIncrease())).append(",");
        row.append(StandardFormat.format(si.getMaximumQIncrease())).append(",");
        row.append(StandardFormat.format(betweenness)).append(",");
        row.append(StandardFormat.format(closeness)).append(",");
        row.append(StandardFormat.format(cc)).append(",");
        row.append(StandardFormat.format(degree));
        System.out.println(row.toString());
    }
}
use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByContact.
/**
 * Prints one comma-separated line per contact of chain 1bdd: the averaged and maximum structural
 * information scores followed by the betweenness of the contact in the distance-based residue graph.
 */
@Test
public void shouldPrintStructuralInformationByContact() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain with experiment 185 of the Start2Fold entry before selecting early folders
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    StructuralInformationParserService parserService = StructuralInformationParserService.getInstance();
    List<ContactStructuralInformation> contactStructuralInformation = parserService.parseContactStructuralInformationFile(
            TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ContactStructuralInformation si : contactStructuralInformation) {
        AminoAcid aminoAcid1 = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier1()))
                .asAminoAcid();
        AminoAcid aminoAcid2 = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier2()))
                .asAminoAcid();
        Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);
        double betweenness = residueGraphCalculations.betweenness(pair);

        StringBuilder row = new StringBuilder();
        row.append(StandardFormat.format(si.getAverageRmsdIncrease())).append(",");
        row.append(StandardFormat.format(si.getAverageTmScoreIncrease())).append(",");
        row.append(StandardFormat.format(si.getAverageQIncrease())).append(",");
        row.append(StandardFormat.format(si.getMaximumRmsdIncrease())).append(",");
        row.append(StandardFormat.format(si.getMaximumTmScoreIncrease())).append(",");
        row.append(StandardFormat.format(si.getMaximumQIncrease())).append(",");
        row.append(StandardFormat.format(betweenness));
        System.out.println(row.toString());
    }
}
use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.
the class EvolutionaryCouplingParser method parsePlmScore.
/**
 * Annotates every contact with the coupling information found in the first {@code table} element of
 * the document and flags the best-ranked contacts for a series of cutoffs.
 *
 * <p>The list is sorted in place by ascending coupling rank. For each cutoff fraction
 * {@code f} in 0.2, 0.4, ..., 1.6 the first {@code (int) (f * numberOfResidues)} contacts with a
 * coupling rank greater than 0 are marked via the corresponding {@code markAsTopScoringContactXX}
 * method.
 *
 * @param contacts the contacts to annotate; reordered by this call
 * @param document the parsed coupling document
 * @param numberOfResidues the number of residues the cutoff fractions are relative to
 */
public static void parsePlmScore(List<ContactStructuralInformation> contacts, Document document, int numberOfResidues) {
    Element table = document.getElementsByTag("table").first();
    contacts.forEach(contact -> parsePlmScore(contact, table));
    contacts.sort(Comparator.comparingDouble(ContactStructuralInformation::getCouplingRank));

    markTopScoringContacts(contacts, 0.2, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact02);
    markTopScoringContacts(contacts, 0.4, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact04);
    markTopScoringContacts(contacts, 0.6, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact06);
    markTopScoringContacts(contacts, 0.8, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact08);
    markTopScoringContacts(contacts, 1.0, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact10);
    markTopScoringContacts(contacts, 1.2, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact12);
    markTopScoringContacts(contacts, 1.4, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact14);
    markTopScoringContacts(contacts, 1.6, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact16);
}

/**
 * Applies {@code marker} to the first {@code (int) (fraction * numberOfResidues)} contacts, in list
 * order, whose coupling rank is greater than 0.
 */
private static void markTopScoringContacts(List<ContactStructuralInformation> contacts,
                                           double fraction,
                                           int numberOfResidues,
                                           java.util.function.Consumer<ContactStructuralInformation> marker) {
    // truncation towards zero is intentional and matches the cutoff definition
    int contactsToSelect = (int) (fraction * numberOfResidues);
    contacts.stream()
            .filter(contact -> contact.getCouplingRank() > 0)
            .limit(contactsToSelect)
            .forEach(marker);
}
use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.
the class A07_WriteStructuralInformationByContactCsv method handleLine.
/**
 * Processes one semicolon-separated line of the input list and renders all contacts of the referenced
 * protein as CSV rows.
 *
 * <p>The first three fields are read as entry id, PDB id and a bracketed, comma-separated list of
 * experiment ids; functional residue numbers are extracted from the whole split line by
 * {@code Start2FoldConstants.extractFunctionalResidueNumbers}. Only the first chain of the structure is
 * considered. Missing or unparsable evolutionary coupling data is tolerated and merely logged.
 *
 * @param line the raw input line
 * @return the CSV rows of all contacts joined by the system line separator, or an empty optional if
 *     anything fails while processing the line (the failure is logged)
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        // tolerate whitespace after the separating commas, e.g. "[1, 2]"
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
                .map(String::trim)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("structure " + pdbId + " provides no chain"));
        LinearAlgebra.PrimitiveDoubleArrayLinearAlgebra centroid = chain.calculate().centroid();

        Path start2foldXml = Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml");
        Start2FoldXmlParser.parseStability(chain, start2foldXml);
        Start2FoldXmlParser.parseSpecificExperiment(chain, start2foldXml, experimentIds);
        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids()
                    .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .forEach(functionalResidues::add);
        }

        List<AminoAcid> strongResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isStrong())
                .collect(Collectors.toList());
        List<AminoAcid> orderedResidues = chain.aminoAcids()
                .filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType())
                .collect(Collectors.toList());
        List<AminoAcid> buriedResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isBuried())
                .collect(Collectors.toList());
        // ordered residues whose surrounding secondary structure element contains an early folding residue
        List<AminoAcid> residuesInEarlyFoldingSecondaryStructureElements = orderedResidues.stream()
                .filter(aminoAcid -> {
                    GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement =
                            aminoAcid.getFeature(GenericSecondaryStructure.class).getSurroundingSecondaryStructureElement(aminoAcid);
                    List<AminoAcid> surroundingAminoAcids = chain.getAminoAcids().subList(
                            surroundingSecondaryStructureElement.getStart(),
                            surroundingSecondaryStructureElement.getEnd() + 1);
                    return surroundingAminoAcids.stream().anyMatch(earlyFoldingResidues::contains);
                })
                .collect(Collectors.toList());
        List<AminoAcid> aromaticResidues = chain.aminoAcids()
                .filter(AminoAcid.Filter.AROMATIC)
                .collect(Collectors.toList());

        List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
                .parseContactStructuralInformation(
                        Start2FoldConstants.DATA_DIRECTORY.resolve("si").resolve("raw").resolve(entryId.toUpperCase() + ".out"),
                        chain,
                        earlyFoldingResidues);
        ResidueGraph conventionalProteinGraph = ResidueGraph.createResidueGraph(chain,
                ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
        ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(conventionalProteinGraph);

        try {
            EvolutionaryCouplingParser.parsePlmScore(contactStructuralInformation,
                    Jsoup.parse(Start2FoldConstants.newInputStream(Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId + "_ec.html")), "UTF-8", ""),
                    chain.getAminoAcids().size());
        } catch (Exception e) {
            // coupling data is optional: keep going without it, but leave a trace of why it is missing
            logger.info("no evolutionary coupling annotation for {}\nby: {}", entryId, e.getMessage());
        }
        boolean ecAnnotation = contactStructuralInformation.stream().anyMatch(csi -> csi.getPlmScore() != 0.0);

        PLIPInteractionContainer plipInteractionContainer = chain.getFeature(PLIPInteractionContainer.class);
        System.out.println("efr: " + (earlyFoldingResidues.size() > 0)
                + " strong: " + (strongResidues.size() > 0)
                + " functional: " + (functionalResidues.size() > 0)
                + " couplings: " + ecAnnotation);

        return Optional.of(contactStructuralInformation.stream().map(contact -> {
            int residueNumber1 = contact.getResidueIdentifier1();
            int residueNumber2 = contact.getResidueIdentifier2();
            AminoAcid aminoAcid1 = chain.select().residueNumber(residueNumber1).asAminoAcid();
            AminoAcid aminoAcid2 = chain.select().residueNumber(residueNumber2).asAminoAcid();
            Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer1 = aminoAcid1.getFeature(ResidueTopologicPropertiesContainer.class);
            // must be read from the second residue; otherwise the averaged columns only reflect residue 1
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer2 = aminoAcid2.getFeature(ResidueTopologicPropertiesContainer.class);
            // midpoint between the centroids of both residues
            LinearAlgebra.PrimitiveDoubleArrayLinearAlgebra contactCentroid = aminoAcid1.calculate().centroid()
                    .add(aminoAcid2.calculate().centroid())
                    .divide(2);

            // the chain identifier column is hard-coded to "A"
            return pdbId + "," + "A"
                    + "," + residueNumber1
                    + "," + contact.getAa1()
                    + "," + residueNumber2
                    + "," + contact.getAa2()
                    + "," + contact.getContactDistanceBin()
                    + "," + (contact.getContactDistanceBin() == ContactDistanceBin.LONG)
                    + "," + (contact.getContactDistanceBin() == ContactDistanceBin.MEDIUM)
                    + "," + (contact.getContactDistanceBin() == ContactDistanceBin.SHORT)
                    + "," + StandardFormat.format(contactCentroid.distance(centroid))
                    + "," + StandardFormat.format(contact.getAverageRmsdIncrease())
                    + "," + StandardFormat.format(contact.getAverageTmScoreIncrease())
                    + "," + StandardFormat.format(contact.getAverageQIncrease())
                    + "," + StandardFormat.format(contact.getMaximumRmsdIncrease())
                    + "," + StandardFormat.format(contact.getMaximumTmScoreIncrease())
                    + "," + StandardFormat.format(contact.getMaximumQIncrease())
                    + "," + StandardFormat.format(contact.getAverageRmsdIncreaseZScore())
                    + "," + contact.getFractionOfTopScoringContacts()
                    + "," + StandardFormat.format(contact.getPlmScore())
                    + "," + contact.getCouplingRank()
                    + "," + contact.istop02()
                    + "," + contact.isTop04()
                    + "," + contact.isTop06()
                    + "," + contact.isTop08()
                    + "," + contact.isTop10()
                    + "," + contact.isTop12()
                    + "," + contact.isTop14()
                    + "," + contact.isTop16()
                    + "," + StandardFormat.format(residueGraphCalculations.betweenness(pair))
                    + "," + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getBetweenness()
                            + 0.5 * residueTopologicPropertiesContainer2.getConventional().getBetweenness())
                    + "," + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getCloseness()
                            + 0.5 * residueTopologicPropertiesContainer2.getConventional().getCloseness())
                    + "," + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getClusteringCoefficient()
                            + 0.5 * residueTopologicPropertiesContainer2.getConventional().getClusteringCoefficient())
                    + "," + plipInteractionContainer.getHydrogenBonds().stream()
                            .anyMatch(hydrogenBond -> isContact(hydrogenBond, aminoAcid1, aminoAcid2))
                    + "," + plipInteractionContainer.getHydrophobicInteractions().stream()
                            .anyMatch(hydrophobicInteraction -> isContact(hydrophobicInteraction, aminoAcid1, aminoAcid2))
                    + "," + contact.isEarlyFoldingResidue()
                    + "," + contact.isEarlyFoldingContact()
                    + "," + residueIsInCollection(functionalResidues, residueNumber1, residueNumber2)
                    + "," + contactIsInCollection(functionalResidues, residueNumber1, residueNumber2)
                    + "," + residueIsInCollection(strongResidues, residueNumber1, residueNumber2)
                    + "," + contactIsInCollection(strongResidues, residueNumber1, residueNumber2)
                    + "," + residueIsInCollection(buriedResidues, residueNumber1, residueNumber2)
                    + "," + contactIsInCollection(buriedResidues, residueNumber1, residueNumber2)
                    + "," + residueIsInCollection(orderedResidues, residueNumber1, residueNumber2)
                    + "," + contactIsInCollection(orderedResidues, residueNumber1, residueNumber2)
                    + "," + residueIsInCollection(residuesInEarlyFoldingSecondaryStructureElements, residueNumber1, residueNumber2)
                    + "," + contactIsInCollection(residuesInEarlyFoldingSecondaryStructureElements, residueNumber1, residueNumber2)
                    + "," + residueIsInCollection(aromaticResidues, residueNumber1, residueNumber2)
                    + "," + contactIsInCollection(aromaticResidues, residueNumber1, residueNumber2)
                    + "," + (earlyFoldingResidues.size() > 0)
                    + "," + (strongResidues.size() > 0)
                    + "," + (functionalResidues.size() > 0)
                    + "," + ecAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        logger.info("calculation failed for {}\nby: {}", line, e.getMessage());
        return Optional.empty();
    }
}
Aggregations