use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.
the class DictionaryOfProteinSecondaryStructureTest method shouldClearPseudoAtomsAfterRun.
/**
 * Parses the structure identified by {@code ID}, runs the feature provider on it and asserts
 * that the resulting PDB representation does not contain the pseudo-hydrogen marker line.
 */
@Test
public void shouldClearPseudoAtomsAfterRun() {
    Structure protein = StructureParser.fromPdbId(ID).parse();
    featureProvider.process(protein);

    // the marker is matched as a plain substring of the complete PDB text
    String pdbText = protein.getPdbRepresentation();
    Assert.assertFalse("pseudo-atoms were not removed!", pdbText.contains("ATOM 0 H"));
}
use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.
the class DictionaryOfProteinSecondaryStructureTest method test1bta.
/**
 * Runs the feature provider on structure {@code 1bta} and prints, for every amino acid, the
 * residue itself followed by the name of its DSSP secondary structure state. Finally prints the
 * one-letter states of the first chain concatenated into a single string.
 */
@Test
public void test1bta() {
    Structure structure = StructureParser.fromPdbId("1bta").parse();
    featureProvider.process(structure);

    // per-residue dump: residue first, then the name of its assigned state
    structure.aminoAcids().forEach(residue -> {
        System.out.println(residue);
        System.out.println(residue.getFeature(DSSPSecondaryStructure.class).getSecondaryStructure().name());
    });

    // compact string of one-letter states for the first chain
    String oneLetterStates = structure.getFirstChain()
            .aminoAcids()
            .map(residue -> residue.getFeature(DSSPSecondaryStructure.class))
            .map(assignment -> assignment.getSecondaryStructure().getOneLetterRepresentation())
            .collect(Collectors.joining());
    System.out.println(oneLetterStates);
}
use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.
the class A03_PrintStart2FoldDatasetTable method handleLine.
/**
 * Builds one tab-separated summary row for a single line of the dataset description.
 *
 * <p>The line is split on {@code ';'}: field 0 is the entry id, field 1 the PDB id and field 2 a
 * bracketed, comma-separated list of experiment ids. The complete split array is additionally
 * handed to {@code Start2FoldConstants.extractFunctionalResidueNumbers}.
 *
 * <p>The row consists of: entry id, PDB id, the raw experiment id field, number of amino acids
 * of the first chain, number of early folding residues, number of functional residues and the
 * number of residues that are both early folding and functional.
 *
 * @param line the raw dataset line
 * @return the tab-separated row, or an empty string when any step fails (the stack trace is
 *         printed in that case)
 */
private static String handleLine(String line) {
    try {
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // fail with a descriptive message instead of a bare Optional.get(); the failure is still
        // handled by the catch block below
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalStateException("no chain found in structure " + pdbId));

        Start2FoldXmlParser.parseStability(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"));
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            functionalResidues.addAll(chain.aminoAcids()
                    .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .collect(Collectors.toList()));
        }

        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        long intersection = earlyFoldingResidues.stream()
                .filter(functionalResidues::contains)
                .count();

        return entryId + "\t"
                + pdbId + "\t"
                + split[2] + "\t"
                + aminoAcids.size() + "\t"
                + earlyFoldingResidues.size() + "\t"
                + functionalResidues.size() + "\t"
                + intersection;
    } catch (Exception e) {
        // best effort: a broken entry yields an empty row rather than aborting the whole table
        e.printStackTrace();
        return "";
    }
}
use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.
the class A07_WriteCouplingRangeCsv method handleLine.
/**
 * Produces the CSV rows describing local and long-range coupling scores for one dataset line.
 *
 * <p>The line is echoed to stdout and split on {@code ';'}: field 0 is the entry id, field 1
 * the PDB id and field 2 a bracketed, comma-separated list of experiment ids. For every amino
 * acid of the first chain two rows are emitted (joined by the system line separator): one with
 * the average score of its local contacts and one with the average score of its long-range
 * contacts. A contact counts as local when the two residue numbers differ by less than 6. The
 * average is reported as {@code 0.0} when a residue has no contact of the respective kind. The
 * chain id column is always written as {@code A}.
 *
 * @param line the raw dataset line
 * @return the rows for this entry, or an empty {@code Optional} when any step fails (the stack
 *         trace is printed in that case)
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // fail with a descriptive message instead of a bare Optional.get(); the failure is still
        // handled by the catch block below
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalStateException("no chain found in structure " + pdbId));

        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        Map<Integer, List<Double>> localPlmScores = new HashMap<>();
        Map<Integer, List<Double>> longRangePlmScores = new HashMap<>();

        // NOTE(review): absolute, machine-specific path, and the entry id is not upper-cased
        // here although it is for the "_hs.html" file above - confirm whether this file should
        // be resolved against Start2FoldConstants.COUPLING_DIRECTORY as well
        Document hotSpotDocument = Jsoup.parse(String.join(System.lineSeparator(),
                Files.readAllLines(Paths.get("/home/bittrich/git/phd_sb_repo/data/start2fold/coupling/" + entryId + "_ec.html"))));

        // one (initially empty) score bucket per position in the chain, keyed 0..n-1
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        for (int i = 0; i < aminoAcids.size(); i++) {
            localPlmScores.put(i, new ArrayList<>());
            longRangePlmScores.put(i, new ArrayList<>());
        }

        // the first table row is skipped (presumably the header - confirm against the file format)
        hotSpotDocument.getElementsByTag("tr").stream().skip(1).forEach(element -> {
            Elements tds = element.getElementsByTag("td");
            // shift the residue numbers of the table by one so that they match the bucket keys
            int residueNumber1 = Integer.parseInt(tds.get(2).text()) - 1;
            int residueNumber2 = Integer.parseInt(tds.get(4).text()) - 1;
            double plmScore = Double.parseDouble(tds.get(6).text());

            boolean localContact = Math.abs(residueNumber1 - residueNumber2) < 6;
            Map<Integer, List<Double>> scores = localContact ? localPlmScores : longRangePlmScores;
            System.out.println((localContact ? "local contact: " : "long-range contact: ") + element.text());
            // the score is credited to both partners of the contact
            scores.get(residueNumber1).add(plmScore);
            scores.get(residueNumber2).add(plmScore);
        });

        return Optional.of(aminoAcids.stream().map(aminoAcid -> {
            // columns shared by the "local" and the "long-range" row of this residue
            String rowPrefix = pdbId + ",A,"
                    + aminoAcid.getOneLetterCode() + ","
                    + aminoAcid.getResidueIdentifier().getResidueNumber() + ","
                    + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + ",";

            // buckets are looked up by residue index; assumes it equals the position in the
            // chain used as key above - TODO confirm
            String localAverage = StandardFormat.format(localPlmScores.get(aminoAcid.getResidueIndex())
                    .stream()
                    .mapToDouble(Double::doubleValue)
                    .average()
                    .orElse(0.0));
            String longRangeAverage = StandardFormat.format(longRangePlmScores.get(aminoAcid.getResidueIndex())
                    .stream()
                    .mapToDouble(Double::doubleValue)
                    .average()
                    .orElse(0.0));

            return rowPrefix + "local," + localAverage
                    + System.lineSeparator()
                    + rowPrefix + "long-range," + longRangeAverage;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        // best effort: a broken entry is skipped rather than aborting the whole export
        e.printStackTrace();
        return Optional.empty();
    }
}
use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.
the class A03_WriteDatasetCsv method handleLine.
/**
 * Produces the per-residue feature table (CSV rows) for one dataset line.
 *
 * <p>The line is echoed to stdout and split on {@code ';'}: field 0 is the entry id, field 1
 * the PDB id and field 2 a bracketed, comma-separated list of experiment ids. The structure is
 * parsed, its first chain is annotated by several parsers and one comma-separated row is
 * emitted per amino acid of that chain; rows are joined by the system line separator. The chain
 * id column is always written as {@code A}.
 *
 * @param line the raw dataset line
 * @return the rows for this entry, or an empty {@code Optional} when any step throws (the
 *         failure is logged together with the offending line)
 */
private static Optional<String> handleLine(String line) {
try {
System.out.println(line);
String[] split = line.split(";");
String entryId = split[0];
String pdbId = split[1];
// field 2 looks like "[1,2,3]": strip the brackets, split on ',' and convert to integers
List<Integer> experimentIds = Pattern.compile(",").splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", "")).map(Integer::valueOf).collect(Collectors.toList());
Structure structure = StructureParser.fromPdbId(pdbId).parse();
// only the first chain is considered; Optional.get() throws if there is none, which ends up
// in the catch block at the bottom
Chain chain = structure.chains().findFirst().get();
// the three parsers below receive the chain plus an entry-specific file; presumably they
// attach the annotations read further down - verify against the parser implementations
Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
EQuantParser.parseEQuantFile(chain, Start2FoldConstants.EQUANT_DIRECTORY.resolve(entryId.toLowerCase() + ".equant-small.txt"));
List<AminoAcid> earlyFoldingResidues = chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());
// NOTE(review): the method name is spelled "extractFunctioanlResidueNumbers" - looks like a
// typo in Start2FoldConstants; confirm before renaming anything
List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(split);
List<AminoAcid> functionalResidues = new ArrayList<>();
// do nothing if no annotation of functional residues exists
if (!functionalResidueNumbers.isEmpty()) {
FunctionalResidueParser.parse(chain, functionalResidueNumbers);
chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional()).forEach(functionalResidues::add);
}
List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
// contact graph built with the CALPHA8 interaction scheme; source of the "conventional"
// contact counts in the row
ProteinGraph conventionalProteinGraph = ProteinGraphFactory.createProteinGraph(chain, ProteinGraphFactory.InteractionScheme.CALPHA8);
return Optional.of(chain.aminoAcids().map(aminoAcid -> {
GenericSecondaryStructure sse = aminoAcid.getFeature(GenericSecondaryStructure.class);
HotSpotScoring hotSpotScoring = aminoAcid.getFeature(HotSpotScoring.class);
PLIPInteractionContainer plipInteractionContainer = aminoAcid.getFeature(PLIPInteractionContainer.class);
// non-local: interactions whose partners are more than 5 residue indices apart
PLIPInteractionContainer nonLocalPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> Math.abs(inter.getPartner1().getResidueIndex() - inter.getPartner2().getResidueIndex()) > 5).collect(Collectors.toList()));
// local: every interaction that is not contained in the non-local container
PLIPInteractionContainer localPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> !nonLocalPlipInteractionContainer.getInteractions().contains(inter)).collect(Collectors.toList()));
// the eQuant column falls back to "NA" when fetching the score raises a ComputationException
String equantScore = "NA";
try {
equantScore = StandardFormat.format(aminoAcid.getFeature(EQuantScore.class).getEvaluation());
} catch (ComputationException e) {
logger.warn("missing equant scoring for {}", aminoAcid);
}
// "NA" when the entry has no functional residues at all, otherwise a per-residue label
String functionalAnnotation = "NA";
if (functionalResidues.size() > 0) {
functionalAnnotation = functionalResidues.contains(aminoAcid) ? "functional" : "non-functional";
}
ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);
// terminus distance = min(position, chain length - position) / chain length, where position
// is the index of the residue in the aminoAcids list
// NOTE(review): indexOf (and the contains calls on the residue lists) scan linearly for
// every residue
double terminusDistance = aminoAcids.indexOf(aminoAcid);
terminusDistance = Math.min(terminusDistance, aminoAcids.size() - terminusDistance);
terminusDistance /= (double) aminoAcids.size();
// one CSV row; the order of the concatenated values defines the column order of the output
return pdbId + "," + "A" + "," + aminoAcid.getResidueIdentifier() + "," + aminoAcid.getOneLetterCode() + "," + sse.getSecondaryStructure().getReducedRepresentation() + "," + sse.getSecondaryStructure().getOneLetterRepresentation() + "," + sse.getSurroundingSecondaryStructureElement(aminoAcid).getSize() + "," + (aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed() ? "exposed" : "buried") + "," + StandardFormat.format(aminoAcid.getFeature(GeometricProperties.class).getDistanceToCentroid()) + "," + StandardFormat.format(terminusDistance) + "," + plipInteractionContainer.getHydrogenBonds().size() + "," + plipInteractionContainer.getHydrophobicInteractions().size() + "," + plipInteractionContainer.getBackboneInteractions().size() + "," + plipInteractionContainer.getInteractions().size() + "," + localPlipInteractionContainer.getHydrogenBonds().size() + "," + localPlipInteractionContainer.getHydrophobicInteractions().size() + "," + localPlipInteractionContainer.getBackboneInteractions().size() + "," + localPlipInteractionContainer.getInteractions().size() + "," + nonLocalPlipInteractionContainer.getHydrogenBonds().size() + "," + nonLocalPlipInteractionContainer.getHydrophobicInteractions().size() + "," + nonLocalPlipInteractionContainer.getBackboneInteractions().size() + "," + nonLocalPlipInteractionContainer.getInteractions().size() + "," + StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()) + "," + StandardFormat.format(aminoAcid.getFeature(EgorAgreement.class).getEgorPrediction()) + "," + equantScore + "," + StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + "," + StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()) + "," + hotSpotScoring.getEcCount() + "," + StandardFormat.format(hotSpotScoring.getCumStrength()) + "," + StandardFormat.format(hotSpotScoring.getEcStrength()) + "," + hotSpotScoring.getConservation() + "," + 
StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getClusteringCoefficient()) + "," + conventionalProteinGraph.getContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getLocalContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getNonLocalContactsOf(aminoAcid).size() + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getClusteringCoefficient()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getDistinctNeighborhoodCount()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getDistinctNeighborhoodCount()) + "," + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + "," + functionalAnnotation;
}).collect(Collectors.joining(System.lineSeparator())));
} catch (Exception e) {
// any failure (parsing, missing files, missing features) skips the whole entry
logger.info("calculation failed for {}", line, e);
return Optional.empty();
}
}
Aggregations