use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A01_ReportGeneralStatistics method handleStrongFile.
/**
 * Processes one Start2Fold XML file and records how many residues of the first chain are
 * annotated as strong and how many are not.
 * <p>
 * The PDB id is taken from the {@code pdb_id} attribute of the {@code protein} tag, the
 * structure is obtained through {@code StructureParser}, and its first chain is annotated by
 * {@code Start2FoldXmlParser}. When no residue of the chain is strong, the file is skipped and
 * nothing is added to the enclosing class's {@code strong} and {@code weak} collections.
 *
 * @param path the Start2Fold XML file to process
 * @throws UncheckedIOException if processing the file fails with an {@link IOException}
 * @throws java.util.NoSuchElementException if the parsed structure contains no chain
 */
private static void handleStrongFile(Path path) {
    try {
        String pdbId = Jsoup.parse(path.toFile(), "UTF-8")
                .getElementsByTag("protein")
                .attr("pdb_id");
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // same exception type as Optional.get(), but with enough context to find the offending file
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new java.util.NoSuchElementException(
                        "structure '" + pdbId + "' referenced by " + path + " contains no chain"));
        Start2FoldXmlParser.parse(chain, path);

        // a single pass is enough: a count of zero means that no residue is strong
        long strongCount = chain.aminoAcids()
                .map(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class))
                .filter(Start2FoldResidueAnnotation::isStrong)
                .count();
        if (strongCount == 0) {
            return;
        }

        strong.add((int) strongCount);
        // every amino acid of the chain that is not strong is counted as weak
        weak.add((int) (chain.aminoAcids().count() - strongCount));
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A03_WriteFunctionalClassificationArff method handleLine.
/**
 * Converts one semicolon-separated input line into comma-separated feature rows, one row per
 * amino acid of the first chain of the referenced structure.
 * <p>
 * The second column of the line is used as PDB id. Raw feature vectors are assigned to every
 * amino acid, functional residues are annotated from the residue numbers extracted from the
 * line, and smoothed feature vectors are assigned afterwards. Prolines are left out of the
 * output. Each row ends with the class label {@code functional} or {@code non-functional};
 * rows are joined by the system line separator.
 *
 * @param line the semicolon-separated description of one protein
 * @return the joined rows, or an empty {@link Optional} when the line provides no functional
 *         residue numbers, when no amino acid ends up annotated as functional, or when any
 *         exception occurs (the failure is logged as a warning)
 */
private static Optional<String> handleLine(String line) {
    try {
        logger.info("handling {}", line);
        String[] columns = line.split(";");
        String pdbId = columns[1];
        Chain chain = StructureParser.fromPdbId(pdbId).parse().getFirstChain();
        List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());
        for (AminoAcid residue : residues) {
            RawFeatureVector.assignRawFeatureVector(residue);
        }

        List<Integer> functionalNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(columns);
        // nothing to classify when the line carries no annotation of functional residues
        if (functionalNumbers.isEmpty()) {
            return Optional.empty();
        }

        FunctionalResidueParser.parse(chain, functionalNumbers);
        List<AminoAcid> functionalResidues = chain.aminoAcids()
                .filter(residue -> residue.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                .collect(Collectors.toList());
        // the annotation may not match any amino acid of the chain - skip such proteins as well
        if (functionalResidues.isEmpty()) {
            return Optional.empty();
        }

        // smoothing needs the complete residue list as context for each residue
        for (AminoAcid residue : residues) {
            SmoothedFeatureVector.assignSmoothedFeatureVector(residues, residue);
        }

        String rows = residues.stream()
                .filter(residue -> !(residue instanceof Proline))
                .map(residue -> {
                    SmoothedFeatureVector features = residue.getFeature(SmoothedFeatureVector.class);
                    return String.join(",",
                            StandardFormat.format(features.getEnergy()),
                            StandardFormat.format(features.getEgor()),
                            StandardFormat.format(features.getSecondaryStructureElementSize()),
                            // the loop fraction is already smoothed, so it is read from its own feature
                            StandardFormat.format(residue.getFeature(LoopFraction.class).getLoopFraction()),
                            StandardFormat.format(features.getRasa()),
                            StandardFormat.format(features.getLocalInteractions()),
                            StandardFormat.format(features.getLocalHydrogen()),
                            StandardFormat.format(features.getLocalHydrophobic()),
                            StandardFormat.format(features.getLocalBackbone()),
                            StandardFormat.format(features.getNonLocalInteractions()),
                            StandardFormat.format(features.getNonLocalHydrogen()),
                            StandardFormat.format(features.getNonLocalHydrophobic()),
                            StandardFormat.format(features.getNonLocalBackbone()),
                            StandardFormat.format(features.getBetweenness()),
                            StandardFormat.format(features.getCloseness()),
                            StandardFormat.format(features.getClusteringCoefficient()),
                            StandardFormat.format(features.getHydrogenBetweenness()),
                            StandardFormat.format(features.getHydrogenCloseness()),
                            StandardFormat.format(features.getHydrogenClusteringCoefficient()),
                            StandardFormat.format(features.getHydrophobicBetweenness()),
                            StandardFormat.format(features.getHydrophobicCloseness()),
                            StandardFormat.format(features.getHydrophobicClusteringCoefficient()),
                            StandardFormat.format(features.getConvBetweenness()),
                            StandardFormat.format(features.getConvCloseness()),
                            StandardFormat.format(features.getConvClusteringCoefficient()),
                            StandardFormat.format(features.getDistinctNeighborhoods()),
                            StandardFormat.format(features.getConvDistinctNeighborhoods()),
                            functionalResidues.contains(residue) ? "functional" : "non-functional");
                })
                .collect(Collectors.joining(System.lineSeparator()));
        return Optional.of(rows);
    } catch (Exception e) {
        logger.warn("computation for {} failed", line, e);
        return Optional.empty();
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByResidue.
/**
 * Prints one comma-separated line per residue-level structural information entry of the first
 * chain of 1bdd: the average and maximum RMSD, TM-score and Q increases, followed by the
 * betweenness, closeness, clustering coefficient and degree of the residue in the distance
 * residue graph of the chain. The method only prints; it makes no assertions.
 */
@Test
public void shouldPrintStructuralInformationByResidue() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain using entry 185 of the bundled Start2Fold file
    Start2FoldXmlParser.parseSpecificExperiment(
            chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFolders = chain.aminoAcids()
            .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(
                    TestUtils.getResourceAsInputStream("si/STF0045.out"),
                    chain,
                    earlyFolders);
    List<ResidueStructuralInformation> residueInformation = StructuralInformationParserService.getInstance()
            .composeResidueStructuralInformation(residues, earlyFolders, contactInformation);

    ResidueGraph graph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations calculations = new ResidueGraphCalculations(graph);

    for (ResidueStructuralInformation information : residueInformation) {
        // map the entry back to the amino acid it describes to look up its graph properties
        AminoAcid residue = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(information.getResidueIdentifier()))
                .asAminoAcid();
        double betweenness = calculations.betweenness(residue);
        double closeness = calculations.closeness(residue);
        double clusteringCoefficient = calculations.clusteringCoefficient(residue);
        int degree = graph.degreeOf(residue);

        System.out.println(String.join(",",
                StandardFormat.format(information.getAverageRmsdIncrease()),
                StandardFormat.format(information.getAverageTmScoreIncrease()),
                StandardFormat.format(information.getAverageQIncrease()),
                StandardFormat.format(information.getMaximumRmsdIncrease()),
                StandardFormat.format(information.getMaximumTmScoreIncrease()),
                StandardFormat.format(information.getMaximumQIncrease()),
                StandardFormat.format(betweenness),
                StandardFormat.format(closeness),
                StandardFormat.format(clusteringCoefficient),
                StandardFormat.format(degree)));
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByContact.
/**
 * Prints one comma-separated line per contact-level structural information entry of the first
 * chain of 1bdd: the average and maximum RMSD, TM-score and Q increases, followed by the
 * betweenness that the distance residue graph reports for the pair of residues forming the
 * contact. The method only prints; it makes no assertions.
 */
@Test
public void shouldPrintStructuralInformationByContact() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();

    // annotate the chain using entry 185 of the bundled Start2Fold file
    Start2FoldXmlParser.parseSpecificExperiment(
            chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(
                    TestUtils.getResourceAsInputStream("si/STF0045.out"),
                    chain,
                    earlyFoldingResidues);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    contactStructuralInformation.stream()
            .map(si -> {
                // resolve both partners of the contact to query the graph for the pair
                AminoAcid aminoAcid1 = chain.select()
                        .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier1()))
                        .asAminoAcid();
                AminoAcid aminoAcid2 = chain.select()
                        .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier2()))
                        .asAminoAcid();
                Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);
                double betweenness = residueGraphCalculations.betweenness(pair);
                return StandardFormat.format(si.getAverageRmsdIncrease()) + ","
                        + StandardFormat.format(si.getAverageTmScoreIncrease()) + ","
                        + StandardFormat.format(si.getAverageQIncrease()) + ","
                        + StandardFormat.format(si.getMaximumRmsdIncrease()) + ","
                        + StandardFormat.format(si.getMaximumTmScoreIncrease()) + ","
                        + StandardFormat.format(si.getMaximumQIncrease()) + ","
                        + StandardFormat.format(betweenness);
            })
            .forEach(System.out::println);
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A06_EarlyFoldingFrequencies method handleLine.
/**
 * Processes one semicolon-separated line and updates the amino acid frequency counters of the
 * enclosing class.
 * <p>
 * The columns are read as entry id, PDB id and a bracketed, comma-separated list of experiment
 * ids (e.g. {@code [1,2]} or {@code [1, 2]}). The first chain of the structure is annotated
 * from {@code <entryId>.xml} in {@code Start2FoldConstants.XML_DIRECTORY} for the given
 * experiments. Afterwards {@code aminoAcidFrequencies} is incremented once per amino acid of
 * the chain and {@code earlyFoldingFrequencies} once per early folding residue; both arrays are
 * indexed by the ordinal of the {@code AminoAcid.Family} resolved from the one-letter code.
 *
 * @param line the semicolon-separated description of one entry; it is echoed to standard output
 * @throws NumberFormatException if an experiment id is not a valid integer
 * @throws java.util.NoSuchElementException if the parsed structure contains no chain
 */
private static void handleLine(String line) {
    System.out.println(line);
    String[] split = line.split(";");
    String entryId = split[0];
    String pdbId = split[1];

    // strip the surrounding brackets literally and tolerate blanks around the individual ids,
    // so that both "[1,2]" and "[1, 2]" are understood
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(split[2].replace("[", "").replace("]", ""))
            .map(String::trim)
            .map(Integer::valueOf)
            .collect(Collectors.toList());

    Structure structure = StructureParser.fromPdbId(pdbId).parse();
    // same exception type as Optional.get(), but with enough context to find the offending entry
    Chain chain = structure.chains()
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException(
                    "structure '" + pdbId + "' of entry '" + entryId + "' contains no chain"));
    Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    // background distribution: every amino acid of the chain
    chain.aminoAcids().forEach(aminoAcid -> {
        int ordinal = AminoAcid.Family.resolveOneLetterCode(aminoAcid.getOneLetterCode()).ordinal();
        aminoAcidFrequencies[ordinal]++;
    });
    // distribution restricted to the residues annotated as early folding
    earlyFoldingResidues.forEach(aminoAcid -> {
        int ordinal = AminoAcid.Family.resolveOneLetterCode(aminoAcid.getOneLetterCode()).ordinal();
        earlyFoldingFrequencies[ordinal]++;
    });
}
Aggregations