use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class SelectionTest method shouldSelectAminoAcidsAndReturnAsContainer.
/**
 * Checks that an amino-acid selection contains only {@link AminoAcid} instances, first on the
 * collected list of groups and then on the container produced from that list by
 * {@code StructureCollectors.toIsolatedStructure()}.
 */
@Test
public void shouldSelectAminoAcidsAndReturnAsContainer() {
    List<Group> selectedGroups = protein.select()
            .aminoAcids()
            .asFilteredGroups()
            .collect(Collectors.toList());

    // every group of the raw selection must be an amino acid
    for (Group selected : selectedGroups) {
        String failureMessage = "group " + selected + " of original selected stream is no amino acid, was "
                + selected.getClass().getSimpleName();
        Assert.assertTrue(failureMessage, selected instanceof AminoAcid);
    }

    // the same must hold once the groups were collected into a container
    GroupContainer isolatedContainer = selectedGroups.stream()
            .collect(StructureCollectors.toIsolatedStructure());
    isolatedContainer.groups().forEach(contained -> {
        String failureMessage = "group " + contained + " of container stream is no amino acid, was "
                + contained.getClass().getSimpleName();
        Assert.assertTrue(failureMessage, contained instanceof AminoAcid);
    });
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A01_ReportGeneralStatistics method handleEFRLine.
/**
 * Processes one semicolon-separated line and adds its numbers to the accumulators referenced by
 * this method ({@code early}, {@code late}, {@code functional}, {@code nonFunctional},
 * {@code overlap}, {@code rasaContingencyTable}, {@code contingencyTable},
 * {@code functionalTableLines} and {@code tableLines}).
 *
 * <p>Fields read from the line: {@code [0]} entry id, {@code [1]} PDB id, {@code [2]} a bracketed,
 * comma-separated list of experiment ids, {@code [3]} the expected number of early folding
 * residues and {@code [4]} the UniProt id. The complete split array is additionally handed to
 * {@code Start2FoldConstants.extractFunctioanlResidueNumbers}.
 *
 * <p>Only the first chain of the parsed structure is evaluated.
 *
 * @param line the line to process
 * @throws java.util.NoSuchElementException if the parsed structure contains no chain
 */
private static void handleEFRLine(String line) {
    String[] split = line.split(";");
    String entryId = split[0];
    String pdbId = split[1];
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
            .map(Integer::valueOf)
            .collect(Collectors.toList());
    // parseInt returns the primitive directly, no boxing round-trip
    int numberOfEarlyFoldingResidues = Integer.parseInt(split[3]);

    Structure structure = StructureParser.fromPdbId(pdbId).parse();
    // fail with a message that names the offending entry instead of a bare "No value present"
    Chain chain = structure.chains()
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException(
                    "structure " + pdbId + " of entry " + entryId + " contains no chain"));

    // annotate the chain with the folding information of the selected experiments
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"),
            experimentIds);

    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());
    // everything that is not early is considered late
    List<AminoAcid> lateFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> !earlyFoldingResidues.contains(aminoAcid))
            .collect(Collectors.toList());

    early.add(earlyFoldingResidues.size());
    late.add((int) (chain.aminoAcids().count() - earlyFoldingResidues.size()));

    // report, but tolerate, a mismatch between the observed and the expected number of early residues
    if (earlyFoldingResidues.size() != numberOfEarlyFoldingResidues) {
        System.err.println("number of EFR did not match expectation for " + entryId + ": " + earlyFoldingResidues.size() + " vs " + numberOfEarlyFoldingResidues);
    }

    String uniProtId = split[4];
    List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(split);
    List<AminoAcid> functionalResidues = new ArrayList<>();
    // do nothing if no annotation of functional residues exists
    if (!functionalResidueNumbers.isEmpty()) {
        FunctionalResidueParser.parse(chain, functionalResidueNumbers);
        chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                .forEach(functionalResidues::add);
    }
    List<AminoAcid> nonFunctionalResidues = chain.aminoAcids()
            .filter(aminoAcid -> !functionalResidues.contains(aminoAcid))
            .collect(Collectors.toList());

    List<AminoAcid> exposedAminoAcids = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed())
            .collect(Collectors.toList());
    List<AminoAcid> buriedAminoAcids = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isBuried())
            .collect(Collectors.toList());

    // table layout: [0] early & buried, [1] early & exposed, [2] late & buried, [3] late & exposed
    rasaContingencyTable[0] += SetOperations.createIntersectionSet(earlyFoldingResidues, buriedAminoAcids).size();
    rasaContingencyTable[1] += SetOperations.createIntersectionSet(earlyFoldingResidues, exposedAminoAcids).size();
    rasaContingencyTable[2] += SetOperations.createIntersectionSet(lateFoldingResidues, buriedAminoAcids).size();
    rasaContingencyTable[3] += SetOperations.createIntersectionSet(lateFoldingResidues, exposedAminoAcids).size();

    int earlyFunctionalCount = 0;
    // functional statistics are only collected when at least one functional residue was found
    if (!functionalResidues.isEmpty()) {
        functional.add(functionalResidues.size());
        nonFunctional.add((int) chain.aminoAcids().count() - functionalResidues.size());
        earlyFunctionalCount = SetOperations.createIntersectionSet(earlyFoldingResidues, functionalResidues).size();
        overlap.add(earlyFunctionalCount);

        // ef: early & functional, en: early & non-functional, lf: late & functional, ln: late & non-functional
        int ef = earlyFunctionalCount;
        int en = SetOperations.createIntersectionSet(earlyFoldingResidues, nonFunctionalResidues).size();
        int lf = SetOperations.createIntersectionSet(lateFoldingResidues, functionalResidues).size();
        int ln = SetOperations.createIntersectionSet(lateFoldingResidues, nonFunctionalResidues).size();
        contingencyTable[0] += ef;
        contingencyTable[1] += en;
        contingencyTable[2] += lf;
        contingencyTable[3] += ln;

        double[] test = FishersExactTest.fishersExactTest(ef, en, lf, ln);
        System.out.println("values: " + ef + ", " + en + ", " + lf + ", " + ln);
        System.out.println("test: " + Arrays.toString(test));
        functionalTableLines.add(entryId + " & " + chain.aminoAcids().count() + " & " + earlyFoldingResidues.size() + " & " + functionalResidues.size() + " & " + ef + " & " + StandardFormat.format(test[0]) + " & " + "? \\\\");
    }

    // entries without functional annotation show "-" in the two functional columns
    tableLines.add(entryId + " & " + pdbId + "\\_A & " + uniProtId + " & " + chain.aminoAcids().count() + " & " + earlyFoldingResidues.size() + " & " + (functionalResidues.isEmpty() ? "-" : functionalResidues.size()) + " & " + (functionalResidues.isEmpty() ? "-" : earlyFunctionalCount) + " \\\\");
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A01_WriteEarlyFoldingClassificationArff method handleLine.
/**
 * Turns one semicolon-separated line into comma-separated feature rows, one row per amino acid
 * of the first chain that is not a {@code Proline}, joined by the system line separator.
 *
 * <p>Fields read from the line: {@code [0]} entry id, {@code [1]} PDB id and {@code [2]} a
 * bracketed, comma-separated list of experiment ids. Each row ends with the label
 * {@code early} or {@code late}, depending on whether the residue is among those annotated as
 * early folding.
 *
 * @param line the line to process
 * @return the joined rows, or an empty {@code Optional} if any exception occurred (the failure is
 *         logged as a warning)
 */
private static Optional<String> handleLine(String line) {
    try {
        logger.info("handling {}", line);

        String[] fields = line.split(";");
        String entryId = fields[0];
        String pdbId = fields[1];
        String rawExperimentIds = fields[2].replaceAll("\\[", "").replaceAll("]", "");
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(rawExperimentIds)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Chain chain = StructureParser.fromPdbId(pdbId).parse().getFirstChain();
        Start2FoldXmlParser.parseSpecificExperiment(chain,
                Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"),
                experimentIds);

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());
        List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());

        // raw vectors have to exist for all residues before any of them is smoothed
        for (AminoAcid residue : residues) {
            RawFeatureVector.assignRawFeatureVector(residue);
        }
        for (AminoAcid residue : residues) {
            SmoothedFeatureVector.assignSmoothedFeatureVector(residues, residue);
        }

        String rows = residues.stream()
                .filter(residue -> !(residue instanceof Proline))
                .map(residue -> {
                    SmoothedFeatureVector smoothed = residue.getFeature(SmoothedFeatureVector.class);
                    return String.join(",",
                            StandardFormat.format(smoothed.getEnergy()),
                            StandardFormat.format(smoothed.getEgor()),
                            StandardFormat.format(smoothed.getSecondaryStructureElementSize()),
                            // already smoothed
                            StandardFormat.format(residue.getFeature(LoopFraction.class).getLoopFraction()),
                            StandardFormat.format(smoothed.getRasa()),
                            StandardFormat.format(smoothed.getLocalInteractions()),
                            StandardFormat.format(smoothed.getLocalHydrogen()),
                            StandardFormat.format(smoothed.getLocalHydrophobic()),
                            StandardFormat.format(smoothed.getLocalBackbone()),
                            StandardFormat.format(smoothed.getNonLocalInteractions()),
                            StandardFormat.format(smoothed.getNonLocalHydrogen()),
                            StandardFormat.format(smoothed.getNonLocalHydrophobic()),
                            StandardFormat.format(smoothed.getNonLocalBackbone()),
                            StandardFormat.format(smoothed.getBetweenness()),
                            StandardFormat.format(smoothed.getCloseness()),
                            StandardFormat.format(smoothed.getClusteringCoefficient()),
                            StandardFormat.format(smoothed.getHydrogenBetweenness()),
                            StandardFormat.format(smoothed.getHydrogenCloseness()),
                            StandardFormat.format(smoothed.getHydrogenClusteringCoefficient()),
                            StandardFormat.format(smoothed.getHydrophobicBetweenness()),
                            StandardFormat.format(smoothed.getHydrophobicCloseness()),
                            StandardFormat.format(smoothed.getHydrophobicClusteringCoefficient()),
                            StandardFormat.format(smoothed.getConvBetweenness()),
                            StandardFormat.format(smoothed.getConvCloseness()),
                            StandardFormat.format(smoothed.getConvClusteringCoefficient()),
                            StandardFormat.format(smoothed.getDistinctNeighborhoods()),
                            StandardFormat.format(smoothed.getConvDistinctNeighborhoods()),
                            earlyFoldingResidues.contains(residue) ? "early" : "late");
                })
                .collect(Collectors.joining(System.lineSeparator()));
        return Optional.of(rows);
    } catch (Exception e) {
        // best effort: a failing entry is reported and skipped
        logger.warn("computation for {} failed", line, e);
        return Optional.empty();
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class StructuralInformationParserServiceTest method shouldParseStructuralInformationFile.
/**
 * Parses the contact structural information resource {@code si/STF0045.out} for the first chain
 * of PDB entry {@code 1bdd}, composes the per-residue structural information from it and prints
 * the CSV lines of both to standard out. The test contains no assertions.
 */
@Test
public void shouldParseStructuralInformationFile() {
    Chain chain = StructureParser.fromPdbId("1bdd")
            .parse()
            .getFirstChain();
    List<AminoAcid> allResidues = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain using experiment 185 of the bundled Start2Fold XML resource
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyResidues = chain.aminoAcids()
            .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyResidues);
    for (ContactStructuralInformation contact : contactInformation) {
        System.out.println(contact.getCsvLine());
    }

    // blank line separating the contact output from the residue output
    System.out.println();

    List<ResidueStructuralInformation> residueInformation = StructuralInformationParserService.getInstance()
            .composeResidueStructuralInformation(allResidues, earlyResidues, contactInformation);
    for (ResidueStructuralInformation residue : residueInformation) {
        System.out.println(residue.getCsvLine());
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class EQuantParser method parseEQuantFile.
/**
 * Reads a whitespace-separated eQuant file and attaches an {@code EQuantScore} to the amino
 * acids of the given chain.
 *
 * <p>Lines starting with {@code "chain"} are skipped, and only lines starting with the chain id
 * of {@code chain} are evaluated. Of each evaluated line, column {@code 1} is read as the residue
 * number and column {@code 4} as the score.
 *
 * @param chain the chain whose amino acids receive the scores
 * @param equantPath the file to read
 * @throws UncheckedIOException if the file cannot be read
 */
public static void parseEQuantFile(Chain chain, Path equantPath) {
    String chainId = chain.getChainIdentifier().getChainId();
    try (Stream<String> lines = Files.lines(equantPath)) {
        lines
                // drop the header, then keep only the rows of the requested chain
                .filter(row -> !row.startsWith("chain") && row.startsWith(chainId))
                .map(row -> row.split("\\s+"))
                .forEach(columns -> {
                    int residueNumber = Integer.parseInt(columns[1]);
                    double evaluation = Double.parseDouble(columns[4]);
                    AminoAcid residue = chain.select()
                            .residueNumber(residueNumber)
                            .asAminoAcid();
                    residue.getFeatureContainer().addFeature(new EQuantScore(evaluation));
                });
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Aggregations