use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class Start2FoldXmlParser method parseStability.
/**
 * Annotates the residues of a chain with the stability experiments of a Start2Fold XML document.
 * <p>
 * Every amino acid lacking a {@code Start2FoldResidueAnnotation} receives a fresh one first. Afterwards
 * all {@code experiment} elements whose method is {@code Method.STABILITY} are passed to
 * {@code assignValuesForStrong}.
 *
 * @param chain       the chain whose amino acids are annotated
 * @param inputStream the XML document, decoded as UTF-8
 * @throws UncheckedIOException if reading the document fails
 */
public static void parseStability(Chain chain, InputStream inputStream) {
    // make sure every residue owns an annotation container before any value is assigned
    chain.aminoAcids().forEach(aminoAcid -> {
        boolean alreadyAnnotated = aminoAcid.getFeatureContainer()
                .getFeatureOptional(Start2FoldResidueAnnotation.class)
                .isPresent();
        if (!alreadyAnnotated) {
            aminoAcid.getFeatureContainer().addFeature(new Start2FoldResidueAnnotation());
        }
    });

    try {
        Document xml = Jsoup.parse(inputStream, "UTF-8", "/");
        // all experiments are parsed and filtered before the first one is applied
        List<Experiment> stabilityExperiments = xml.getElementsByTag("experiment")
                .stream()
                .map(Experiment::parse)
                .filter(candidate -> Method.STABILITY == candidate.getMethod())
                .collect(Collectors.toList());
        for (Experiment stabilityExperiment : stabilityExperiments) {
            assignValuesForStrong(stabilityExperiment, chain);
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class Start2FoldXmlParser method assignValuesForEarly.
/**
 * Marks the residues reported by an experiment as {@code ProtectionLevel.EARLY}.
 * <p>
 * The experiment sequence (query) is globally aligned to the sequence of the chain (target) using
 * BLOSUM62 and a {@code SimpleGapPenalty}; the alignment translates the 1-based residue indices of
 * the experiment into positions within the list of amino acids of the chain. Residues which cannot
 * be mapped are logged and skipped.
 *
 * @param experiment the experiment providing sequence and residues
 * @param chain      the chain whose amino acids receive the protection level
 * @throws IllegalArgumentException if one of the sequences contains an unknown compound
 */
private static void assignValuesForEarly(Experiment experiment, Chain chain) {
    String pdbSequence = chain.getAminoAcidSequence();
    String experimentSequence = experiment.getSequence();

    // align both sequences so that experiment indices can be translated to structure indices
    SequencePair<ProteinSequence, AminoAcidCompound> alignment;
    try {
        ProteinSequence query = new ProteinSequence(experimentSequence);
        ProteinSequence target = new ProteinSequence(pdbSequence);
        alignment = Alignments.getPairwiseAlignment(query,
                target,
                Alignments.PairwiseSequenceAlignerType.GLOBAL,
                new SimpleGapPenalty(),
                SubstitutionMatrixHelper.getBlosum62());
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }

    List<AminoAcid> residuesOfChain = chain.aminoAcids().collect(Collectors.toList());
    for (Experiment.Residue residue : experiment.getResidues()) {
        int experimentIndex = residue.getIndex() - 1;
        try {
            // crude workaround for STF0017: a leading proline is pinned to the first residue of
            // the structure instead of being looked up in the alignment
            boolean leadingProline = residue.getCode().equals("P") && residue.getIndex() == 1;
            int pdbIndex = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(experimentIndex);

            // assign experiment-specific protection level to residue
            residuesOfChain.get(pdbIndex)
                    .getFeature(Start2FoldResidueAnnotation.class)
                    .addProtectionLevelEntry(ProtectionLevel.EARLY);
        } catch (Exception e) {
            // residue not present in structure - e.g. for STF0031 and STF0032
            logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
            logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
        }
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class StructuralInformationParserService method parseContactStructuralInformationFile.
/**
 * Parses a file of reconstruction results and aggregates them per residue pair.
 * <p>
 * Each line is tab-separated. The first field carries the pair of residue numbers as
 * {@code ...(i, j)...}: the number after {@code (} and the number before {@code )} are used. The
 * second field is compared to {@code "true"} and the following nine fields are parsed as doubles;
 * all of them are forwarded to the {@code ReconstructionStructuralInformation} constructor.
 * <p>
 * Global statistics (average and standard deviation of the RMSD increase, of the maximum RMSD
 * increase per pair, and the 10% of reconstructions with the highest RMSD increase, rounded down)
 * are computed over all lines and attached to every returned element.
 * <p>
 * The input is decoded as UTF-8. The stream is not closed by this method: closing the stream
 * returned by {@code BufferedReader.lines()} does not close the underlying reader, so the caller
 * remains responsible for {@code inputStream}.
 *
 * @param inputStream          the tab-separated reconstruction data
 * @param chain                the chain used to look up the one-letter codes of both residues
 * @param earlyFoldingResidues residues handed to {@code residueIsInCollection} and
 *                             {@code contactIsInCollection}
 * @return one {@code ContactStructuralInformation} per distinct residue pair; the order follows
 * the iteration order of a {@code HashMap} and is therefore unspecified
 */
public List<ContactStructuralInformation> parseContactStructuralInformationFile(InputStream inputStream, Chain chain, List<AminoAcid> earlyFoldingResidues) {
    // group the raw lines by the residue pair given in their first column
    Map<Pair<Integer, Integer>, List<String>> parsingMap = new HashMap<>();
    // explicit charset: the no-charset constructor would silently depend on the platform default
    try (Stream<String> stream = new BufferedReader(new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8)).lines()) {
        stream.forEach(line -> {
            String[] split = line.split("\t");
            String[] idSplit = split[0].split(",");
            Pair<Integer, Integer> idPair = new Pair<>(Integer.valueOf(idSplit[0].split("\\(")[1].trim()), Integer.valueOf(idSplit[1].split("\\)")[0].trim()));
            parsingMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(line);
        });
    }

    // turn every line into a reconstruction and group the reconstructions by their residue pair
    Map<Pair<Integer, Integer>, List<ReconstructionStructuralInformation>> reconstructionMap = new HashMap<>();
    parsingMap.entrySet().stream().flatMap(entry -> {
        String aa1 = chain.select().residueNumber(entry.getKey().getLeft()).asAminoAcid().getOneLetterCode();
        String aa2 = chain.select().residueNumber(entry.getKey().getRight()).asAminoAcid().getOneLetterCode();
        return entry.getValue().stream()
                .map(line -> line.split("\t"))
                .map(split -> new ReconstructionStructuralInformation(entry.getKey().getLeft(),
                        aa1,
                        entry.getKey().getRight(),
                        aa2,
                        ContactDistanceBin.resolve(new Pair<>(IdentifierFactory.createResidueIdentifier(entry.getKey().getLeft()), IdentifierFactory.createResidueIdentifier(entry.getKey().getRight()))).orElse(null),
                        split[1].equals("true"),
                        Double.valueOf(split[2]),
                        Double.valueOf(split[3]),
                        Double.valueOf(split[4]),
                        Double.valueOf(split[5]),
                        Double.valueOf(split[6]),
                        Double.valueOf(split[7]),
                        Double.valueOf(split[8]),
                        Double.valueOf(split[9]),
                        Double.valueOf(split[10])));
    }).forEach(rsi -> {
        Pair<Integer, Integer> idPair = new Pair<>(rsi.getResidueIdentifier1(), rsi.getResidueIdentifier2());
        reconstructionMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(rsi);
    });

    // statistics over all reconstructions, shared by every returned element
    List<ReconstructionStructuralInformation> reconstructionStructuralInformation = reconstructionMap.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.toList());
    int numberOfReconstructions = reconstructionStructuralInformation.size();
    double averageRmsd = reconstructionStructuralInformation.stream()
            .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
            .average()
            .orElse(0.0);
    double standardDeviationRmsd = new StandardDeviation().evaluate(reconstructionStructuralInformation.stream()
            .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
            .toArray());
    // for the "maximum" statistics each residue pair contributes its largest RMSD increase
    double averageMaximumRmsd = reconstructionMap.entrySet().stream()
            .mapToDouble(entry -> entry.getValue().stream().mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease).max().orElse(0.0))
            .average()
            .orElse(0.0);
    double standardDeviationMaximumRmsd = new StandardDeviation().evaluate(reconstructionMap.entrySet().stream()
            .mapToDouble(entry -> entry.getValue().stream().mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease).max().orElse(0.0))
            .toArray());
    // the 10% (rounded down) of all reconstructions with the highest RMSD increase
    List<ReconstructionStructuralInformation> topScoringReconstructions = reconstructionMap.values().stream()
            .flatMap(Collection::stream)
            .sorted(Comparator.comparingDouble(ReconstructionStructuralInformation::getRmsdIncrease).reversed())
            .limit((int) (0.1 * numberOfReconstructions))
            .collect(Collectors.toList());

    return reconstructionMap.entrySet().stream().map(entry -> {
        List<ReconstructionStructuralInformation> values = entry.getValue();
        // identifiers, amino acids and distance bin are taken from the first reconstruction of the pair
        ReconstructionStructuralInformation reference = values.get(0);
        return new ContactStructuralInformation(reference.getResidueIdentifier1(),
                reference.getAa1(),
                reference.getResidueIdentifier2(),
                reference.getAa2(),
                reference.getContactDistanceBin(),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineRmsd),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineTmScore),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineQ),
                computeAverage(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getqIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getqIncrease),
                residueIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                contactIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                averageRmsd,
                standardDeviationRmsd,
                averageMaximumRmsd,
                standardDeviationMaximumRmsd,
                reconstructionStructuralInformation,
                topScoringReconstructions,
                values.stream().map(ReconstructionStructuralInformation::getRmsdIncrease).collect(Collectors.toList()));
    }).collect(Collectors.toList());
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A03_PrintStart2FoldDatasetTable method handleLine.
/**
 * Builds one tab-separated table row for an entry of the Start2Fold dataset.
 * <p>
 * The input line is semicolon-separated: entry id, PDB id and a bracketed, comma-separated list of
 * experiment ids; the complete split line is additionally handed to
 * {@code Start2FoldConstants.extractFunctionalResidueNumbers}. The structure is fetched by its PDB
 * id and only its first chain is considered.
 * <p>
 * The row consists of: entry id, PDB id, the raw experiment id column, number of amino acids,
 * number of early folding residues, number of functional residues and the number of residues which
 * are both early folding and functional.
 *
 * @param line one line of the dataset description
 * @return the table row, or an empty string if anything went wrong (the stack trace is printed)
 */
private static String handleLine(String line) {
    try {
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        // strip the surrounding brackets and tolerate whitespace around the individual ids
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replace("[", "").replace("]", ""))
                .map(String::trim)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // fail with a descriptive message instead of the bare "No value present" of Optional.get()
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("structure " + pdbId + " does not contain any chain"));

        Start2FoldXmlParser.parseStability(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"));
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids()
                    .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .forEach(functionalResidues::add);
        }

        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        long intersection = earlyFoldingResidues.stream().filter(functionalResidues::contains).count();

        return entryId + "\t" + pdbId + "\t" + split[2] + "\t" + aminoAcids.size() + "\t" + earlyFoldingResidues.size() + "\t" + functionalResidues.size() + "\t" + intersection;
    } catch (Exception e) {
        // best effort: a broken entry must not abort the whole table, it yields an empty row instead
        e.printStackTrace();
        return "";
    }
}
use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.
the class A07_WriteCouplingRangeCsv method handleLine.
/**
 * Creates the CSV rows describing the evolutionary coupling scores of one Start2Fold entry.
 * <p>
 * The input line is semicolon-separated: entry id, PDB id and a bracketed, comma-separated list of
 * experiment ids. The structure is fetched by its PDB id and only its first chain is considered.
 * Coupling scores are read from the table rows of the {@code _ec.html} file of the entry (the first
 * row is skipped): cells 2 and 4 provide the two residue numbers, cell 6 the plm score. A contact
 * whose residue numbers differ by less than 6 counts as local, all others as long-range; the score
 * is registered for both residues of the contact.
 * <p>
 * For every amino acid two rows are written: PDB id, the literal chain id {@code A}, one-letter
 * code, residue number, {@code early} or {@code late}, {@code local} respectively
 * {@code long-range} and the formatted average score ({@code 0.0} if no score was registered).
 *
 * @param line one line of the dataset description
 * @return the rows joined by the line separator, or an empty {@code Optional} if anything went
 * wrong (the stack trace is printed)
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        // strip the surrounding brackets and tolerate whitespace around the individual ids
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replace("[", "").replace("]", ""))
                .map(String::trim)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // fail with a descriptive message instead of the bare "No value present" of Optional.get()
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("structure " + pdbId + " does not contain any chain"));

        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        Map<Integer, List<Double>> localPlmScores = new HashMap<>();
        Map<Integer, List<Double>> longRangePlmScores = new HashMap<>();
        // NOTE(review): absolute, machine-specific path - presumably the same directory as
        // Start2FoldConstants.COUPLING_DIRECTORY used above (which upper-cases the entry id); confirm before unifying
        Document hotSpotDocument = Jsoup.parse(String.join(System.lineSeparator(),
                Files.readAllLines(Paths.get("/home/bittrich/git/phd_sb_repo/data/start2fold/coupling/" + entryId + "_ec.html"))));

        // one (initially empty) bucket per position in the list of amino acids
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        for (int i = 0; i < aminoAcids.size(); i++) {
            localPlmScores.put(i, new ArrayList<>());
            longRangePlmScores.put(i, new ArrayList<>());
        }

        hotSpotDocument.getElementsByTag("tr").stream().skip(1).forEach(element -> {
            Elements tds = element.getElementsByTag("td");
            // residue numbers of the file are shifted by one to match the 0-based bucket keys
            int residueNumber1 = Integer.parseInt(tds.get(2).text()) - 1;
            int residueNumber2 = Integer.parseInt(tds.get(4).text()) - 1;
            double plmScore = Double.parseDouble(tds.get(6).text());
            boolean localContact = Math.abs(residueNumber1 - residueNumber2) < 6;
            if (localContact) {
                System.out.println("local contact: " + element.text());
                localPlmScores.get(residueNumber1).add(plmScore);
                localPlmScores.get(residueNumber2).add(plmScore);
            } else {
                System.out.println("long-range contact: " + element.text());
                longRangePlmScores.get(residueNumber1).add(plmScore);
                longRangePlmScores.get(residueNumber2).add(plmScore);
            }
        });

        return Optional.of(aminoAcids.stream().map(aminoAcid -> {
            // both rows of a residue share everything up to the contact type
            String prefix = pdbId + ",A," + aminoAcid.getOneLetterCode() + "," + aminoAcid.getResidueIdentifier().getResidueNumber() + "," + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + ",";
            double averageLocal = localPlmScores.get(aminoAcid.getResidueIndex()).stream()
                    .mapToDouble(Double::doubleValue)
                    .average()
                    .orElse(0.0);
            double averageLongRange = longRangePlmScores.get(aminoAcid.getResidueIndex()).stream()
                    .mapToDouble(Double::doubleValue)
                    .average()
                    .orElse(0.0);
            return prefix + "local," + StandardFormat.format(averageLocal) + System.lineSeparator()
                    + prefix + "long-range," + StandardFormat.format(averageLongRange);
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        // best effort: a broken entry must not abort the whole export, it is skipped instead
        e.printStackTrace();
        return Optional.empty();
    }
}
Aggregations