Search in sources :

Example 1 with ReconstructionStructuralInformation

Usage of de.bioforscher.jstructure.efr.model.si.ReconstructionStructuralInformation in the project jstructure by JonStargaryen.

The example is the method parseContactStructuralInformationFile of the class StructuralInformationParserService.

/**
 * Parses a tab-separated file of per-reconstruction records and aggregates them into one
 * {@link ContactStructuralInformation} per residue pair.
 *
 * <p>Every non-blank line is split on tabs. Column 0 must contain the two residue numbers in the
 * form {@code (<number>,<number>)}; column 1 is compared against the literal {@code "true"};
 * columns 2 to 10 are parsed as doubles. All records sharing the same residue pair are grouped,
 * and their values are averaged/maximized via {@code computeAverage} / {@code computeMaximum}.
 * Additionally, global statistics over all records (mean and standard deviation of the RMSD
 * increase, mean and standard deviation of the per-contact maximum RMSD increase, and the top
 * 10% of records by RMSD increase) are passed to every resulting object.
 *
 * <p>The input is decoded as UTF-8 so that the result does not depend on the platform default
 * charset. Blank lines are skipped instead of failing with an index error.
 *
 * <p>NOTE(review): closing the line stream is not documented to close the underlying reader, so
 * the caller presumably remains responsible for closing {@code inputStream} - confirm.
 *
 * @param inputStream the stream providing the tab-separated records
 * @param chain the chain used to look up the one-letter code of each referenced residue
 * @param earlyFoldingResidues the residues handed to {@code residueIsInCollection} and
 *     {@code contactIsInCollection} for each residue pair
 * @return one aggregated entry per distinct residue pair; the order follows the iteration order
 *     of a {@link HashMap} and is therefore unspecified
 * @throws NumberFormatException if a residue number or a numeric column cannot be parsed
 * @throws ArrayIndexOutOfBoundsException if a non-blank line has fewer columns than expected
 */
public List<ContactStructuralInformation> parseContactStructuralInformationFile(InputStream inputStream, Chain chain, List<AminoAcid> earlyFoldingResidues) {
    // first pass: group the already split columns of each record by residue pair (file order is kept per pair)
    Map<Pair<Integer, Integer>, List<String[]>> parsingMap = new HashMap<>();
    try (Stream<String> stream = new BufferedReader(new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8)).lines()) {
        stream.filter(line -> !line.trim().isEmpty()).forEach(line -> {
            String[] split = line.split("\t");
            String[] idSplit = split[0].split(",");
            Pair<Integer, Integer> idPair = new Pair<>(Integer.valueOf(idSplit[0].split("\\(")[1].trim()), Integer.valueOf(idSplit[1].split("\\)")[0].trim()));
            parsingMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(split);
        });
    }

    // second pass: the amino acid lookup happens once per residue pair, the record creation once per line
    Map<Pair<Integer, Integer>, List<ReconstructionStructuralInformation>> reconstructionMap = new HashMap<>();
    parsingMap.entrySet().stream().flatMap(entry -> {
        String aa1 = chain.select().residueNumber(entry.getKey().getLeft()).asAminoAcid().getOneLetterCode();
        String aa2 = chain.select().residueNumber(entry.getKey().getRight()).asAminoAcid().getOneLetterCode();
        return entry.getValue().stream().map(split -> new ReconstructionStructuralInformation(entry.getKey().getLeft(), aa1, entry.getKey().getRight(), aa2, ContactDistanceBin.resolve(new Pair<>(IdentifierFactory.createResidueIdentifier(entry.getKey().getLeft()), IdentifierFactory.createResidueIdentifier(entry.getKey().getRight()))).orElse(null), split[1].equals("true"), Double.valueOf(split[2]), Double.valueOf(split[3]), Double.valueOf(split[4]), Double.valueOf(split[5]), Double.valueOf(split[6]), Double.valueOf(split[7]), Double.valueOf(split[8]), Double.valueOf(split[9]), Double.valueOf(split[10])));
    }).forEach(rsi -> {
        Pair<Integer, Integer> idPair = new Pair<>(rsi.getResidueIdentifier1(), rsi.getResidueIdentifier2());
        reconstructionMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(rsi);
    });

    // global statistics over all records; each value array is built once and reused for mean and standard deviation
    List<ReconstructionStructuralInformation> reconstructionStructuralInformation = reconstructionMap.values().stream().flatMap(Collection::stream).collect(Collectors.toList());
    int numberOfReconstructions = reconstructionStructuralInformation.size();
    double[] rmsdIncreases = reconstructionStructuralInformation.stream().mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease).toArray();
    double averageRmsd = Arrays.stream(rmsdIncreases).average().orElse(0.0);
    double standardDeviationRmsd = new StandardDeviation().evaluate(rmsdIncreases);
    double[] maximumRmsdIncreases = reconstructionMap.entrySet().stream().mapToDouble(entry -> entry.getValue().stream().mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease).max().orElse(0.0)).toArray();
    double averageMaximumRmsd = Arrays.stream(maximumRmsdIncreases).average().orElse(0.0);
    double standardDeviationMaximumRmsd = new StandardDeviation().evaluate(maximumRmsdIncreases);

    // the 10% of all records with the largest RMSD increase (count is truncated towards zero)
    List<ReconstructionStructuralInformation> topScoringReconstructions = reconstructionStructuralInformation.stream().sorted(Comparator.comparingDouble(ReconstructionStructuralInformation::getRmsdIncrease).reversed()).limit((int) (0.1 * numberOfReconstructions)).collect(Collectors.toList());

    return reconstructionMap.entrySet().stream().map(entry -> {
        List<ReconstructionStructuralInformation> values = entry.getValue();
        // every group holds at least one record; identifiers, amino acids and distance bin are identical within a group
        ReconstructionStructuralInformation reference = values.get(0);
        return new ContactStructuralInformation(reference.getResidueIdentifier1(), reference.getAa1(), reference.getResidueIdentifier2(), reference.getAa2(), reference.getContactDistanceBin(), computeAverage(values, ReconstructionStructuralInformation::getBaselineRmsd), computeAverage(values, ReconstructionStructuralInformation::getBaselineTmScore), computeAverage(values, ReconstructionStructuralInformation::getBaselineQ), computeAverage(values, ReconstructionStructuralInformation::getRmsdIncrease), computeAverage(values, ReconstructionStructuralInformation::getTmScoreIncrease), computeAverage(values, ReconstructionStructuralInformation::getqIncrease), computeMaximum(values, ReconstructionStructuralInformation::getRmsdIncrease), computeMaximum(values, ReconstructionStructuralInformation::getTmScoreIncrease), computeMaximum(values, ReconstructionStructuralInformation::getqIncrease), residueIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()), contactIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()), averageRmsd, standardDeviationRmsd, averageMaximumRmsd, standardDeviationMaximumRmsd, reconstructionStructuralInformation, topScoringReconstructions, values.stream().map(ReconstructionStructuralInformation::getRmsdIncrease).collect(Collectors.toList()));
    }).collect(Collectors.toList());
}
Also used : java.util(java.util) Files(java.nio.file.Files) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) Collectors(java.util.stream.Collectors) Pair(de.bioforscher.jstructure.mathematics.Pair) HotSpotScoring(de.bioforscher.jstructure.efr.model.HotSpotScoring) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) Stream(java.util.stream.Stream) java.io(java.io) ContactDistanceBin(de.bioforscher.jstructure.efr.model.ContactDistanceBin) Group(de.bioforscher.jstructure.model.structure.Group) ToDoubleFunction(java.util.function.ToDoubleFunction) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) ReconstructionStructuralInformation(de.bioforscher.jstructure.efr.model.si.ReconstructionStructuralInformation) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Path(java.nio.file.Path) ReconstructionStructuralInformation(de.bioforscher.jstructure.efr.model.si.ReconstructionStructuralInformation) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) Pair(de.bioforscher.jstructure.mathematics.Pair)

Aggregations

StandardFormat (de.bioforscher.jstructure.StandardFormat)1 ContactDistanceBin (de.bioforscher.jstructure.efr.model.ContactDistanceBin)1 HotSpotScoring (de.bioforscher.jstructure.efr.model.HotSpotScoring)1 ContactStructuralInformation (de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation)1 ReconstructionStructuralInformation (de.bioforscher.jstructure.efr.model.si.ReconstructionStructuralInformation)1 ResidueStructuralInformation (de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation)1 Pair (de.bioforscher.jstructure.mathematics.Pair)1 IdentifierFactory (de.bioforscher.jstructure.model.identifier.IdentifierFactory)1 ResidueIdentifier (de.bioforscher.jstructure.model.identifier.ResidueIdentifier)1 Chain (de.bioforscher.jstructure.model.structure.Chain)1 Group (de.bioforscher.jstructure.model.structure.Group)1 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)1 java.io (java.io)1 Files (java.nio.file.Files)1 Path (java.nio.file.Path)1 java.util (java.util)1 ToDoubleFunction (java.util.function.ToDoubleFunction)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 StandardDeviation (org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)1