Search in sources:

Example 6 with ContactStructuralInformation

use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.

In the class StructuralInformationParserService, method parseContactStructuralInformationFile:

/**
 * Parses a tab-separated reconstruction file and condenses all reconstructions that refer to the same residue
 * pair into one {@link ContactStructuralInformation}.
 * <p>
 * Expected line layout, as read by this method: column 0 holds the residue pair in the form {@code (i, j)},
 * column 1 is the literal {@code true} or anything else (treated as {@code false}), and columns 2 to 10 are
 * parsed as doubles and handed to the {@link ReconstructionStructuralInformation} constructor in that order.
 * <p>
 * The input is decoded as UTF-8 and the stream is closed before this method returns.
 *
 * @param inputStream          the raw reconstruction data; consumed completely and closed by this method
 * @param chain                the chain used to look up the one-letter code of both residues of each pair
 * @param earlyFoldingResidues residues handed to {@code residueIsInCollection}/{@code contactIsInCollection}
 * @return one entry per distinct residue pair, in the iteration order of an internal {@link HashMap}
 * @throws UncheckedIOException if reading or closing the stream fails
 */
public List<ContactStructuralInformation> parseContactStructuralInformationFile(InputStream inputStream, Chain chain, List<AminoAcid> earlyFoldingResidues) {
    // step 1: bucket the raw lines by the residue pair encoded in their first column
    Map<Pair<Integer, Integer>, List<String>> parsingMap = new HashMap<>();
    // the reader itself is the managed resource: closing only the Stream returned by lines() would leave
    // the underlying input stream open
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
        reader.lines().forEach(line -> {
            String[] split = line.split("\t");
            String[] idSplit = split[0].split(",");
            Pair<Integer, Integer> idPair = new Pair<>(Integer.valueOf(idSplit[0].split("\\(")[1].trim()), Integer.valueOf(idSplit[1].split("\\)")[0].trim()));
            parsingMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(line);
        });
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }

    // step 2: turn every line into a reconstruction record, again grouped by residue pair
    Map<Pair<Integer, Integer>, List<ReconstructionStructuralInformation>> reconstructionMap = new HashMap<>();
    parsingMap.entrySet().stream().flatMap(entry -> {
        Integer residueNumber1 = entry.getKey().getLeft();
        Integer residueNumber2 = entry.getKey().getRight();
        String aa1 = chain.select().residueNumber(residueNumber1).asAminoAcid().getOneLetterCode();
        String aa2 = chain.select().residueNumber(residueNumber2).asAminoAcid().getOneLetterCode();
        return entry.getValue().stream()
                .map(line -> line.split("\t"))
                .map(split -> new ReconstructionStructuralInformation(residueNumber1,
                        aa1,
                        residueNumber2,
                        aa2,
                        ContactDistanceBin.resolve(new Pair<>(IdentifierFactory.createResidueIdentifier(residueNumber1), IdentifierFactory.createResidueIdentifier(residueNumber2))).orElse(null),
                        split[1].equals("true"),
                        Double.valueOf(split[2]),
                        Double.valueOf(split[3]),
                        Double.valueOf(split[4]),
                        Double.valueOf(split[5]),
                        Double.valueOf(split[6]),
                        Double.valueOf(split[7]),
                        Double.valueOf(split[8]),
                        Double.valueOf(split[9]),
                        Double.valueOf(split[10])));
    }).forEach(rsi -> {
        Pair<Integer, Integer> idPair = new Pair<>(rsi.getResidueIdentifier1(), rsi.getResidueIdentifier2());
        reconstructionMap.computeIfAbsent(idPair, key -> new ArrayList<>()).add(rsi);
    });

    // step 3: statistics over all reconstructions and over the per-contact maxima
    List<ReconstructionStructuralInformation> reconstructionStructuralInformation = reconstructionMap.values().stream().flatMap(Collection::stream).collect(Collectors.toList());
    int numberOfReconstructions = reconstructionStructuralInformation.size();

    double[] rmsdIncreases = reconstructionStructuralInformation.stream().mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease).toArray();
    double averageRmsd = Arrays.stream(rmsdIncreases).average().orElse(0.0);
    double standardDeviationRmsd = new StandardDeviation().evaluate(rmsdIncreases);

    double[] maximumRmsdIncreases = reconstructionMap.values().stream()
            .mapToDouble(values -> values.stream().mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease).max().orElse(0.0))
            .toArray();
    double averageMaximumRmsd = Arrays.stream(maximumRmsdIncreases).average().orElse(0.0);
    double standardDeviationMaximumRmsd = new StandardDeviation().evaluate(maximumRmsdIncreases);

    // the 10% of all reconstructions with the largest RMSD increase (count is truncated towards zero)
    List<ReconstructionStructuralInformation> topScoringReconstructions = reconstructionStructuralInformation.stream()
            .sorted(Comparator.comparingDouble(ReconstructionStructuralInformation::getRmsdIncrease).reversed())
            .limit((int) (0.1 * numberOfReconstructions))
            .collect(Collectors.toList());

    // step 4: one aggregated record per residue pair
    return reconstructionMap.entrySet().stream().map(entry -> {
        List<ReconstructionStructuralInformation> values = entry.getValue();
        // every list holds at least one element; residue numbers, amino acids and distance bin are shared by all of them
        ReconstructionStructuralInformation reference = values.get(0);
        return new ContactStructuralInformation(reference.getResidueIdentifier1(),
                reference.getAa1(),
                reference.getResidueIdentifier2(),
                reference.getAa2(),
                reference.getContactDistanceBin(),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineRmsd),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineTmScore),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineQ),
                computeAverage(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getqIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getqIncrease),
                residueIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                contactIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                averageRmsd,
                standardDeviationRmsd,
                averageMaximumRmsd,
                standardDeviationMaximumRmsd,
                reconstructionStructuralInformation,
                topScoringReconstructions,
                values.stream().map(ReconstructionStructuralInformation::getRmsdIncrease).collect(Collectors.toList()));
    }).collect(Collectors.toList());
}
Also used : java.util(java.util) Files(java.nio.file.Files) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) Collectors(java.util.stream.Collectors) Pair(de.bioforscher.jstructure.mathematics.Pair) HotSpotScoring(de.bioforscher.jstructure.efr.model.HotSpotScoring) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) Stream(java.util.stream.Stream) java.io(java.io) ContactDistanceBin(de.bioforscher.jstructure.efr.model.ContactDistanceBin) Group(de.bioforscher.jstructure.model.structure.Group) ToDoubleFunction(java.util.function.ToDoubleFunction) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) ReconstructionStructuralInformation(de.bioforscher.jstructure.efr.model.si.ReconstructionStructuralInformation) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Path(java.nio.file.Path) ReconstructionStructuralInformation(de.bioforscher.jstructure.efr.model.si.ReconstructionStructuralInformation) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) Pair(de.bioforscher.jstructure.mathematics.Pair)

Example 7 with ContactStructuralInformation

use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.

In the class StructuralInformationParserServiceTest, method shouldPrintStructuralInformationByResidue:

/**
 * Prints one comma-separated line per residue of chain 1bdd: the six structural information values followed
 * by betweenness, closeness, clustering coefficient and degree from a distance-based residue graph.
 * Nothing is asserted; the test only checks that the pipeline runs through.
 */
@Test
public void shouldPrintStructuralInformationByResidue() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain with experiment 185 of STF0045, then pick the residues flagged as early folding
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);
    List<ResidueStructuralInformation> residueStructuralInformation = StructuralInformationParserService.getInstance()
            .composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contactStructuralInformation);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ResidueStructuralInformation si : residueStructuralInformation) {
        AminoAcid aminoAcid = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier()))
                .asAminoAcid();
        double betweenness = residueGraphCalculations.betweenness(aminoAcid);
        double closeness = residueGraphCalculations.closeness(aminoAcid);
        double cc = residueGraphCalculations.clusteringCoefficient(aminoAcid);
        int degree = residueGraph.degreeOf(aminoAcid);

        String row = StandardFormat.format(si.getAverageRmsdIncrease()) + ","
                + StandardFormat.format(si.getAverageTmScoreIncrease()) + ","
                + StandardFormat.format(si.getAverageQIncrease()) + ","
                + StandardFormat.format(si.getMaximumRmsdIncrease()) + ","
                + StandardFormat.format(si.getMaximumTmScoreIncrease()) + ","
                + StandardFormat.format(si.getMaximumQIncrease()) + ","
                + StandardFormat.format(betweenness) + ","
                + StandardFormat.format(closeness) + ","
                + StandardFormat.format(cc) + ","
                + StandardFormat.format(degree);
        System.out.println(row);
    }
}
Also used : StructuralInformationParserService(de.bioforscher.jstructure.efr.parser.StructuralInformationParserService) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Test(org.junit.Test) TestUtils(de.bioforscher.testutil.TestUtils) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldXmlParser(de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser) Pair(de.bioforscher.jstructure.mathematics.Pair) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) List(java.util.List) Stream(java.util.stream.Stream) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Test(org.junit.Test)

Example 8 with ContactStructuralInformation

use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.

In the class StructuralInformationParserServiceTest, method shouldPrintStructuralInformationByContact:

/**
 * Prints one comma-separated line per contact of chain 1bdd: the six structural information values followed
 * by the betweenness of the residue pair in a distance-based residue graph. Nothing is asserted; the test
 * only checks that the pipeline runs through.
 */
@Test
public void shouldPrintStructuralInformationByContact() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    // collected as in the by-residue test; not read again in this method
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain with experiment 185 of STF0045, then pick the residues flagged as early folding
    Start2FoldXmlParser.parseSpecificExperiment(chain,
            TestUtils.getResourceAsInputStream("efr/STF0045.xml"),
            Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ContactStructuralInformation si : contactStructuralInformation) {
        AminoAcid aminoAcid1 = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier1()))
                .asAminoAcid();
        AminoAcid aminoAcid2 = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier2()))
                .asAminoAcid();
        Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);
        double betweenness = residueGraphCalculations.betweenness(pair);

        String row = StandardFormat.format(si.getAverageRmsdIncrease()) + ","
                + StandardFormat.format(si.getAverageTmScoreIncrease()) + ","
                + StandardFormat.format(si.getAverageQIncrease()) + ","
                + StandardFormat.format(si.getMaximumRmsdIncrease()) + ","
                + StandardFormat.format(si.getMaximumTmScoreIncrease()) + ","
                + StandardFormat.format(si.getMaximumQIncrease()) + ","
                + StandardFormat.format(betweenness);
        System.out.println(row);
    }
}
Also used : StructuralInformationParserService(de.bioforscher.jstructure.efr.parser.StructuralInformationParserService) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Test(org.junit.Test) TestUtils(de.bioforscher.testutil.TestUtils) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldXmlParser(de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser) Pair(de.bioforscher.jstructure.mathematics.Pair) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) List(java.util.List) Stream(java.util.stream.Stream) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Pair(de.bioforscher.jstructure.mathematics.Pair) Test(org.junit.Test)

Example 9 with ContactStructuralInformation

use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.

In the class EvolutionaryCouplingParser, method parsePlmScore:

/**
 * Annotates every contact with its coupling information from the first {@code table} element of the given
 * document and then flags the top-ranked contacts for eight cut-offs (0.2, 0.4, ..., 1.6 times the number of
 * residues).
 * <p>
 * Side effect: {@code contacts} is sorted in place by ascending coupling rank. Contacts whose coupling rank
 * is not greater than 0 are never flagged and do not count towards a cut-off.
 *
 * @param contacts         the contacts to annotate; must be a modifiable list because it is sorted in place
 * @param document         the parsed HTML document providing the coupling table
 * @param numberOfResidues the number of residues the cut-off fractions are multiplied with
 */
public static void parsePlmScore(List<ContactStructuralInformation> contacts, Document document, int numberOfResidues) {
    Element table = document.getElementsByTag("table").first();
    contacts.forEach(contact -> parsePlmScore(contact, table));
    contacts.sort(Comparator.comparingDouble(ContactStructuralInformation::getCouplingRank));

    markTopScoringContacts(contacts, 0.2, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact02);
    markTopScoringContacts(contacts, 0.4, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact04);
    markTopScoringContacts(contacts, 0.6, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact06);
    markTopScoringContacts(contacts, 0.8, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact08);
    markTopScoringContacts(contacts, 1.0, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact10);
    markTopScoringContacts(contacts, 1.2, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact12);
    markTopScoringContacts(contacts, 1.4, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact14);
    markTopScoringContacts(contacts, 1.6, numberOfResidues, ContactStructuralInformation::markAsTopScoringContact16);
}

/**
 * Applies {@code marker} to the first {@code (int) (fraction * numberOfResidues)} contacts of the (already
 * sorted) list that have a coupling rank greater than 0.
 */
private static void markTopScoringContacts(List<ContactStructuralInformation> contacts, double fraction, int numberOfResidues, java.util.function.Consumer<ContactStructuralInformation> marker) {
    // truncation towards zero is intended: it matches the explicit int cast of the cut-off computation
    int contactsToSelect = (int) (fraction * numberOfResidues);
    contacts.stream()
            .filter(contact -> contact.getCouplingRank() > 0)
            .limit(contactsToSelect)
            .forEach(marker);
}
Also used : Element(org.jsoup.nodes.Element) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation)

Example 10 with ContactStructuralInformation

use of de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation in project jstructure by JonStargaryen.

In the class A07_WriteStructuralInformationByContactCsv, method handleLine:

/**
 * Converts one semicolon-separated summary line into CSV rows, one row per residue contact of the first
 * chain of the referenced structure.
 * <p>
 * Fields read from the line: index 0 is the entry id, index 1 the PDB id, index 2 a bracketed,
 * comma-separated list of experiment ids; the whole split line is additionally handed to
 * {@code Start2FoldConstants.extractFunctionalResidueNumbers}.
 * <p>
 * Evolutionary coupling information is optional: if it cannot be loaded or parsed, the failure is logged and
 * the rows are written without it.
 *
 * @param line one line of the summary file
 * @return the rows joined by the system line separator, or {@link Optional#empty()} if any step failed
 *         (the failure is logged)
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", ""))
                .map(Integer::valueOf)
                .collect(Collectors.toList());
        // boolean sane = split[6].equalsIgnoreCase("true");
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        Chain chain = structure.chains().findFirst().get();
        LinearAlgebra.PrimitiveDoubleArrayLinearAlgebra centroid = chain.calculate().centroid();

        Path start2foldXml = Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml");
        Start2FoldXmlParser.parseStability(chain, start2foldXml);
        Start2FoldXmlParser.parseSpecificExperiment(chain, start2foldXml, experimentIds);

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids()
                    .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                    .forEach(functionalResidues::add);
        }

        List<AminoAcid> strongResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isStrong())
                .collect(Collectors.toList());
        List<AminoAcid> orderedResidues = chain.aminoAcids()
                .filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType())
                .collect(Collectors.toList());
        List<AminoAcid> buriedResidues = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isBuried())
                .collect(Collectors.toList());
        // non-coil residues whose surrounding secondary structure element contains at least one early folding residue
        List<AminoAcid> residuesInEarlyFoldingSecondaryStructureElements = chain.aminoAcids()
                .filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType())
                .filter(aminoAcid -> {
                    GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement = aminoAcid.getFeature(GenericSecondaryStructure.class).getSurroundingSecondaryStructureElement(aminoAcid);
                    List<AminoAcid> surroundingAminoAcids = chain.getAminoAcids().subList(surroundingSecondaryStructureElement.getStart(), surroundingSecondaryStructureElement.getEnd() + 1);
                    return surroundingAminoAcids.stream().anyMatch(earlyFoldingResidues::contains);
                })
                .collect(Collectors.toList());
        List<AminoAcid> aromaticResidues = chain.aminoAcids()
                .filter(AminoAcid.Filter.AROMATIC)
                .collect(Collectors.toList());

        List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
                .parseContactStructuralInformation(Start2FoldConstants.DATA_DIRECTORY.resolve("si").resolve("raw").resolve(entryId.toUpperCase() + ".out"), chain, earlyFoldingResidues);

        ResidueGraph conventionalProteinGraph = ResidueGraph.createResidueGraph(chain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
        ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(conventionalProteinGraph);

        try {
            EvolutionaryCouplingParser.parsePlmScore(contactStructuralInformation,
                    Jsoup.parse(Start2FoldConstants.newInputStream(Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId + "_ec.html")), "UTF-8", ""),
                    chain.getAminoAcids().size());
        } catch (Exception e) {
            // best effort: rows are still written without coupling data, but the reason is no longer lost
            logger.warn("no evolutionary coupling information for {}: {}", entryId, e.getMessage());
        }
        // true as soon as any contact carries a non-zero plm score
        boolean ecAnnotation = contactStructuralInformation.stream().anyMatch(csi -> csi.getPlmScore() != 0.0);

        PLIPInteractionContainer plipInteractionContainer = chain.getFeature(PLIPInteractionContainer.class);

        System.out.println("efr: " + (earlyFoldingResidues.size() > 0) + " strong: " + (strongResidues.size() > 0) + " functional: " + (functionalResidues.size() > 0) + " couplings: " + ecAnnotation);

        return Optional.of(contactStructuralInformation.stream().map(contact -> {
            AminoAcid aminoAcid1 = chain.select().residueNumber(contact.getResidueIdentifier1()).asAminoAcid();
            AminoAcid aminoAcid2 = chain.select().residueNumber(contact.getResidueIdentifier2()).asAminoAcid();
            Pair<AminoAcid, AminoAcid> pair = new Pair<>(aminoAcid1, aminoAcid2);

            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer1 = aminoAcid1.getFeature(ResidueTopologicPropertiesContainer.class);
            // must come from the second residue: reading it from aminoAcid1 made the averaged columns below
            // identical to the values of residue 1 alone
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer2 = aminoAcid2.getFeature(ResidueTopologicPropertiesContainer.class);

            // midpoint between the centroids of both residues
            LinearAlgebra.PrimitiveDoubleArrayLinearAlgebra contactCentroid = aminoAcid1.calculate().centroid().add(aminoAcid2.calculate().centroid()).divide(2);

            // the chain column is the literal "A"; it is not read from the chain
            return pdbId + "," + "A" + ","
                    + contact.getResidueIdentifier1() + ","
                    + contact.getAa1() + ","
                    + contact.getResidueIdentifier2() + ","
                    + contact.getAa2() + ","
                    + contact.getContactDistanceBin() + ","
                    + (contact.getContactDistanceBin() == ContactDistanceBin.LONG) + ","
                    + (contact.getContactDistanceBin() == ContactDistanceBin.MEDIUM) + ","
                    + (contact.getContactDistanceBin() == ContactDistanceBin.SHORT) + ","
                    + StandardFormat.format(contactCentroid.distance(centroid)) + ","
                    + StandardFormat.format(contact.getAverageRmsdIncrease()) + ","
                    + StandardFormat.format(contact.getAverageTmScoreIncrease()) + ","
                    + StandardFormat.format(contact.getAverageQIncrease()) + ","
                    + StandardFormat.format(contact.getMaximumRmsdIncrease()) + ","
                    + StandardFormat.format(contact.getMaximumTmScoreIncrease()) + ","
                    + StandardFormat.format(contact.getMaximumQIncrease()) + ","
                    + StandardFormat.format(contact.getAverageRmsdIncreaseZScore()) + ","
                    + contact.getFractionOfTopScoringContacts() + ","
                    + StandardFormat.format(contact.getPlmScore()) + ","
                    + contact.getCouplingRank() + ","
                    + contact.istop02() + ","
                    + contact.isTop04() + ","
                    + contact.isTop06() + ","
                    + contact.isTop08() + ","
                    + contact.isTop10() + ","
                    + contact.isTop12() + ","
                    + contact.isTop14() + ","
                    + contact.isTop16() + ","
                    + StandardFormat.format(residueGraphCalculations.betweenness(pair)) + ","
                    // per-residue topology values, averaged over both residues of the contact
                    + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getBetweenness() + 0.5 * residueTopologicPropertiesContainer2.getConventional().getBetweenness()) + ","
                    + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getCloseness() + 0.5 * residueTopologicPropertiesContainer2.getConventional().getCloseness()) + ","
                    + StandardFormat.format(0.5 * residueTopologicPropertiesContainer1.getConventional().getClusteringCoefficient() + 0.5 * residueTopologicPropertiesContainer2.getConventional().getClusteringCoefficient()) + ","
                    + plipInteractionContainer.getHydrogenBonds().stream().anyMatch(hydrogenBond -> isContact(hydrogenBond, aminoAcid1, aminoAcid2)) + ","
                    + plipInteractionContainer.getHydrophobicInteractions().stream().anyMatch(hydrophobicInteraction -> isContact(hydrophobicInteraction, aminoAcid1, aminoAcid2)) + ","
                    + contact.isEarlyFoldingResidue() + ","
                    + contact.isEarlyFoldingContact() + ","
                    + residueIsInCollection(functionalResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(functionalResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(strongResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(strongResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(buriedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(buriedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(orderedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(orderedResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(residuesInEarlyFoldingSecondaryStructureElements, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(residuesInEarlyFoldingSecondaryStructureElements, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + residueIsInCollection(aromaticResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + contactIsInCollection(aromaticResidues, contact.getResidueIdentifier1(), contact.getResidueIdentifier2()) + ","
                    + (earlyFoldingResidues.size() > 0) + ","
                    + (strongResidues.size() > 0) + ","
                    + (functionalResidues.size() > 0) + ","
                    + ecAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        logger.info("calculation failed for {}\nby: {}", line, e.getMessage());
        return Optional.empty();
    }
}
Also used : FunctionalResidueParser(de.bioforscher.jstructure.efr.parser.FunctionalResidueParser) LinearAlgebra(de.bioforscher.jstructure.mathematics.LinearAlgebra) StructuralInformationParserService(de.bioforscher.jstructure.efr.parser.StructuralInformationParserService) PLIPInteraction(de.bioforscher.jstructure.feature.interaction.PLIPInteraction) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) ArrayList(java.util.ArrayList) ContactDistanceBin(de.bioforscher.jstructure.efr.model.ContactDistanceBin) Group(de.bioforscher.jstructure.model.structure.Group) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Files(java.nio.file.Files) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FunctionalResidueAnnotation(de.bioforscher.jstructure.efr.model.FunctionalResidueAnnotation) Start2FoldXmlParser(de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser) Pair(de.bioforscher.jstructure.mathematics.Pair) Start2FoldConstants(de.bioforscher.jstructure.efr.Start2FoldConstants) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) List(java.util.List) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.graph.ResidueTopologicPropertiesContainer) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) 
EvolutionaryCouplingParser(de.bioforscher.jstructure.efr.parser.EvolutionaryCouplingParser) Optional(java.util.Optional) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interaction.PLIPInteractionContainer) ContactDefinitionFactory(de.bioforscher.jstructure.graph.contact.definition.ContactDefinitionFactory) Chain(de.bioforscher.jstructure.model.structure.Chain) ArrayList(java.util.ArrayList) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) ArrayList(java.util.ArrayList) List(java.util.List) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) Pair(de.bioforscher.jstructure.mathematics.Pair) Path(java.nio.file.Path) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.graph.ResidueTopologicPropertiesContainer) FunctionalResidueAnnotation(de.bioforscher.jstructure.efr.model.FunctionalResidueAnnotation) IOException(java.io.IOException) LinearAlgebra(de.bioforscher.jstructure.mathematics.LinearAlgebra) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interaction.PLIPInteractionContainer) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations)

Aggregations

ContactStructuralInformation (de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation)10 Chain (de.bioforscher.jstructure.model.structure.Chain)9 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)9 Collectors (java.util.stream.Collectors)9 StandardFormat (de.bioforscher.jstructure.StandardFormat)7 List (java.util.List)7 Pair (de.bioforscher.jstructure.mathematics.Pair)6 Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation)5 ResidueStructuralInformation (de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation)5 ResidueGraph (de.bioforscher.jstructure.graph.ResidueGraph)5 StructureParser (de.bioforscher.jstructure.model.structure.StructureParser)5 Files (java.nio.file.Files)5 Path (java.nio.file.Path)5 Stream (java.util.stream.Stream)5 Start2FoldConstants (de.bioforscher.jstructure.efr.Start2FoldConstants)4 Start2FoldXmlParser (de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser)4 StructuralInformationParserService (de.bioforscher.jstructure.efr.parser.StructuralInformationParserService)4 TestUtils (de.bioforscher.testutil.TestUtils)4 ContactDistanceBin (de.bioforscher.jstructure.efr.model.ContactDistanceBin)3 HotSpotScoring (de.bioforscher.jstructure.efr.model.HotSpotScoring)3