Search in sources :

Example 26 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class EarlyFoldingClassificationRunner method process.

/**
 * Computes residue-level features for a structure, classifies every residue with the
 * loaded model and writes a comma-separated report to {@code outputPath}.
 *
 * <p>Steps, in order: PLIP contacts (standard annotation first, per-chain on-the-fly
 * computation as fallback), energy profiles, secondary structure / loop fraction,
 * relative accessible surface area, raw feature vectors, smoothed feature vectors,
 * classification. If both PLIP strategies fail the method prints the failure and
 * returns without writing any output.
 *
 * <p>The report is built chain by chain: the residues of a chain are ranked by the
 * predicted probability (descending) and the top 15% of that chain are labelled
 * {@code early}, all others {@code late}. Prolines are never classified and keep a
 * probability of 0.0.
 *
 * @param structure  the structure to annotate and classify
 * @param outputPath the file the report is written to
 * @throws IOException if the report cannot be written
 */
public void process(Structure structure, Path outputPath) throws IOException {
    // report structure characteristics
    System.out.println("structure: " + structure.getProteinIdentifier().getFullName() + "\n" + "chains: " + structure.chainsWithAminoAcids().map(Chain::getChainIdentifier).map(ChainIdentifier::getChainId).collect(Collectors.toList()) + "\n" + "total residues: " + structure.aminoAcids().count());
    System.out.println();
    // compute features
    System.out.println("computing residue-level features");
    // start with PLIP to fail fast
    System.out.println("querying PLIP-REST-Service");
    try {
        // try to annotate by standard routine
        PLIP_INTRA_MOLECULAR_ANNOTATOR.process(structure);
        System.out.println("fetched PLIP contacts");
    } catch (Exception e1) {
        // deliberate fallback: the standard routine failed, so the cause is not reported here
        try {
            // potential non-pdb-entry, try to compute on-the-fly
            structure.chainsWithAminoAcids().forEach(chain -> {
                Document document = PLIPRestServiceQuery.calculateIntraChainDocument(chain);
                PLIP_INTRA_MOLECULAR_ANNOTATOR.process(chain, document);
            });
            System.out.println("computed PLIP contacts");
        } catch (Exception e2) {
            System.out.println("failed: could not compute PLIP contacts");
            e2.printStackTrace();
            return;
        }
    }
    System.out.println("computing energy profiles");
    EGOR_AGREEMENT_CALCULATOR.process(structure);
    System.out.println("annotating secondary structure elements");
    LOOP_FRACTION_CALCULATOR.process(structure);
    System.out.println("computing relative accessible surface area");
    ACCESSIBLE_SURFACE_AREA_CALCULATOR.process(structure);
    // assign feature vectors
    structure.aminoAcids().forEach(RawFeatureVector::assignRawFeatureVector);
    // smooth feature vectors - smoothing receives the residues of the own chain only
    structure.chainsWithAminoAcids().forEach(chain -> {
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        aminoAcids.forEach(aminoAcid -> SmoothedFeatureVector.assignSmoothedFeatureVector(aminoAcids, aminoAcid));
    });
    // classify each residue
    StringJoiner outputJoiner = new StringJoiner(System.lineSeparator());
    // print header
    outputJoiner.add("structure: '" + structure.getProteinIdentifier().getFullName() + "'")
            .add("chains: " + structure.chainsWithAminoAcids().map(Chain::getChainIdentifier).map(ChainIdentifier::getChainId).collect(Collectors.toList()))
            .add("total residues: " + structure.aminoAcids().count())
            .add("chain,res,aa,sse,energy,egor,sse_size,loop_fraction,rasa,plip_local_contacts," + "plip_local_hbonds,plip_local_hydrophobic,plip_local_backbone,plip_long_range_contacts," + "plip_long_range_hbonds,plip_long_range_hydrophobic,plip_long_range_backbone," + "plip_betweenness,plip_closeness,plip_clusteringcoefficient,plip_hbonds_betweenness," + "plip_hbonds_closeness,plip_hbonds_clusteringcoefficient,plip_hydrophobic_betweenness," + "plip_hydrophobic_closeness,plip_hydrophobic_clusteringcoefficient,conv_betweenness," + "conv_closeness,conv_clusteringcoefficient,plip_neighborhoods,conv_neighborhoods,prob,folds");
    structure.chainsWithAminoAcids().forEach(chain -> {
        // FIX: rank the residues of THIS chain only. The previous implementation streamed
        // structure.aminoAcids() here, so for multi-chain structures every chain section was
        // filled with the top-ranked residues of the whole structure.
        List<String> output = chain.aminoAcids().map(aminoAcid -> {
            boolean isProline = aminoAcid instanceof Proline;
            SmoothedFeatureVector smoothedFeatureVector = aminoAcid.getFeature(SmoothedFeatureVector.class);
            double loopFraction = aminoAcid.getFeature(LoopFraction.class).getLoopFraction();
            Instance instance = createInstance(smoothedFeatureVector, loopFraction);
            // prolines are skipped by the classifier and keep the default probability
            double prob = 0.0;
            if (!isProline) {
                try {
                    // first entry of the class distribution - presumably the 'early' class, verify against the model
                    prob = model.distributionForInstance(normalize(instance))[0];
                } catch (Exception e) {
                    // best effort: a failed prediction leaves the residue at probability 0.0
                    e.printStackTrace();
                }
            }
            StringJoiner lineJoiner = new StringJoiner(",");
            lineJoiner.add(aminoAcid.getParentChain().getChainIdentifier().getChainId())
                    .add(aminoAcid.getResidueIdentifier().toString())
                    .add(aminoAcid.getOneLetterCode())
                    .add(aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().getReducedRepresentation());
            // all attributes except the last one are reported
            for (int i = 0; i < instance.numAttributes() - 1; i++) {
                lineJoiner.add(StandardFormat.format(instance.value(i)));
            }
            lineJoiner.add(StandardFormat.format(prob));
            return lineJoiner.toString();
        }).sorted(Comparator.comparingDouble((String line) -> {
            // the probability is the last column of each line
            String[] fields = line.split(",");
            return Double.parseDouble(fields[fields.length - 1]);
        }).reversed()).collect(Collectors.toList());
        // the top 15% (rounded down) of the ranked residues of this chain are considered early folding
        int numberOfEarlyFoldingResidues = (int) (0.15 * output.size());
        for (int i = 0; i < output.size(); i++) {
            outputJoiner.add(output.get(i) + "," + (i < numberOfEarlyFoldingResidues ? "early" : "late"));
        }
    });
    // write output
    System.out.println("writing output to " + outputPath);
    Files.write(outputPath, outputJoiner.toString().getBytes());
}
Also used : java.util(java.util) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) LoopFractionCalculator(de.bioforscher.jstructure.feature.loopfraction.LoopFractionCalculator) Classifier(weka.classifiers.Classifier) EgorAgreementCalculator(de.bioforscher.jstructure.feature.energyprofile.EgorAgreementCalculator) ProteinIdentifier(de.bioforscher.jstructure.model.identifier.ProteinIdentifier) ChainIdentifier(de.bioforscher.jstructure.model.identifier.ChainIdentifier) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) PLIPRestServiceQuery(de.bioforscher.jstructure.feature.interactions.PLIPRestServiceQuery) StandardFormat(de.bioforscher.jstructure.StandardFormat) PLIPIntraMolecularAnnotator(de.bioforscher.jstructure.feature.interactions.PLIPIntraMolecularAnnotator) Path(java.nio.file.Path) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) Files(java.nio.file.Files) AccessibleSurfaceAreaCalculator(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceAreaCalculator) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) Paths(java.nio.file.Paths) Document(org.jsoup.nodes.Document) weka.core(weka.core) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) InputStream(java.io.InputStream) Chain(de.bioforscher.jstructure.model.structure.Chain) ChainIdentifier(de.bioforscher.jstructure.model.identifier.ChainIdentifier) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) 
SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) Document(org.jsoup.nodes.Document) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector)

Example 27 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class Start2FoldXmlParser method assignValues.

/**
 * Transfers the protection level of an experiment onto the residues of a chain.
 *
 * <p>The experiment sequence (query) is globally aligned to the sequence of the chain
 * (target) using BLOSUM62 and the default gap penalty. Each experiment residue is then
 * mapped through the alignment onto the corresponding residue of the chain, whose
 * {@link Start2FoldResidueAnnotation} receives the protection level of the experiment.
 * Residues that cannot be mapped are logged and skipped.
 *
 * @param experiment the experiment providing sequence, residues and protection level
 * @param chain      the chain whose residues are annotated
 * @throws IllegalArgumentException if one of the sequences contains an unknown compound
 */
private static void assignValues(Experiment experiment, Chain chain) {
    String structureSequence = chain.getAminoAcidSequence();
    String measuredSequence = experiment.getSequence();
    try {
        // align sequences to ensure correct mapping: experiment = query, structure = target
        ProteinSequence query = new ProteinSequence(measuredSequence);
        ProteinSequence target = new ProteinSequence(structureSequence);
        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(query, target, Alignments.PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), SubstitutionMatrixHelper.getBlosum62());
        List<AminoAcid> chainResidues = chain.aminoAcids().collect(Collectors.toList());
        for (Experiment.Residue residue : experiment.getResidues()) {
            // experiment indices are 1-based, the alignment is queried with the decremented value
            int queryIndex = residue.getIndex() - 1;
            try {
                // workaround for STF0017: a proline at experiment position 1 is pinned to the
                // first residue of the structure instead of being looked up in the alignment
                boolean leadingProline = residue.getCode().equals("P") && residue.getIndex() == 1;
                int targetIndex = leadingProline ? 0 : alignment.getIndexInTargetForQueryAt(queryIndex);
                AminoAcid mappedResidue = chainResidues.get(targetIndex);
                // known case that cannot be aligned: STF0034
                Start2FoldResidueAnnotation annotation = mappedResidue.getFeature(Start2FoldResidueAnnotation.class);
                // assign experiment-specific protection level to residue; the lookup fails
                // (and is handled below) when the experiment has no matching level
                ProtectionLevel protectionLevel = Stream.of(ProtectionLevel.values())
                        .filter(candidate -> candidate == experiment.getProtectionLevel())
                        .findFirst()
                        .get();
                annotation.addProtectionLevelEntry(protectionLevel);
            } catch (Exception e) {
                // residue not present in structure - e.g. for STF0031 and STF0032
                logger.warn("alignment:{}{}", System.lineSeparator(), alignment.toString());
                logger.warn("failed to map residue {}-{}", residue.getCode(), residue.getIndex(), e);
            }
        }
    } catch (CompoundNotFoundException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) SubstitutionMatrixHelper(org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Experiment(de.bioforscher.start2fold.model.Experiment) Files(java.nio.file.Files) SequencePair(org.biojava.nbio.core.alignment.template.SequencePair) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UncheckedIOException(java.io.UncheckedIOException) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Stream(java.util.stream.Stream) ProtectionLevel(de.bioforscher.start2fold.model.ProtectionLevel) Alignments(org.biojava.nbio.alignment.Alignments) Document(org.jsoup.nodes.Document) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) InputStream(java.io.InputStream) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Experiment(de.bioforscher.start2fold.model.Experiment) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) SimpleGapPenalty(org.biojava.nbio.alignment.SimpleGapPenalty) CompoundNotFoundException(org.biojava.nbio.core.exceptions.CompoundNotFoundException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ProteinSequence(org.biojava.nbio.core.sequence.ProteinSequence) AminoAcidCompound(org.biojava.nbio.core.sequence.compound.AminoAcidCompound)

Example 28 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A01_CreateContactMaps method mapToAdjacentAminoAcid.

/**
 * Resolves the partner of a contact that lies outside of a selection.
 *
 * @param contact            the pair of residues forming the contact
 * @param selectedAminoAcids the selected residues
 * @return the residue of the contact that is not selected, provided that exactly one of the
 *         two residues is selected; an empty Optional when both or neither are selected
 */
private static Optional<AminoAcid> mapToAdjacentAminoAcid(Pair<AminoAcid, AminoAcid> contact, List<AminoAcid> selectedAminoAcids) {
    AminoAcid left = contact.getLeft();
    AminoAcid right = contact.getRight();
    boolean leftSelected = selectedAminoAcids.contains(left);
    boolean rightSelected = selectedAminoAcids.contains(right);
    // both inside or both outside of the selection: no adjacent residue to report
    if (leftSelected == rightSelected) {
        return Optional.empty();
    }
    // exactly one side is selected - hand back the other one
    return Optional.of(leftSelected ? right : left);
}
Also used : AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)

Example 29 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A04_WriteTransitionStateCsv method handleLineLocally.

/**
 * Creates the CSV records for one entry: for every residue of the native chain one
 * {@code native} record followed by one {@code transition} record, the latter holding the
 * values averaged over all reconstructed models of that entry.
 *
 * <p>The input line is expected as {@code entryId;pdbId;[experimentId,...]}. Only the first
 * chain of the structure is considered and the chain column is always written as {@code A}.
 *
 * @param line the semicolon-separated description of the entry
 * @return the records joined by the line separator, or an empty Optional if anything failed
 *         (the failure is logged)
 */
private static Optional<String> handleLineLocally(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",").splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", "")).map(Integer::valueOf).collect(Collectors.toList());
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        Chain originalChain = structure.chains().findFirst().get();
        ProteinGraph originalFullPlipGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.SALENTIN2015);
        ProteinGraph originalHydrogenPlipGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROGEN_BONDS);
        ProteinGraph originalHydrophobicPlipGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROPHOBIC_INTERACTION);
        ProteinGraph originalConvGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.CALPHA8);
        Start2FoldXmlParser.parseSpecificExperiment(originalChain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        List<AminoAcid> earlyFoldingResidues = originalChain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());
        List<Chain> reconstructedChains = loadReconstructedChainsOf(pdbId);
        for (Chain reconstructedChain : reconstructedChains) {
            Document document = PLIPRestServiceQuery.calculateIntraChainDocument(reconstructedChain);
            // FIX: annotate the chain the document was computed for. Previously the contacts of
            // every reconstruction were written onto the native chain.
            PLIP_INTRA_MOLECULAR_ANNOTATOR.process(reconstructedChain, document);
        }
        List<ProteinGraph> convGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.CALPHA8)).collect(Collectors.toList());
        List<ProteinGraphCalculations> convGraphCalculations = convGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        List<ProteinGraph> fullPlipGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.SALENTIN2015)).collect(Collectors.toList());
        List<ProteinGraphCalculations> fullPlipGraphCalculations = fullPlipGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        List<ProteinGraph> hydrogenPlipGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROGEN_BONDS)).collect(Collectors.toList());
        // FIX: derive the calculations from the hydrogen bond graphs (was: fullPlipGraphs)
        List<ProteinGraphCalculations> hydrogenPlipGraphCalculations = hydrogenPlipGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        List<ProteinGraph> hydrophobicPlipGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROPHOBIC_INTERACTION)).collect(Collectors.toList());
        // FIX: derive the calculations from the hydrophobic graphs (was: fullPlipGraphs)
        List<ProteinGraphCalculations> hydrophobicPlipGraphCalculations = hydrophobicPlipGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        return Optional.of(originalChain.aminoAcids().map(aminoAcid -> {
            ResidueTopologicPropertiesContainer container = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);
            ResidueIdentifier residueIdentifier = aminoAcid.getResidueIdentifier();
            String residueId = String.valueOf(residueIdentifier);
            String foldingLabel = earlyFoldingResidues.contains(aminoAcid) ? "early" : "late";

            // record describing the residue in the native structure
            String nativeRecord = String.join(",",
                    pdbId,
                    "A",
                    residueId,
                    aminoAcid.getOneLetterCode(),
                    String.valueOf(originalFullPlipGraph.getContactsOf(aminoAcid).size()),
                    String.valueOf(originalFullPlipGraph.getLocalContactsOf(aminoAcid).size()),
                    String.valueOf(originalFullPlipGraph.getNonLocalContactsOf(aminoAcid).size()),
                    StandardFormat.format(container.getFullPlip().getBetweenness()),
                    StandardFormat.format(container.getFullPlip().getCloseness()),
                    StandardFormat.format(container.getFullPlip().getClusteringCoefficient()),
                    String.valueOf(originalHydrogenPlipGraph.getContactsOf(aminoAcid).size()),
                    String.valueOf(originalHydrogenPlipGraph.getLocalContactsOf(aminoAcid).size()),
                    String.valueOf(originalHydrogenPlipGraph.getNonLocalContactsOf(aminoAcid).size()),
                    StandardFormat.format(container.getHydrogenPlip().getBetweenness()),
                    StandardFormat.format(container.getHydrogenPlip().getCloseness()),
                    StandardFormat.format(container.getHydrogenPlip().getClusteringCoefficient()),
                    String.valueOf(originalHydrophobicPlipGraph.getContactsOf(aminoAcid).size()),
                    String.valueOf(originalHydrophobicPlipGraph.getLocalContactsOf(aminoAcid).size()),
                    String.valueOf(originalHydrophobicPlipGraph.getNonLocalContactsOf(aminoAcid).size()),
                    StandardFormat.format(container.getHydrophobicPlip().getBetweenness()),
                    StandardFormat.format(container.getHydrophobicPlip().getCloseness()),
                    StandardFormat.format(container.getHydrophobicPlip().getClusteringCoefficient()),
                    String.valueOf(originalConvGraph.getContactsOf(aminoAcid).size()),
                    String.valueOf(originalConvGraph.getLocalContactsOf(aminoAcid).size()),
                    String.valueOf(originalConvGraph.getNonLocalContactsOf(aminoAcid).size()),
                    StandardFormat.format(container.getConventional().getBetweenness()),
                    StandardFormat.format(container.getConventional().getCloseness()),
                    StandardFormat.format(container.getConventional().getClusteringCoefficient()),
                    String.valueOf(container.getFullPlip().getDistinctNeighborhoodCount()),
                    String.valueOf(container.getConventional().getDistinctNeighborhoodCount()),
                    StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()),
                    StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()),
                    StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()),
                    foldingLabel,
                    "native");

            // record holding the same columns averaged over all reconstructed models
            String transitionRecord = String.join(",",
                    pdbId,
                    "A",
                    residueId,
                    aminoAcid.getOneLetterCode(),
                    formatMeanOfInts(fullPlipGraphs, graph -> graph.getContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(fullPlipGraphs, graph -> graph.getLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(fullPlipGraphs, graph -> graph.getNonLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfDoubles(fullPlipGraphCalculations, calculations -> calculations.betweenness(residueIdentifier)),
                    formatMeanOfDoubles(fullPlipGraphCalculations, calculations -> calculations.closeness(residueIdentifier)),
                    formatMeanOfDoubles(fullPlipGraphCalculations, calculations -> calculations.clusteringCoefficient(residueIdentifier)),
                    formatMeanOfInts(hydrogenPlipGraphs, graph -> graph.getContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(hydrogenPlipGraphs, graph -> graph.getLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(hydrogenPlipGraphs, graph -> graph.getNonLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfDoubles(hydrogenPlipGraphCalculations, calculations -> calculations.betweenness(residueIdentifier)),
                    formatMeanOfDoubles(hydrogenPlipGraphCalculations, calculations -> calculations.closeness(residueIdentifier)),
                    formatMeanOfDoubles(hydrogenPlipGraphCalculations, calculations -> calculations.clusteringCoefficient(residueIdentifier)),
                    formatMeanOfInts(hydrophobicPlipGraphs, graph -> graph.getContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(hydrophobicPlipGraphs, graph -> graph.getLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(hydrophobicPlipGraphs, graph -> graph.getNonLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfDoubles(hydrophobicPlipGraphCalculations, calculations -> calculations.betweenness(residueIdentifier)),
                    formatMeanOfDoubles(hydrophobicPlipGraphCalculations, calculations -> calculations.closeness(residueIdentifier)),
                    formatMeanOfDoubles(hydrophobicPlipGraphCalculations, calculations -> calculations.clusteringCoefficient(residueIdentifier)),
                    formatMeanOfInts(convGraphs, graph -> graph.getContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(convGraphs, graph -> graph.getLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfInts(convGraphs, graph -> graph.getNonLocalContactsOf(residueIdentifier).size()),
                    formatMeanOfDoubles(convGraphCalculations, calculations -> calculations.betweenness(residueIdentifier)),
                    formatMeanOfDoubles(convGraphCalculations, calculations -> calculations.closeness(residueIdentifier)),
                    formatMeanOfDoubles(convGraphCalculations, calculations -> calculations.clusteringCoefficient(residueIdentifier)),
                    formatMeanOfInts(fullPlipGraphCalculations, calculations -> calculations.distinctNeighborhoodCount(residueIdentifier)),
                    formatMeanOfInts(convGraphCalculations, calculations -> calculations.distinctNeighborhoodCount(residueIdentifier)),
                    formatMeanOfDoubles(reconstructedChains, chain -> chain.select().residueIdentifier(aminoAcid.getResidueIdentifier()).asAminoAcid().getFeature(EnergyProfile.class).getSolvationEnergy()),
                    formatMeanOfDoubles(reconstructedChains, chain -> chain.select().residueIdentifier(aminoAcid.getResidueIdentifier()).asAminoAcid().getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()),
                    formatMeanOfDoubles(reconstructedChains, chain -> chain.select().residueIdentifier(aminoAcid.getResidueIdentifier()).asAminoAcid().getFeature(LoopFraction.class).getLoopFraction()),
                    foldingLabel,
                    "transition");

            return nativeRecord + System.lineSeparator() + transitionRecord;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        // the logger call already carries the stack trace
        logger.info("calculation failed for {}", line, e);
        return Optional.empty();
    }
}

/**
 * Parses the first chain of every reconstructed model ("_model" files) of the given entry.
 * The directory stream is closed before returning.
 */
private static List<Chain> loadReconstructedChainsOf(String pdbId) throws IOException {
    try (java.util.stream.Stream<java.nio.file.Path> paths = Files.list(Paths.get("/home/bittrich/git/phd_sb_repo/data/" + "reconstruction-start2fold/reconstructions/" + pdbId + "-early-conventional-1/stage1/"))) {
        return paths.filter(path -> path.toFile().getName().contains("_model"))
                .map(path -> StructureParser.fromPath(path).forceProteinName(IdentifierFactory.createProteinIdentifier(pdbId, path.toFile().getName().split("_")[2].split("\\.")[0])).parse().getChains().get(0))
                .collect(Collectors.toList());
    }
}

/** Formats the arithmetic mean of an int-valued property; fails on an empty list. */
private static <T> String formatMeanOfInts(List<T> items, java.util.function.ToIntFunction<? super T> property) {
    return StandardFormat.format(items.stream().mapToInt(property).average().getAsDouble());
}

/** Formats the arithmetic mean of a double-valued property; fails on an empty list. */
private static <T> String formatMeanOfDoubles(List<T> items, java.util.function.ToDoubleFunction<? super T> property) {
    return StandardFormat.format(items.stream().mapToDouble(property).average().getAsDouble());
}
Also used : LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) PLIPRestServiceQuery(de.bioforscher.jstructure.feature.interactions.PLIPRestServiceQuery) StandardFormat(de.bioforscher.jstructure.StandardFormat) PLIPIntraMolecularAnnotator(de.bioforscher.jstructure.feature.interactions.PLIPIntraMolecularAnnotator) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Logger(org.slf4j.Logger) Files(java.nio.file.Files) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) Paths(java.nio.file.Paths) de.bioforscher.jstructure.feature.graphs(de.bioforscher.jstructure.feature.graphs) Document(org.jsoup.nodes.Document) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Document(org.jsoup.nodes.Document) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) 
EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Structure(de.bioforscher.jstructure.model.structure.Structure) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea)

Example 30 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class SecondaryStructureElement method of.

/**
 * Groups consecutive residues of a chain that share the same non-coil secondary structure
 * type into elements.
 *
 * <p>The reduced secondary structure representation of each residue is inspected in chain
 * order; the type {@code "c"} never opens an element. Start and end of an element are taken
 * from {@code getResidueIndex()} of its first and last residue.
 *
 * @param chain the chain to scan
 * @return the elements in the order they occur in the chain, empty if there are none
 */
public static List<SecondaryStructureElement> of(Chain chain) {
    List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());
    List<SecondaryStructureElement> elements = new ArrayList<>();
    // type of the previously visited residue; "c" means that no element is open
    String openType = "c";
    int firstIndex = -1;
    int lastIndex = -1;
    for (int i = 0; i < residues.size(); i++) {
        AminoAcid residue = residues.get(i);
        String type = residue.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().getReducedRepresentation();
        // the type changed while an element was open: close and store it
        boolean elementOpen = !openType.equals("c");
        if (elementOpen && !openType.equals(type)) {
            elements.add(new SecondaryStructureElement(firstIndex, lastIndex, openType));
            firstIndex = -1;
            lastIndex = -1;
        }
        openType = type;
        if (openType.equals("c")) {
            continue;
        }
        // extend the open element (or open a new one) by the current residue
        int residueIndex = residue.getResidueIndex();
        firstIndex = firstIndex == -1 ? residueIndex : firstIndex;
        lastIndex = residueIndex;
    }
    // the chain may end within an element
    if (!openType.equals("c") && firstIndex != -1) {
        elements.add(new SecondaryStructureElement(firstIndex, lastIndex, openType));
    }
    return elements;
}
Also used : AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ArrayList(java.util.ArrayList)

Aggregations

AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)66 Chain (de.bioforscher.jstructure.model.structure.Chain)40 Collectors (java.util.stream.Collectors)40 IOException (java.io.IOException)36 Files (java.nio.file.Files)35 List (java.util.List)31 StandardFormat (de.bioforscher.jstructure.StandardFormat)26 StructureParser (de.bioforscher.jstructure.model.structure.StructureParser)26 Path (java.nio.file.Path)25 Structure (de.bioforscher.jstructure.model.structure.Structure)23 Pattern (java.util.regex.Pattern)17 Logger (org.slf4j.Logger)16 LoggerFactory (org.slf4j.LoggerFactory)16 Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation)15 UncheckedIOException (java.io.UncheckedIOException)14 ArrayList (java.util.ArrayList)14 Stream (java.util.stream.Stream)14 Start2FoldResidueAnnotation (de.bioforscher.start2fold.model.Start2FoldResidueAnnotation)13 Optional (java.util.Optional)13 Pair (de.bioforscher.jstructure.mathematics.Pair)11