Search in sources :

Example 1 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class A06_WriteStructuralInformationByResidueCsv method handleLine.

/**
 * Builds the per-residue CSV rows for one record of the input list.
 * <p>
 * The structure named by the PDB id is parsed and its first chain is annotated with Start2Fold
 * stability/experiment data, hot spot scores (best effort), eQuant scores and - when present -
 * functional residue annotations. One comma-separated row is then emitted for every standard
 * amino acid of that chain.
 *
 * @param line semicolon-separated record: column 0 is the entry id, column 1 the PDB id and
 *             column 2 a bracketed, comma-separated list of experiment ids; the complete split
 *             array is additionally handed to
 *             {@code Start2FoldConstants.extractFunctionalResidueNumbers}
 * @return the rows joined by the system line separator, or an empty Optional when any step of
 *         the computation throws
 */
private static Optional<String> handleLine(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        // "[1,2,3]" -> 1, 2, 3
        List<Integer> experimentIds = Pattern.compile(",").splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", "")).map(Integer::valueOf).collect(Collectors.toList());
        // boolean sane = split[6].equalsIgnoreCase("true");
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // only the first chain is processed; a structure without chains ends up in the outer catch
        Chain chain = structure.chains().findFirst().get();
        Path start2foldXml = Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml");
        Start2FoldXmlParser.parseStability(chain, start2foldXml);
        Start2FoldXmlParser.parseSpecificExperiment(chain, start2foldXml, experimentIds);
        try {
            EvolutionaryCouplingParser.parseHotSpotFile(chain, Start2FoldConstants.COUPLING_DIRECTORY.resolve(entryId.toUpperCase() + "_hs.html"));
        } catch (Exception e) {
            // best effort: processing continues without hot spot data, but the failure is reported
            logger.warn("could not parse hot spot file for {}: {}", entryId, e.getMessage());
        }
        boolean ecAnnotation = chain.aminoAcids().anyMatch(residue -> residue.getFeature(HotSpotScoring.class).getEcCount() > 0);
        List<AminoAcid> topScoringResidues;
        if (ecAnnotation) {
            // select the 40% of residues with the highest cumulative strength
            double fraction = 0.4;
            int residuesToSelect = (int) (fraction * chain.aminoAcids().count());
            topScoringResidues = chain.aminoAcids().sorted(Comparator.comparingDouble((AminoAcid aminoAcid) -> aminoAcid.getFeature(HotSpotScoring.class).getCumStrength()).reversed()).limit(residuesToSelect).collect(Collectors.toList());
        } else {
            topScoringResidues = new ArrayList<>();
        }
        EQuantParser.parseEQuantFile(chain, Start2FoldConstants.EQUANT_DIRECTORY.resolve(entryId.toLowerCase() + ".equant-small.txt"));
        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());
        List<AminoAcid> strongResidues = chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isStrong()).collect(Collectors.toList());
        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctionalResidueNumbers(split);
        List<AminoAcid> functionalResidues = new ArrayList<>();
        // do nothing if no annotation of functional residues exists
        if (!functionalResidueNumbers.isEmpty()) {
            FunctionalResidueParser.parse(chain, functionalResidueNumbers);
            chain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional()).forEach(functionalResidues::add);
        }
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance().parseContactStructuralInformation(Start2FoldConstants.DATA_DIRECTORY.resolve("si").resolve("raw").resolve(entryId.toUpperCase() + ".out"), chain, earlyFoldingResidues);
        List<ResidueStructuralInformation> residueStructuralInformation = StructuralInformationParserService.getInstance().composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contactStructuralInformation);
        ResidueGraph conventionalProteinGraph = ResidueGraph.createResidueGraph(chain, ContactDefinitionFactory.createAlphaCarbonContactDefinition(8.0));
        // non-coil residues whose surrounding secondary structure element contains an early folding residue
        List<AminoAcid> residuesInEarlyFoldingSecondaryStructureElements = chain.aminoAcids().filter(aminoAcid -> !aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().isCoilType()).filter(aminoAcid -> {
            GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement = aminoAcid.getFeature(GenericSecondaryStructure.class).getSurroundingSecondaryStructureElement(aminoAcid);
            List<AminoAcid> surroundingAminoAcids = chain.getAminoAcids().subList(surroundingSecondaryStructureElement.getStart(), surroundingSecondaryStructureElement.getEnd() + 1);
            return surroundingAminoAcids.stream().anyMatch(earlyFoldingResidues::contains);
        }).collect(Collectors.toList());
        System.out.println("efr: " + (earlyFoldingResidues.size() > 0) + " strong: " + (strongResidues.size() > 0) + " functional: " + (functionalResidues.size() > 0));
        return Optional.of(chain.aminoAcids().filter(AminoAcid::isStandardAminoAcid).map(aminoAcid -> {
            GenericSecondaryStructure sse = aminoAcid.getFeature(GenericSecondaryStructure.class);
            HotSpotScoring hotSpotScoring = aminoAcid.getFeature(HotSpotScoring.class);
            PLIPInteractionContainer plipInteractionContainer = aminoAcid.getFeature(PLIPInteractionContainer.class);
            // non-local: the residue indices of both interaction partners differ by more than 5
            PLIPInteractionContainer nonLocalPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> Math.abs(inter.getPartner1().getResidueIndex() - inter.getPartner2().getResidueIndex()) > 5).collect(Collectors.toList()));
            // local: every interaction that is not part of the non-local container
            PLIPInteractionContainer localPlipInteractionContainer = new PLIPInteractionContainer(null, plipInteractionContainer.getInteractions().stream().filter(inter -> !nonLocalPlipInteractionContainer.getInteractions().contains(inter)).collect(Collectors.toList()));
            String equantScore = "NA";
            try {
                equantScore = StandardFormat.format(aminoAcid.getFeature(EQuantScore.class).getEvaluation());
            } catch (ComputationException e) {
                logger.warn("missing equant scoring for {}", aminoAcid);
            }
            // "NA" is kept when the entry has no functional annotation at all
            String functionalAnnotation = "NA";
            if (functionalResidues.size() > 0) {
                functionalAnnotation = functionalResidues.contains(aminoAcid) ? "functional" : "non-functional";
            }
            ResidueTopologicPropertiesContainer residueTopologicPropertiesContainer = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);
            // relative distance to the nearer end of the residue list
            // NOTE(review): the first residue yields 0 but the last yields 1 / size - confirm this asymmetry is intended
            double terminusDistance = aminoAcids.indexOf(aminoAcid);
            terminusDistance = Math.min(terminusDistance, aminoAcids.size() - terminusDistance);
            terminusDistance /= (double) aminoAcids.size();
            ResidueStructuralInformation residueStructuralInformationEntry = residueStructuralInformation.get(aminoAcid.getAminoAcidIndex());
            GenericSecondaryStructure.SecondaryStructureElement surroundingSecondaryStructureElement = sse.getSurroundingSecondaryStructureElement(aminoAcid);
            // coil residues are reported with size 0 and terminus distance -1
            int sseSize = surroundingSecondaryStructureElement.getSize();
            if (sse.getSecondaryStructure().isCoilType()) {
                sseSize = 0;
            }
            int sseTerminusDistance = surroundingSecondaryStructureElement.getTerminusDistance();
            if (sse.getSecondaryStructure().isCoilType()) {
                sseTerminusDistance = -1;
            }
            // NOTE(review): the chain column is the literal "A" although the first chain of the structure is used - confirm
            return pdbId + "," + "A" + "," + aminoAcid.getResidueIdentifier() + "," + aminoAcid.getOneLetterCode() + ","
                    // secondary structure
                    + sse.getSecondaryStructure().getReducedRepresentation() + "," + sse.getSecondaryStructure().getOneLetterRepresentation() + ","
                    + (sse.getSecondaryStructure().isHelixType() ? "true" : "false") + "," + (sse.getSecondaryStructure().isStrandType() ? "true" : "false") + "," + (sse.getSecondaryStructure().isCoilType() ? "true" : "false") + ","
                    + sseSize + "," + sseTerminusDistance + ","
                    // geometry
                    + (aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed() ? "exposed" : "buried") + ","
                    + StandardFormat.format(aminoAcid.getFeature(GeometricProperties.class).getDistanceToCentroid()) + ","
                    + StandardFormat.format(terminusDistance) + ","
                    // PLIP interaction counts: all, local, non-local
                    + plipInteractionContainer.getHydrogenBonds().size() + "," + plipInteractionContainer.getHydrophobicInteractions().size() + "," + plipInteractionContainer.getBackboneInteractions().size() + "," + plipInteractionContainer.getInteractions().size() + ","
                    + localPlipInteractionContainer.getHydrogenBonds().size() + "," + localPlipInteractionContainer.getHydrophobicInteractions().size() + "," + localPlipInteractionContainer.getBackboneInteractions().size() + "," + localPlipInteractionContainer.getInteractions().size() + ","
                    + nonLocalPlipInteractionContainer.getHydrogenBonds().size() + "," + nonLocalPlipInteractionContainer.getHydrophobicInteractions().size() + "," + nonLocalPlipInteractionContainer.getBackboneInteractions().size() + "," + nonLocalPlipInteractionContainer.getInteractions().size() + ","
                    // energy, eQuant, accessibility, loop fraction
                    + StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()) + ","
                    + StandardFormat.format(aminoAcid.getFeature(EgorAgreement.class).getEgorPrediction()) + ","
                    + equantScore + ","
                    + StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + ","
                    + StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()) + ","
                    // hot spot scoring
                    + hotSpotScoring.getEcCount() + "," + StandardFormat.format(hotSpotScoring.getCumStrength()) + "," + StandardFormat.format(hotSpotScoring.getEcStrength()) + "," + hotSpotScoring.getConservation() + "," + topScoringResidues.contains(aminoAcid) + ","
                    // graph properties: full PLIP, hydrogen PLIP, hydrophobic PLIP
                    + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getClusteringCoefficient()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrogenPlip().getClusteringCoefficient()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getHydrophobicPlip().getClusteringCoefficient()) + ","
                    // conventional contact graph
                    + conventionalProteinGraph.getContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getLocalContactsOf(aminoAcid).size() + "," + conventionalProteinGraph.getNonLocalContactsOf(aminoAcid).size() + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getBetweenness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getCloseness()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getClusteringCoefficient()) + ","
                    + StandardFormat.format(residueTopologicPropertiesContainer.getFullPlip().getDistinctNeighborhoodCount()) + "," + StandardFormat.format(residueTopologicPropertiesContainer.getConventional().getDistinctNeighborhoodCount()) + ","
                    // structural information
                    + StandardFormat.format(residueStructuralInformationEntry.getAverageRmsdIncrease()) + "," + StandardFormat.format(residueStructuralInformationEntry.getAverageTmScoreIncrease()) + "," + StandardFormat.format(residueStructuralInformationEntry.getAverageQIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getMaximumRmsdIncrease()) + "," + StandardFormat.format(residueStructuralInformationEntry.getMaximumTmScoreIncrease()) + "," + StandardFormat.format(residueStructuralInformationEntry.getMaximumQIncrease()) + ","
                    + StandardFormat.format(residueStructuralInformationEntry.getAverageRmsdIncreaseZScore()) + "," + StandardFormat.format(residueStructuralInformationEntry.getFractionOfTopScoringContacts()) + ","
                    // class labels and per-entry annotation flags
                    + (earlyFoldingResidues.contains(aminoAcid) ? "early" : "late") + "," + (residuesInEarlyFoldingSecondaryStructureElements.contains(aminoAcid) ? "true" : "false") + "," + functionalAnnotation + "," + (strongResidues.contains(aminoAcid) ? "strong" : "weak") + ","
                    + (earlyFoldingResidues.size() > 0) + "," + (strongResidues.size() > 0) + "," + (functionalResidues.size() > 0) + "," + ecAnnotation;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        // the trailing throwable makes the logger print the stack trace; getMessage() alone may be null
        logger.info("calculation failed for {}\nby: {}", line, e.getMessage(), e);
        return Optional.empty();
    }
}
Also used : LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) LoggerFactory(org.slf4j.LoggerFactory) EQuantScore(de.bioforscher.jstructure.efr.model.EQuantScore) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) EgorAgreement(de.bioforscher.jstructure.feature.energyprofile.EgorAgreement) HotSpotScoring(de.bioforscher.jstructure.efr.model.HotSpotScoring) ArrayList(java.util.ArrayList) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) Path(java.nio.file.Path) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Logger(org.slf4j.Logger) GeometricProperties(de.bioforscher.jstructure.feature.geometry.GeometricProperties) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Files(java.nio.file.Files) de.bioforscher.jstructure.efr.parser(de.bioforscher.jstructure.efr.parser) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FunctionalResidueAnnotation(de.bioforscher.jstructure.efr.model.FunctionalResidueAnnotation) Start2FoldConstants(de.bioforscher.jstructure.efr.Start2FoldConstants) List(java.util.List) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.graph.ResidueTopologicPropertiesContainer) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) 
ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interaction.PLIPInteractionContainer) Comparator(java.util.Comparator) ContactDefinitionFactory(de.bioforscher.jstructure.graph.contact.definition.ContactDefinitionFactory) Chain(de.bioforscher.jstructure.model.structure.Chain) ArrayList(java.util.ArrayList) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) HotSpotScoring(de.bioforscher.jstructure.efr.model.HotSpotScoring) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) ArrayList(java.util.ArrayList) List(java.util.List) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) GeometricProperties(de.bioforscher.jstructure.feature.geometry.GeometricProperties) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) Path(java.nio.file.Path) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) EgorAgreement(de.bioforscher.jstructure.feature.energyprofile.EgorAgreement) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueTopologicPropertiesContainer(de.bioforscher.jstructure.graph.ResidueTopologicPropertiesContainer) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) FunctionalResidueAnnotation(de.bioforscher.jstructure.efr.model.FunctionalResidueAnnotation) EQuantScore(de.bioforscher.jstructure.efr.model.EQuantScore) ComputationException(de.bioforscher.jstructure.model.feature.ComputationException) IOException(java.io.IOException) 
EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) PLIPInteractionContainer(de.bioforscher.jstructure.feature.interaction.PLIPInteractionContainer)

Example 2 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class A01_CreatePyMolRenderJobsForEarlyFoldingResidues method composePyMolCommand.

/**
 * Composes the PyMOL script that renders the early folding residues of one entry.
 * <p>
 * The structure is parsed by PDB id, its first chain is annotated with the given Start2Fold
 * experiments and every residue flagged as early is colored with {@code efr}.
 *
 * @param line semicolon-separated record: column 0 is the entry id, column 1 the PDB id and
 *             column 2 a bracketed, comma-separated list of experiment ids
 * @return the PyMOL commands, each terminated by the system line separator
 */
private static String composePyMolCommand(String line) {
    String[] columns = line.split(";");
    String entryId = columns[0];
    String pdbId = columns[1];

    // "[1,2,3]" -> 1, 2, 3
    String rawExperimentIds = columns[2].replaceAll("\\[", "").replaceAll("]", "");
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(rawExperimentIds)
            .map(Integer::valueOf)
            .collect(Collectors.toList());

    // only the first chain of the structure is considered
    Chain firstChain = StructureParser.fromPdbId(pdbId).parse().chains().findFirst().get();
    Start2FoldXmlParser.parseSpecificExperiment(firstChain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

    String newline = System.lineSeparator();
    // one color command per early folding residue; empty when there is none
    String colorCommands = firstChain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .map(AminoAcid::getResidueIdentifier)
            .map(ResidueIdentifier::getResidueNumber)
            .map(residueNumber -> "color efr, resi " + residueNumber)
            .collect(Collectors.joining(newline));

    return String.join(newline,
            "delete all",
            "fetch " + pdbId + ", async=0",
            // hide non-relevant stuff
            "hide everything",
            "show cartoon, chain A",
            // decolor everything
            "color grey80",
            "zoom (chain A)",
            colorCommands,
            "ray",
            "png " + Start2FoldConstants.PYMOL_DIRECTORY.resolve(entryId + "-efr.png")) + newline;
}
Also used : Files(java.nio.file.Files) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Structure(de.bioforscher.jstructure.model.structure.Structure)

Example 3 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class A04_VisualizeAARSStructures method composePyMolCommand.

/**
 * Composes the PyMOL script that highlights early folding and functional residues of one
 * representative aaRS chain.
 *
 * @param line comma-separated record: column 0 is {@code <pdbId>_<chainId>}, column 1 the class
 *             label that is inserted into the {@code C<clazz>} data directory names
 * @return the PyMOL commands, or an empty Optional when the chain is not listed in the
 *         representative FASTA file, when one of the input files cannot be opened, or when no
 *         early folding or no functional residues were found
 */
private static Optional<String> composePyMolCommand(String line) {
    String[] split = line.split(",");
    String id = split[0];
    // split the id once instead of once per component
    String[] idParts = id.split("_");
    String pdbId = idParts[0];
    String chainId = idParts[1];
    String clazz = split[1];
    // skip non-representative structures
    String fastaHeader = ">" + pdbId + "_" + chainId;
    boolean representative;
    try (Stream<String> lines = Files.lines(Paths.get("/home/bittrich/git/aars_data/T04_representative_sequences/C" + clazz + "_representatives_cluster.fasta"))) {
        // stop reading as soon as the header is found instead of loading the whole file
        representative = lines.anyMatch(fastaHeader::equals);
    } catch (IOException e) {
        // e.printStackTrace();
        return Optional.empty();
    }
    if (!representative) {
        return Optional.empty();
    }
    // System.out.println(line);
    Structure originalStructure = StructureParser.fromPath(Paths.get("/home/bittrich/git/aars_data/T06_renumbered_structures/C" + clazz + "/" + pdbId + "_" + chainId + ".pdb")).parse();
    Chain originalChain = originalStructure.select().chainName(chainId).asChain();
    Structure renumberedStructure = StructureParser.fromPath(Paths.get("/home/bittrich/git/aars_data/T06_renumbered_structures/C" + clazz + "/renum/" + pdbId + "_" + chainId + "_renum.pdb")).parse();
    Chain renumberedChain = renumberedStructure.select().chainName(chainId).asChain();
    List<String> earlyLines;
    try (Stream<String> lines = Files.lines(Start2FoldConstants.DATA_DIRECTORY.resolve("classifier").resolve("aars").resolve("out").resolve(pdbId + ".out"))) {
        earlyLines = lines.collect(Collectors.toList());
    } catch (IOException e) {
        // e.printStackTrace();
        return Optional.empty();
    }
    // keep rows of this chain that end with "early"; the second column is read as the residue number
    List<Integer> earlyFoldingResidues = earlyLines.stream()
            .filter(out -> !out.startsWith("chain"))
            .filter(out -> out.endsWith("early"))
            .filter(out -> out.startsWith(chainId))
            .map(out -> out.split(","))
            // index 1 is accessed below, so at least two columns are required
            .filter(outSplit -> outSplit.length > 1)
            .map(outSplit -> outSplit[1])
            .map(Integer::valueOf)
            .collect(Collectors.toList());
    List<String> functionalLines;
    try (Stream<String> lines = Files.lines(Paths.get("/home/bittrich/git/aars_data/T09_interactions/C" + clazz + "/contacts_per_structure.txt"))) {
        functionalLines = lines.collect(Collectors.toList());
    } catch (IOException e) {
        // e.printStackTrace();
        return Optional.empty();
    }
    // residue numbers are listed between '[' and ']' in renumbered form and are mapped back to
    // the original numbering; numbers without a mapping are dropped
    List<Integer> functionalResidues = functionalLines.stream()
            .filter(l -> l.startsWith(pdbId + "_" + chainId))
            .map(l -> l.split("\\[")[1].split("]")[0])
            .map(l -> l.split(","))
            .flatMap(Stream::of)
            .mapToInt(Integer::valueOf)
            .mapToObj(renumberedResidueNumber -> mapToOriginalResidueNumber(renumberedResidueNumber, originalChain, renumberedChain))
            .filter(Optional::isPresent)
            .map(Optional::get)
            .collect(Collectors.toList());
    if (earlyFoldingResidues.isEmpty() || functionalResidues.isEmpty()) {
        return Optional.empty();
    }
    System.out.println("EFR: " + earlyFoldingResidues);
    System.out.println("func: " + functionalResidues);
    System.out.println("overlap: " + SetOperations.createIntersectionSet(earlyFoldingResidues, functionalResidues));
    return Optional.of("delete all" + System.lineSeparator() + "fetch " + pdbId + ", async=0" + System.lineSeparator() + // hide non-relevant stuff
    "hide everything" + System.lineSeparator() + "show cartoon, chain " + chainId + System.lineSeparator() + // decolor everything
    "color grey80" + System.lineSeparator() + "zoom (chain " + chainId + ")" + System.lineSeparator() + earlyFoldingResidues.stream().map(res -> "color efr, resi " + res).collect(Collectors.joining(System.lineSeparator())) + System.lineSeparator() + functionalResidues.stream().map(res -> "color func, resi " + res).collect(Collectors.joining(System.lineSeparator())) + System.lineSeparator() + "ray" + System.lineSeparator() + "png " + Start2FoldConstants.DATA_DIRECTORY.resolve("classifier").resolve("aars").resolve("png").resolve(id + ".png") + System.lineSeparator());
}
Also used : LinearAlgebra(de.bioforscher.jstructure.mathematics.LinearAlgebra) Files(java.nio.file.Files) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) Group(de.bioforscher.jstructure.model.structure.Group) SetOperations(de.bioforscher.jstructure.mathematics.SetOperations) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) Optional(java.util.Optional) StandardFormat(de.bioforscher.jstructure.StandardFormat) Chain(de.bioforscher.jstructure.model.structure.Chain) Optional(java.util.Optional) IOException(java.io.IOException) Structure(de.bioforscher.jstructure.model.structure.Structure)

Example 4 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class EarlyFoldingClassificationRunner method main.

/**
 * Command-line entry point: reads a structure and writes its classification report.
 * <p>
 * Exactly two arguments are expected. The first is tried as a PDB id; when anything in that
 * attempt throws, it is treated as a path to a structure file instead. The second argument is
 * the path of the output file.
 *
 * @param args {@code input} and {@code output}, in that order
 * @throws IOException declared by {@link #process(Structure, Path)}, which writes the report
 */
public static void main(String[] args) throws IOException {
    boolean wrongArgumentCount = args.length != 2;
    if (wrongArgumentCount) {
        System.out.println("incorrect number of arguments\n"
                + "usage: java -jar efr.jar input output\n"
                + "input: either '/a/path/to/some/file.pdb' or '1pdb' a pdb-id\n"
                + "output: either '/a/absolute/path/output.csv' or 'relative.csv' a relative path");
        return;
    }

    final String source = args[0];
    final Path destination = Paths.get(args[1]);
    System.out.println();

    // parse structure - classic flow-control by exceptions
    Structure parsed;
    try {
        ProteinIdentifier identifier = IdentifierFactory.createProteinIdentifier(source);
        parsed = StructureParser.fromProteinIdentifier(identifier).parse();
        System.out.println("parsing structure by pdb-id: '" + identifier + "'");
    } catch (Exception notParsableAsPdbId) {
        // any failure above, including a failed parse, falls back to reading a local file
        System.out.println("parsing structure from file at: '" + source + "'");
        parsed = StructureParser.fromPath(Paths.get(source)).parse();
    }
    System.out.println();

    getInstance().process(parsed, destination);
}
Also used : Path(java.nio.file.Path) ProteinIdentifier(de.bioforscher.jstructure.model.identifier.ProteinIdentifier) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) IOException(java.io.IOException)

Example 5 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class EarlyFoldingClassificationRunner method process.

/**
 * Computes residue-level features for the given structure, classifies every residue and writes
 * the report to {@code outputPath}.
 * <p>
 * PLIP contacts are annotated first; when the standard annotation throws, they are computed
 * per chain instead, and when that throws as well the method prints the failure and returns
 * without writing any output. Within each chain the residues are ranked by descending
 * probability and the first 15% are labelled "early", all others "late".
 *
 * @param structure  the structure to classify
 * @param outputPath file the report is written to
 * @throws IOException when the report cannot be written
 */
public void process(Structure structure, Path outputPath) throws IOException {
    // report structure characteristics
    System.out.println("structure: " + structure.getProteinIdentifier().getFullName() + "\n" + "chains: " + structure.chainsWithAminoAcids().map(Chain::getChainIdentifier).map(ChainIdentifier::getChainId).collect(Collectors.toList()) + "\n" + "total residues: " + structure.aminoAcids().count());
    System.out.println();
    // compute features
    System.out.println("computing residue-level features");
    // start with PLIP to fail fast
    System.out.println("querying PLIP-REST-Service");
    try {
        // try to annotate by standard routine
        PLIP_INTRA_MOLECULAR_ANNOTATOR.process(structure);
        System.out.println("fetched PLIP contacts");
    } catch (Exception e1) {
        try {
            // potential non-pdb-entry, try to compute on-the-fly
            structure.chainsWithAminoAcids().forEach(chain -> {
                Document document = PLIPRestServiceQuery.calculateIntraChainDocument(chain);
                PLIP_INTRA_MOLECULAR_ANNOTATOR.process(chain, document);
            });
            System.out.println("computed PLIP contacts");
        } catch (Exception e2) {
            System.out.println("failed: could not compute PLIP contacts");
            e2.printStackTrace();
            return;
        }
    }
    System.out.println("computing energy profiles");
    EGOR_AGREEMENT_CALCULATOR.process(structure);
    System.out.println("annotating secondary structure elements");
    LOOP_FRACTION_CALCULATOR.process(structure);
    System.out.println("computing relative accessible surface area");
    ACCESSIBLE_SURFACE_AREA_CALCULATOR.process(structure);
    // assign feature vectors
    structure.aminoAcids().forEach(RawFeatureVector::assignRawFeatureVector);
    // smooth feature vectors
    structure.chainsWithAminoAcids().forEach(chain -> {
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());
        aminoAcids.forEach(aminoAcid -> {
            SmoothedFeatureVector.assignSmoothedFeatureVector(aminoAcids, aminoAcid);
        });
    });
    // classify each residue
    StringJoiner outputJoiner = new StringJoiner(System.lineSeparator());
    // print header
    outputJoiner.add("structure: '" + structure.getProteinIdentifier().getFullName() + "'").add("chains: " + structure.chainsWithAminoAcids().map(Chain::getChainIdentifier).map(ChainIdentifier::getChainId).collect(Collectors.toList())).add("total residues: " + structure.aminoAcids().count()).add("chain,res,aa,sse,energy,egor,sse_size,loop_fraction,rasa,plip_local_contacts," + "plip_local_hbonds,plip_local_hydrophobic,plip_local_backbone,plip_long_range_contacts," + "plip_long_range_hbonds,plip_long_range_hydrophobic,plip_long_range_backbone," + "plip_betweenness,plip_closeness,plip_clusteringcoefficient,plip_hbonds_betweenness," + "plip_hbonds_closeness,plip_hbonds_clusteringcoefficient,plip_hydrophobic_betweenness," + "plip_hydrophobic_closeness,plip_hydrophobic_clusteringcoefficient,conv_betweenness," + "conv_closeness,conv_clusteringcoefficient,plip_neighborhoods,conv_neighborhoods,prob,folds");
    structure.chainsWithAminoAcids().forEach(chain -> {
        // rank the residues of the current chain only: streaming the whole structure here would
        // repeat the globally best-scoring rows once per chain in multi-chain structures
        List<String> output = chain.aminoAcids().map(aminoAcid -> {
            boolean isProline = aminoAcid instanceof Proline;
            SmoothedFeatureVector smoothedFeatureVector = aminoAcid.getFeature(SmoothedFeatureVector.class);
            double loopFraction = aminoAcid.getFeature(LoopFraction.class).getLoopFraction();
            Instance instance = createInstance(smoothedFeatureVector, loopFraction);
            // prolines are not classified and keep a probability of 0
            double prob = 0.0;
            if (!isProline) {
                try {
                    prob = model.distributionForInstance(normalize(instance))[0];
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            StringJoiner lineJoiner = new StringJoiner(",");
            lineJoiner.add(aminoAcid.getParentChain().getChainIdentifier().getChainId()).add(aminoAcid.getResidueIdentifier().toString()).add(aminoAcid.getOneLetterCode()).add(aminoAcid.getFeature(GenericSecondaryStructure.class).getSecondaryStructure().getReducedRepresentation());
            // every attribute except the last one is written as a feature column
            for (int i = 0; i < instance.numAttributes() - 1; i++) {
                lineJoiner.add(StandardFormat.format(instance.value(i)));
            }
            lineJoiner.add(StandardFormat.format(prob));
            return lineJoiner.toString();
        }).sorted(Comparator.comparingDouble((String line) -> Double.parseDouble(line.substring(line.lastIndexOf(',') + 1))).reversed()).collect(Collectors.toList());
        // the rows are sorted by descending probability (last column), so the first 15% are "early"
        int residueCount = output.size();
        int numberOfEarlyFoldingResidues = (int) (0.15 * residueCount);
        for (int i = 0; i < residueCount; i++) {
            outputJoiner.add(output.get(i) + "," + (i < numberOfEarlyFoldingResidues ? "early" : "late"));
        }
    });
    // write output
    System.out.println("writing output to " + outputPath);
    Files.write(outputPath, outputJoiner.toString().getBytes());
}
Also used : java.util(java.util) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Structure(de.bioforscher.jstructure.model.structure.Structure) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) LoopFractionCalculator(de.bioforscher.jstructure.feature.loopfraction.LoopFractionCalculator) Classifier(weka.classifiers.Classifier) EgorAgreementCalculator(de.bioforscher.jstructure.feature.energyprofile.EgorAgreementCalculator) ProteinIdentifier(de.bioforscher.jstructure.model.identifier.ProteinIdentifier) ChainIdentifier(de.bioforscher.jstructure.model.identifier.ChainIdentifier) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) PLIPRestServiceQuery(de.bioforscher.jstructure.feature.interactions.PLIPRestServiceQuery) StandardFormat(de.bioforscher.jstructure.StandardFormat) PLIPIntraMolecularAnnotator(de.bioforscher.jstructure.feature.interactions.PLIPIntraMolecularAnnotator) Path(java.nio.file.Path) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) Files(java.nio.file.Files) AccessibleSurfaceAreaCalculator(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceAreaCalculator) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) Paths(java.nio.file.Paths) Document(org.jsoup.nodes.Document) weka.core(weka.core) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) InputStream(java.io.InputStream) Chain(de.bioforscher.jstructure.model.structure.Chain) ChainIdentifier(de.bioforscher.jstructure.model.identifier.ChainIdentifier) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) 
SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) GenericSecondaryStructure(de.bioforscher.jstructure.feature.sse.GenericSecondaryStructure) Document(org.jsoup.nodes.Document) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector)

Aggregations

Structure (de.bioforscher.jstructure.model.structure.Structure)61 IOException (java.io.IOException)45 Collectors (java.util.stream.Collectors)40 Chain (de.bioforscher.jstructure.model.structure.Chain)39 Files (java.nio.file.Files)35 StructureParser (de.bioforscher.jstructure.model.structure.StructureParser)30 Path (java.nio.file.Path)26 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)23 List (java.util.List)22 StandardFormat (de.bioforscher.jstructure.StandardFormat)21 Logger (org.slf4j.Logger)20 LoggerFactory (org.slf4j.LoggerFactory)20 Test (org.junit.Test)19 Group (de.bioforscher.jstructure.model.structure.Group)18 UncheckedIOException (java.io.UncheckedIOException)18 Pattern (java.util.regex.Pattern)17 Stream (java.util.stream.Stream)17 Jsoup (org.jsoup.Jsoup)17 ComputationException (de.bioforscher.jstructure.model.feature.ComputationException)16 java.util (java.util)15