Search in sources :

Example 6 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class A04_WriteTransitionStateCsv method handleLineLocally.

/**
 * Computes the per-residue CSV rows for one entry: for every amino acid of the first chain of the native
 * structure one row tagged {@code native} and one row tagged {@code transition}, the latter averaging the
 * same properties over all reconstructed models found on disk.
 *
 * <p>The input line is split at {@code ;}: field 0 is the entry id, field 1 the PDB id and field 2 a
 * bracketed, comma-separated list of experiment ids.
 *
 * @param line the semicolon-separated description of the entry
 * @return all rows joined by the line separator, or an empty optional if any step threw an exception
 */
private static Optional<String> handleLineLocally(String line) {
    try {
        System.out.println(line);
        String[] split = line.split(";");
        String entryId = split[0];
        String pdbId = split[1];
        List<Integer> experimentIds = Pattern.compile(",").splitAsStream(split[2].replaceAll("\\[", "").replaceAll("]", "")).map(Integer::valueOf).collect(Collectors.toList());

        Chain originalChain = StructureParser.fromPdbId(pdbId).parse().chains().findFirst()
                .orElseThrow(() -> new IllegalArgumentException("structure " + pdbId + " provides no chain"));
        ProteinGraph originalFullPlipGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.SALENTIN2015);
        ProteinGraph originalHydrogenPlipGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROGEN_BONDS);
        ProteinGraph originalHydrophobicPlipGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROPHOBIC_INTERACTION);
        ProteinGraph originalConvGraph = ProteinGraphFactory.createProteinGraph(originalChain, ProteinGraphFactory.InteractionScheme.CALPHA8);

        Start2FoldXmlParser.parseSpecificExperiment(originalChain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        List<AminoAcid> earlyFoldingResidues = originalChain.aminoAcids().filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly()).collect(Collectors.toList());

        List<Chain> reconstructedChains = loadReconstructedChains(pdbId);
        for (Chain reconstructedChain : reconstructedChains) {
            Document document = PLIPRestServiceQuery.calculateIntraChainDocument(reconstructedChain);
            // NOTE(review): the document is computed from the reconstructed chain but applied to originalChain;
            // kept as in the original code - confirm that reconstructedChain is not the intended target
            PLIP_INTRA_MOLECULAR_ANNOTATOR.process(originalChain, document);
        }

        List<ProteinGraph> convGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.CALPHA8)).collect(Collectors.toList());
        List<ProteinGraphCalculations> convGraphCalculations = convGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        List<ProteinGraph> fullPlipGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.SALENTIN2015)).collect(Collectors.toList());
        List<ProteinGraphCalculations> fullPlipGraphCalculations = fullPlipGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        List<ProteinGraph> hydrogenPlipGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROGEN_BONDS)).collect(Collectors.toList());
        // each calculation list must wrap its own graphs; previously the hydrogen and hydrophobic lists were
        // both derived from fullPlipGraphs and merely repeated the full PLIP values
        List<ProteinGraphCalculations> hydrogenPlipGraphCalculations = hydrogenPlipGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());
        List<ProteinGraph> hydrophobicPlipGraphs = reconstructedChains.stream().map(c -> ProteinGraphFactory.createProteinGraph(c, ProteinGraphFactory.InteractionScheme.SALENTIN2015_HYDROPHOBIC_INTERACTION)).collect(Collectors.toList());
        List<ProteinGraphCalculations> hydrophobicPlipGraphCalculations = hydrophobicPlipGraphs.stream().map(ProteinGraphCalculations::new).collect(Collectors.toList());

        return Optional.of(originalChain.aminoAcids().map(aminoAcid -> {
            ResidueTopologicPropertiesContainer container = aminoAcid.getFeature(ResidueTopologicPropertiesContainer.class);
            ResidueIdentifier residueIdentifier = aminoAcid.getResidueIdentifier();
            // both rows of a residue share the identifying columns and the class label
            String rowPrefix = pdbId + "," + "A" + "," + residueIdentifier + "," + aminoAcid.getOneLetterCode() + ",";
            String foldingLabel = earlyFoldingResidues.contains(aminoAcid) ? "early" : "late";

            String nativeRow = rowPrefix
                    + nativeContactColumns(originalFullPlipGraph, aminoAcid) + ","
                    + StandardFormat.format(container.getFullPlip().getBetweenness()) + ","
                    + StandardFormat.format(container.getFullPlip().getCloseness()) + ","
                    + StandardFormat.format(container.getFullPlip().getClusteringCoefficient()) + ","
                    + nativeContactColumns(originalHydrogenPlipGraph, aminoAcid) + ","
                    + StandardFormat.format(container.getHydrogenPlip().getBetweenness()) + ","
                    + StandardFormat.format(container.getHydrogenPlip().getCloseness()) + ","
                    + StandardFormat.format(container.getHydrogenPlip().getClusteringCoefficient()) + ","
                    + nativeContactColumns(originalHydrophobicPlipGraph, aminoAcid) + ","
                    + StandardFormat.format(container.getHydrophobicPlip().getBetweenness()) + ","
                    + StandardFormat.format(container.getHydrophobicPlip().getCloseness()) + ","
                    + StandardFormat.format(container.getHydrophobicPlip().getClusteringCoefficient()) + ","
                    + nativeContactColumns(originalConvGraph, aminoAcid) + ","
                    + StandardFormat.format(container.getConventional().getBetweenness()) + ","
                    + StandardFormat.format(container.getConventional().getCloseness()) + ","
                    + StandardFormat.format(container.getConventional().getClusteringCoefficient()) + ","
                    + container.getFullPlip().getDistinctNeighborhoodCount() + ","
                    + container.getConventional().getDistinctNeighborhoodCount() + ","
                    + StandardFormat.format(aminoAcid.getFeature(EnergyProfile.class).getSolvationEnergy()) + ","
                    + StandardFormat.format(aminoAcid.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea()) + ","
                    + StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()) + ","
                    + foldingLabel + "," + "native";

            String transitionRow = rowPrefix
                    + averagedGraphColumns(fullPlipGraphs, fullPlipGraphCalculations, residueIdentifier) + ","
                    + averagedGraphColumns(hydrogenPlipGraphs, hydrogenPlipGraphCalculations, residueIdentifier) + ","
                    + averagedGraphColumns(hydrophobicPlipGraphs, hydrophobicPlipGraphCalculations, residueIdentifier) + ","
                    + averagedGraphColumns(convGraphs, convGraphCalculations, residueIdentifier) + ","
                    + StandardFormat.format(fullPlipGraphCalculations.stream().mapToInt(calc -> calc.distinctNeighborhoodCount(residueIdentifier)).average().getAsDouble()) + ","
                    + StandardFormat.format(convGraphCalculations.stream().mapToInt(calc -> calc.distinctNeighborhoodCount(residueIdentifier)).average().getAsDouble()) + ","
                    + StandardFormat.format(averageOverReconstructions(reconstructedChains, residueIdentifier, aa -> aa.getFeature(EnergyProfile.class).getSolvationEnergy())) + ","
                    + StandardFormat.format(averageOverReconstructions(reconstructedChains, residueIdentifier, aa -> aa.getFeature(AccessibleSurfaceArea.class).getRelativeAccessibleSurfaceArea())) + ","
                    + StandardFormat.format(averageOverReconstructions(reconstructedChains, residueIdentifier, aa -> aa.getFeature(LoopFraction.class).getLoopFraction())) + ","
                    + foldingLabel + "," + "transition";

            return nativeRow + System.lineSeparator() + transitionRow;
        }).collect(Collectors.joining(System.lineSeparator())));
    } catch (Exception e) {
        // report the failure once via the logger (including the stack trace) instead of printing it twice
        logger.warn("calculation failed for {}", line, e);
        return Optional.empty();
    }
}

/**
 * Parses the first chain of every file whose name contains {@code _model} in the reconstruction directory of
 * the given PDB id. The directory stream is closed before returning.
 *
 * @param pdbId the PDB id selecting the reconstruction directory
 * @return the first chain of each model file
 * @throws IOException if the directory cannot be listed
 */
private static List<Chain> loadReconstructedChains(String pdbId) throws IOException {
    // NOTE(review): absolute, machine-specific path kept from the original code
    try (java.util.stream.Stream<java.nio.file.Path> modelPaths = Files.list(Paths.get("/home/bittrich/git/phd_sb_repo/data/" + "reconstruction-start2fold/reconstructions/" + pdbId + "-early-conventional-1/stage1/"))) {
        return modelPaths.filter(path -> path.toFile().getName().contains("_model"))
                .map(path -> StructureParser.fromPath(path)
                        .forceProteinName(IdentifierFactory.createProteinIdentifier(pdbId, path.toFile().getName().split("_")[2].split("\\.")[0]))
                        .parse()
                        .getChains()
                        .get(0))
                .collect(Collectors.toList());
    }
}

/** Joins the total, local and non-local contact counts of a residue in the given native graph with commas. */
private static String nativeContactColumns(ProteinGraph graph, AminoAcid aminoAcid) {
    return graph.getContactsOf(aminoAcid).size() + ","
            + graph.getLocalContactsOf(aminoAcid).size() + ","
            + graph.getNonLocalContactsOf(aminoAcid).size();
}

/**
 * Joins six formatted columns for one interaction scheme, each averaged over all reconstructions: total,
 * local and non-local contact counts followed by betweenness, closeness and clustering coefficient.
 */
private static String averagedGraphColumns(List<ProteinGraph> graphs, List<ProteinGraphCalculations> calculations, ResidueIdentifier residueIdentifier) {
    return StandardFormat.format(graphs.stream().mapToInt(graph -> graph.getContactsOf(residueIdentifier).size()).average().getAsDouble()) + ","
            + StandardFormat.format(graphs.stream().mapToInt(graph -> graph.getLocalContactsOf(residueIdentifier).size()).average().getAsDouble()) + ","
            + StandardFormat.format(graphs.stream().mapToInt(graph -> graph.getNonLocalContactsOf(residueIdentifier).size()).average().getAsDouble()) + ","
            + StandardFormat.format(calculations.stream().mapToDouble(calc -> calc.betweenness(residueIdentifier)).average().getAsDouble()) + ","
            + StandardFormat.format(calculations.stream().mapToDouble(calc -> calc.closeness(residueIdentifier)).average().getAsDouble()) + ","
            + StandardFormat.format(calculations.stream().mapToDouble(calc -> calc.clusteringCoefficient(residueIdentifier)).average().getAsDouble());
}

/**
 * Averages a per-residue feature value over the residue with the given identifier in every reconstructed
 * chain; throws {@link java.util.NoSuchElementException} when there are no reconstructed chains.
 */
private static double averageOverReconstructions(List<Chain> reconstructedChains, ResidueIdentifier residueIdentifier, java.util.function.ToDoubleFunction<AminoAcid> feature) {
    return reconstructedChains.stream()
            .map(chain -> chain.select().residueIdentifier(residueIdentifier).asAminoAcid())
            .mapToDouble(feature)
            .average()
            .getAsDouble();
}
Also used : LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) PLIPRestServiceQuery(de.bioforscher.jstructure.feature.interactions.PLIPRestServiceQuery) StandardFormat(de.bioforscher.jstructure.StandardFormat) PLIPIntraMolecularAnnotator(de.bioforscher.jstructure.feature.interactions.PLIPIntraMolecularAnnotator) EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Logger(org.slf4j.Logger) Files(java.nio.file.Files) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) Paths(java.nio.file.Paths) de.bioforscher.jstructure.feature.graphs(de.bioforscher.jstructure.feature.graphs) Document(org.jsoup.nodes.Document) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ResidueIdentifier(de.bioforscher.jstructure.model.identifier.ResidueIdentifier) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Document(org.jsoup.nodes.Document) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) 
EnergyProfile(de.bioforscher.jstructure.feature.energyprofile.EnergyProfile) Structure(de.bioforscher.jstructure.model.structure.Structure) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea)

Example 7 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class A01_ReportGeneralStatistics method handleEFRLine.

/**
 * Processes one semicolon-separated entry line and adds its statistics to the class-level accumulators
 * ({@code early}, {@code late}, {@code functional}, {@code nonFunctional}, {@code overlap}, both
 * contingency tables and the two lists of table lines).
 *
 * <p>Fields read from the line: 0 entry id, 1 PDB id, 2 bracketed comma-separated experiment ids,
 * 3 expected number of early folding residues, 4 UniProt id; the whole array is additionally handed to
 * {@code Start2FoldConstants.extractFunctioanlResidueNumbers}.
 *
 * @param line the semicolon-separated description of the entry
 */
private static void handleEFRLine(String line) {
    String[] fields = line.split(";");
    String entryId = fields[0];
    String pdbId = fields[1];
    String rawExperimentIds = fields[2].replaceAll("\\[", "").replaceAll("]", "");
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(rawExperimentIds)
            .map(Integer::valueOf)
            .collect(Collectors.toList());
    int expectedEarlyCount = Integer.valueOf(fields[3]);

    // only the first chain of the structure is considered
    Chain chain = StructureParser.fromPdbId(pdbId).parse().chains().findFirst().get();
    Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

    List<AminoAcid> earlyResidues = chain.aminoAcids()
            .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());
    List<AminoAcid> lateResidues = chain.aminoAcids()
            .filter(residue -> !earlyResidues.contains(residue))
            .collect(Collectors.toList());
    early.add(earlyResidues.size());
    late.add((int) (chain.aminoAcids().count() - earlyResidues.size()));
    if (earlyResidues.size() != expectedEarlyCount) {
        System.err.println("number of EFR did not match expectation for " + entryId + ": " + earlyResidues.size() + " vs " + expectedEarlyCount);
    }

    String uniProtId = fields[4];
    List<Integer> functionalNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(fields);
    List<AminoAcid> functionalResidues = new ArrayList<>();
    // without any annotation of functional residues the list simply stays empty
    if (!functionalNumbers.isEmpty()) {
        FunctionalResidueParser.parse(chain, functionalNumbers);
        functionalResidues.addAll(chain.aminoAcids()
                .filter(residue -> residue.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                .collect(Collectors.toList()));
    }
    List<AminoAcid> nonFunctionalResidues = chain.aminoAcids()
            .filter(residue -> !functionalResidues.contains(residue))
            .collect(Collectors.toList());

    List<AminoAcid> exposedResidues = chain.aminoAcids()
            .filter(residue -> residue.getFeature(AccessibleSurfaceArea.class).isExposed())
            .collect(Collectors.toList());
    List<AminoAcid> buriedResidues = chain.aminoAcids()
            .filter(residue -> residue.getFeature(AccessibleSurfaceArea.class).isBuried())
            .collect(Collectors.toList());
    // layout: early/buried, early/exposed, late/buried, late/exposed
    rasaContingencyTable[0] += SetOperations.createIntersectionSet(earlyResidues, buriedResidues).size();
    rasaContingencyTable[1] += SetOperations.createIntersectionSet(earlyResidues, exposedResidues).size();
    rasaContingencyTable[2] += SetOperations.createIntersectionSet(lateResidues, buriedResidues).size();
    rasaContingencyTable[3] += SetOperations.createIntersectionSet(lateResidues, exposedResidues).size();

    boolean hasFunctionalResidues = !functionalResidues.isEmpty();
    int earlyFunctionalCount = 0;
    if (hasFunctionalResidues) {
        functional.add(functionalResidues.size());
        nonFunctional.add((int) chain.aminoAcids().count() - functionalResidues.size());
        earlyFunctionalCount = SetOperations.createIntersectionSet(earlyResidues, functionalResidues).size();
        overlap.add(earlyFunctionalCount);

        // layout: early/functional, early/non-functional, late/functional, late/non-functional
        int earlyFunctional = earlyFunctionalCount;
        int earlyNonFunctional = SetOperations.createIntersectionSet(earlyResidues, nonFunctionalResidues).size();
        int lateFunctional = SetOperations.createIntersectionSet(lateResidues, functionalResidues).size();
        int lateNonFunctional = SetOperations.createIntersectionSet(lateResidues, nonFunctionalResidues).size();
        contingencyTable[0] += earlyFunctional;
        contingencyTable[1] += earlyNonFunctional;
        contingencyTable[2] += lateFunctional;
        contingencyTable[3] += lateNonFunctional;

        double[] test = FishersExactTest.fishersExactTest(earlyFunctional, earlyNonFunctional, lateFunctional, lateNonFunctional);
        System.out.println("values: " + earlyFunctional + ", " + earlyNonFunctional + ", " + lateFunctional + ", " + lateNonFunctional);
        System.out.println("test: " + Arrays.toString(test));
        functionalTableLines.add(entryId + " & " + chain.aminoAcids().count() + " & " + earlyResidues.size() + " & " + functionalResidues.size() + " & " + earlyFunctional + " & " + StandardFormat.format(test[0]) + " & " + "? \\\\");
    }

    // entries without functional annotation show a dash in both functional columns
    String functionalColumn = hasFunctionalResidues ? String.valueOf(functionalResidues.size()) : "-";
    String earlyFunctionalColumn = hasFunctionalResidues ? String.valueOf(earlyFunctionalCount) : "-";
    tableLines.add(entryId + " & " + pdbId + "\\_A & " + uniProtId + " & " + chain.aminoAcids().count() + " & " + earlyResidues.size() + " & " + functionalColumn + " & " + earlyFunctionalColumn + " \\\\");
}
Also used : java.util(java.util) Files(java.nio.file.Files) FishersExactTest(edu.northwestern.at.utils.math.statistics.FishersExactTest) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) FunctionalResidueParser(de.bioforscher.start2fold.parser.FunctionalResidueParser) UncheckedIOException(java.io.UncheckedIOException) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) SetOperations(de.bioforscher.jstructure.mathematics.SetOperations) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) Path(java.nio.file.Path) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) Structure(de.bioforscher.jstructure.model.structure.Structure) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea)

Example 8 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class A01_WriteEarlyFoldingClassificationArff method handleLine.

/**
 * Creates the data rows of one entry: one comma-separated row of smoothed features per amino acid of the
 * first chain, skipping prolines, each ending in the class label {@code early} or {@code late}.
 *
 * <p>The input line is split at {@code ;}: field 0 is the entry id, field 1 the PDB id and field 2 a
 * bracketed, comma-separated list of experiment ids.
 *
 * @param line the semicolon-separated description of the entry
 * @return all rows joined by the line separator, or an empty optional if any step threw an exception
 */
private static Optional<String> handleLine(String line) {
    try {
        logger.info("handling {}", line);
        String[] fields = line.split(";");
        String entryId = fields[0];
        String pdbId = fields[1];
        String rawExperimentIds = fields[2].replaceAll("\\[", "").replaceAll("]", "");
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(rawExperimentIds)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Chain chain = StructureParser.fromPdbId(pdbId).parse().getFirstChain();
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());

        List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());
        // raw vectors are assigned to all residues first, then the smoothed vectors are derived from them
        for (AminoAcid residue : residues) {
            RawFeatureVector.assignRawFeatureVector(residue);
        }
        for (AminoAcid residue : residues) {
            SmoothedFeatureVector.assignSmoothedFeatureVector(residues, residue);
        }

        String rows = residues.stream()
                .filter(residue -> !(residue instanceof Proline))
                .map(residue -> composeFeatureRow(residue, earlyFoldingResidues))
                .collect(Collectors.joining(System.lineSeparator()));
        return Optional.of(rows);
    } catch (Exception e) {
        logger.warn("computation for {} failed", line, e);
        return Optional.empty();
    }
}

/** Formats the smoothed features of one residue, followed by its early/late label, as a comma-separated row. */
private static String composeFeatureRow(AminoAcid aminoAcid, List<AminoAcid> earlyFoldingResidues) {
    SmoothedFeatureVector vector = aminoAcid.getFeature(SmoothedFeatureVector.class);
    StringBuilder row = new StringBuilder();
    row.append(StandardFormat.format(vector.getEnergy())).append(",");
    row.append(StandardFormat.format(vector.getEgor())).append(",");
    row.append(StandardFormat.format(vector.getSecondaryStructureElementSize())).append(",");
    // the loop fraction is already smoothed and therefore read from its own feature
    row.append(StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction())).append(",");
    row.append(StandardFormat.format(vector.getRasa())).append(",");
    row.append(StandardFormat.format(vector.getLocalInteractions())).append(",");
    row.append(StandardFormat.format(vector.getLocalHydrogen())).append(",");
    row.append(StandardFormat.format(vector.getLocalHydrophobic())).append(",");
    row.append(StandardFormat.format(vector.getLocalBackbone())).append(",");
    row.append(StandardFormat.format(vector.getNonLocalInteractions())).append(",");
    row.append(StandardFormat.format(vector.getNonLocalHydrogen())).append(",");
    row.append(StandardFormat.format(vector.getNonLocalHydrophobic())).append(",");
    row.append(StandardFormat.format(vector.getNonLocalBackbone())).append(",");
    row.append(StandardFormat.format(vector.getBetweenness())).append(",");
    row.append(StandardFormat.format(vector.getCloseness())).append(",");
    row.append(StandardFormat.format(vector.getClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(vector.getHydrogenBetweenness())).append(",");
    row.append(StandardFormat.format(vector.getHydrogenCloseness())).append(",");
    row.append(StandardFormat.format(vector.getHydrogenClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(vector.getHydrophobicBetweenness())).append(",");
    row.append(StandardFormat.format(vector.getHydrophobicCloseness())).append(",");
    row.append(StandardFormat.format(vector.getHydrophobicClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(vector.getConvBetweenness())).append(",");
    row.append(StandardFormat.format(vector.getConvCloseness())).append(",");
    row.append(StandardFormat.format(vector.getConvClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(vector.getDistinctNeighborhoods())).append(",");
    row.append(StandardFormat.format(vector.getConvDistinctNeighborhoods())).append(",");
    if (earlyFoldingResidues.contains(aminoAcid)) {
        row.append("early");
    } else {
        row.append("late");
    }
    return row.toString();
}
Also used : RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) Logger(org.slf4j.Logger) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Files(java.nio.file.Files) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) Optional(java.util.Optional) StandardFormat(de.bioforscher.jstructure.StandardFormat) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Structure(de.bioforscher.jstructure.model.structure.Structure) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector)

Example 9 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class Start2FoldXmlParserTest method shouldParseAllFiles.

/**
 * Runs the parser on every file listed for the XML directory; a failure of a single file is logged and
 * does not abort the remaining files. Ignored by default.
 */
@Test
@Ignore
public void shouldParseAllFiles() {
    // shows that experiments may have different sequences - need to handle every one individually
    Start2FoldConstants.list(Start2FoldConstants.XML_DIRECTORY).forEach(xmlPath -> {
        try {
            logger.info("handling {}", xmlPath);
            // safe are: STF0005, STF0021
            String pdbId = Jsoup.parse(xmlPath.toFile(), "UTF-8")
                    .getElementsByTag("protein")
                    .attr("pdb_id");
            Chain firstChain = StructureParser.fromPdbId(pdbId)
                    .parse()
                    .chains()
                    .findFirst()
                    .get();
            Start2FoldXmlParser.parse(firstChain, xmlPath);
        } catch (Exception e) {
            logger.warn("inspect:", e);
        }
    });
}
Also used : Path(java.nio.file.Path) Chain(de.bioforscher.jstructure.model.structure.Chain) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 10 with Structure

use of de.bioforscher.jstructure.model.structure.Structure in project jstructure by JonStargaryen.

the class Start2FoldXmlParserTest method shouldParseStart2FoldXml.

/**
 * Parses the bundled resource {@code STF0026.xml} against the first chain of structure {@code 1hrh}; the
 * test passes when no exception is thrown.
 */
@Test
public void shouldParseStart2FoldXml() {
    Chain firstChain = StructureParser.fromPdbId("1hrh")
            .parse()
            .chains()
            .findFirst()
            .get();
    Start2FoldXmlParser.parse(firstChain, TestUtils.getResourceAsInputStream("STF0026.xml"));
}
Also used : Chain(de.bioforscher.jstructure.model.structure.Chain) Structure(de.bioforscher.jstructure.model.structure.Structure) Test(org.junit.Test)

Aggregations

Structure (de.bioforscher.jstructure.model.structure.Structure)61 IOException (java.io.IOException)45 Collectors (java.util.stream.Collectors)40 Chain (de.bioforscher.jstructure.model.structure.Chain)39 Files (java.nio.file.Files)35 StructureParser (de.bioforscher.jstructure.model.structure.StructureParser)30 Path (java.nio.file.Path)26 AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)23 List (java.util.List)22 StandardFormat (de.bioforscher.jstructure.StandardFormat)21 Logger (org.slf4j.Logger)20 LoggerFactory (org.slf4j.LoggerFactory)20 Test (org.junit.Test)19 Group (de.bioforscher.jstructure.model.structure.Group)18 UncheckedIOException (java.io.UncheckedIOException)18 Pattern (java.util.regex.Pattern)17 Stream (java.util.stream.Stream)17 Jsoup (org.jsoup.Jsoup)17 ComputationException (de.bioforscher.jstructure.model.feature.ComputationException)16 java.util (java.util)15