Search in sources :

Example 56 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A01_ReportGeneralStatistics method handleStrongFile.

/**
 * Tallies strong and non-strong residues for a single Start2Fold XML file.
 * <p>
 * The PDB id is taken from the {@code pdb_id} attribute of the file's {@code protein} element,
 * the structure with that id is parsed and its first chain is handed to
 * {@code Start2FoldXmlParser.parse} together with the file. If at least one amino acid of the
 * chain is flagged as strong, the number of strong residues is appended to {@code strong} and
 * the number of all remaining amino acids to {@code weak}. Otherwise nothing is recorded.
 *
 * @param path the Start2Fold XML file to process
 * @throws UncheckedIOException if an {@link IOException} is raised while processing the file
 * @throws java.util.NoSuchElementException if the parsed structure contains no chain
 */
private static void handleStrongFile(Path path) {
    try {
        String pdbId = Jsoup.parse(path.toFile(), "UTF-8").getElementsByTag("protein").attr("pdb_id");
        Structure structure = StructureParser.fromPdbId(pdbId).parse();
        // same exception type as a bare Optional.get(), but the message names the offending entry
        Chain chain = structure.chains()
                .findFirst()
                .orElseThrow(() -> new java.util.NoSuchElementException("structure '" + pdbId + "' referenced by " + path + " contains no chain"));
        Start2FoldXmlParser.parse(chain, path);

        // a single pass suffices: a count of zero is exactly the "no stability data" case
        long strongCount = chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isStrong())
                .count();
        if (strongCount == 0) {
            return;
        }

        strong.add((int) strongCount);
        weak.add((int) (chain.aminoAcids().count() - strongCount));
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : java.util(java.util) Files(java.nio.file.Files) FishersExactTest(edu.northwestern.at.utils.math.statistics.FishersExactTest) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) FunctionalResidueParser(de.bioforscher.start2fold.parser.FunctionalResidueParser) UncheckedIOException(java.io.UncheckedIOException) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) SetOperations(de.bioforscher.jstructure.mathematics.SetOperations) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) Path(java.nio.file.Path) Chain(de.bioforscher.jstructure.model.structure.Chain) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) Structure(de.bioforscher.jstructure.model.structure.Structure)

Example 57 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A03_WriteFunctionalClassificationArff method handleLine.

/**
 * Converts one semicolon-separated input line into the ARFF data rows of the referenced protein.
 * <p>
 * The second field of the line is used as PDB id. Raw feature vectors are assigned to every
 * amino acid of the first chain, functional residues are annotated from the residue numbers
 * extracted from the line, and smoothed feature vectors are assigned afterwards. One
 * comma-separated row is written per amino acid that is not a proline; rows are joined by the
 * system line separator.
 *
 * @param line the input line to process
 * @return the joined rows, or an empty {@link Optional} if the line lists no functional residue
 * numbers, if no amino acid ends up annotated as functional, or if any exception occurs (the
 * exception is logged as a warning)
 */
private static Optional<String> handleLine(String line) {
    try {
        logger.info("handling {}", line);
        String[] fields = line.split(";");
        Chain chain = StructureParser.fromPdbId(fields[1]).parse().getFirstChain();
        List<AminoAcid> residues = chain.aminoAcids().collect(Collectors.toList());
        for (AminoAcid residue : residues) {
            RawFeatureVector.assignRawFeatureVector(residue);
        }

        List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(fields);
        // without annotated residue numbers no residue can be functional - skip the protein
        if (functionalResidueNumbers.isEmpty()) {
            return Optional.empty();
        }

        FunctionalResidueParser.parse(chain, functionalResidueNumbers);
        List<AminoAcid> functionalResidues = chain.aminoAcids()
                .filter(residue -> residue.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                .collect(Collectors.toList());
        // the annotation did not mark any residue of this chain - skip the protein as well
        if (functionalResidues.isEmpty()) {
            return Optional.empty();
        }

        // smooth features
        for (AminoAcid residue : residues) {
            SmoothedFeatureVector.assignSmoothedFeatureVector(residues, residue);
        }

        String rows = residues.stream()
                .filter(residue -> !(residue instanceof Proline))
                .map(residue -> composeArffRow(residue, functionalResidues))
                .collect(Collectors.joining(System.lineSeparator()));
        return Optional.of(rows);
    } catch (Exception e) {
        logger.warn("computation for {} failed", line, e);
        return Optional.empty();
    }
}

/**
 * Formats the smoothed features of one amino acid as a comma-separated row whose last column is
 * the class label {@code functional} or {@code non-functional}.
 *
 * @param aminoAcid the residue to describe
 * @param functionalResidues the residues that are labelled {@code functional}
 * @return the row for the given residue
 */
private static String composeArffRow(AminoAcid aminoAcid, List<AminoAcid> functionalResidues) {
    SmoothedFeatureVector vector = aminoAcid.getFeature(SmoothedFeatureVector.class);
    return String.join(",",
            StandardFormat.format(vector.getEnergy()),
            StandardFormat.format(vector.getEgor()),
            StandardFormat.format(vector.getSecondaryStructureElementSize()),
            // already smoothed
            StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction()),
            StandardFormat.format(vector.getRasa()),
            StandardFormat.format(vector.getLocalInteractions()),
            StandardFormat.format(vector.getLocalHydrogen()),
            StandardFormat.format(vector.getLocalHydrophobic()),
            StandardFormat.format(vector.getLocalBackbone()),
            StandardFormat.format(vector.getNonLocalInteractions()),
            StandardFormat.format(vector.getNonLocalHydrogen()),
            StandardFormat.format(vector.getNonLocalHydrophobic()),
            StandardFormat.format(vector.getNonLocalBackbone()),
            StandardFormat.format(vector.getBetweenness()),
            StandardFormat.format(vector.getCloseness()),
            StandardFormat.format(vector.getClusteringCoefficient()),
            StandardFormat.format(vector.getHydrogenBetweenness()),
            StandardFormat.format(vector.getHydrogenCloseness()),
            StandardFormat.format(vector.getHydrogenClusteringCoefficient()),
            StandardFormat.format(vector.getHydrophobicBetweenness()),
            StandardFormat.format(vector.getHydrophobicCloseness()),
            StandardFormat.format(vector.getHydrophobicClusteringCoefficient()),
            StandardFormat.format(vector.getConvBetweenness()),
            StandardFormat.format(vector.getConvCloseness()),
            StandardFormat.format(vector.getConvClusteringCoefficient()),
            StandardFormat.format(vector.getDistinctNeighborhoods()),
            StandardFormat.format(vector.getConvDistinctNeighborhoods()),
            functionalResidues.contains(aminoAcid) ? "functional" : "non-functional");
}
Also used : RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) Logger(org.slf4j.Logger) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Files(java.nio.file.Files) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) FunctionalResidueParser(de.bioforscher.start2fold.parser.FunctionalResidueParser) ArrayList(java.util.ArrayList) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Chain(de.bioforscher.jstructure.model.structure.Chain) Optional(java.util.Optional) StandardFormat(de.bioforscher.jstructure.StandardFormat) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) ArrayList(java.util.ArrayList) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Structure(de.bioforscher.jstructure.model.structure.Structure) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector)

Example 58 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByResidue.

/**
 * Prints one comma-separated line per residue-level structural information entry of the first
 * chain of PDB entry {@code 1bdd}.
 * <p>
 * Each line holds the average and maximum RMSD, TM-score and Q increases of the entry, followed
 * by betweenness, closeness, clustering coefficient and degree of the corresponding residue in
 * the distance-based residue graph. The test only prints; it contains no assertions.
 */
@Test
public void shouldPrintStructuralInformationByResidue() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    // annotate the chain with experiment 185 of the bundled Start2Fold entry
    List<Integer> experimentIds = Stream.of(185).collect(Collectors.toList());
    Start2FoldXmlParser.parseSpecificExperiment(chain, TestUtils.getResourceAsInputStream("efr/STF0045.xml"), experimentIds);
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);
    List<ResidueStructuralInformation> residueStructuralInformation = StructuralInformationParserService.getInstance()
            .composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contactStructuralInformation);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ResidueStructuralInformation si : residueStructuralInformation) {
        AminoAcid aminoAcid = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier()))
                .asAminoAcid();
        double betweenness = residueGraphCalculations.betweenness(aminoAcid);
        double closeness = residueGraphCalculations.closeness(aminoAcid);
        double cc = residueGraphCalculations.clusteringCoefficient(aminoAcid);
        int degree = residueGraph.degreeOf(aminoAcid);

        String row = String.join(",",
                StandardFormat.format(si.getAverageRmsdIncrease()),
                StandardFormat.format(si.getAverageTmScoreIncrease()),
                StandardFormat.format(si.getAverageQIncrease()),
                StandardFormat.format(si.getMaximumRmsdIncrease()),
                StandardFormat.format(si.getMaximumTmScoreIncrease()),
                StandardFormat.format(si.getMaximumQIncrease()),
                StandardFormat.format(betweenness),
                StandardFormat.format(closeness),
                StandardFormat.format(cc),
                StandardFormat.format(degree));
        System.out.println(row);
    }
}
Also used : StructuralInformationParserService(de.bioforscher.jstructure.efr.parser.StructuralInformationParserService) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Test(org.junit.Test) TestUtils(de.bioforscher.testutil.TestUtils) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldXmlParser(de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser) Pair(de.bioforscher.jstructure.mathematics.Pair) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) List(java.util.List) Stream(java.util.stream.Stream) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Test(org.junit.Test)

Example 59 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class StructuralInformationParserServiceTest method shouldPrintStructuralInformationByContact.

/**
 * Prints one comma-separated line per contact-level structural information entry of the first
 * chain of PDB entry {@code 1bdd}.
 * <p>
 * Each line holds the average and maximum RMSD, TM-score and Q increases of the contact,
 * followed by the betweenness of the residue pair in the distance-based residue graph. The test
 * only prints; it contains no assertions.
 */
@Test
public void shouldPrintStructuralInformationByContact() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();

    // annotate the chain with experiment 185 of the bundled Start2Fold entry
    Start2FoldXmlParser.parseSpecificExperiment(chain, TestUtils.getResourceAsInputStream("efr/STF0045.xml"), Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());
    List<ContactStructuralInformation> contactStructuralInformation = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);

    ResidueGraph residueGraph = ResidueGraph.createDistanceResidueGraph(chain);
    ResidueGraphCalculations residueGraphCalculations = new ResidueGraphCalculations(residueGraph);

    for (ContactStructuralInformation si : contactStructuralInformation) {
        AminoAcid aminoAcid1 = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier1()))
                .asAminoAcid();
        AminoAcid aminoAcid2 = chain.select()
                .residueIdentifier(IdentifierFactory.createResidueIdentifier(si.getResidueIdentifier2()))
                .asAminoAcid();
        double betweenness = residueGraphCalculations.betweenness(new Pair<>(aminoAcid1, aminoAcid2));

        System.out.println(String.join(",",
                StandardFormat.format(si.getAverageRmsdIncrease()),
                StandardFormat.format(si.getAverageTmScoreIncrease()),
                StandardFormat.format(si.getAverageQIncrease()),
                StandardFormat.format(si.getMaximumRmsdIncrease()),
                StandardFormat.format(si.getMaximumTmScoreIncrease()),
                StandardFormat.format(si.getMaximumQIncrease()),
                StandardFormat.format(betweenness)));
    }
}
Also used : StructuralInformationParserService(de.bioforscher.jstructure.efr.parser.StructuralInformationParserService) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Test(org.junit.Test) TestUtils(de.bioforscher.testutil.TestUtils) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldXmlParser(de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser) Pair(de.bioforscher.jstructure.mathematics.Pair) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) List(java.util.List) Stream(java.util.stream.Stream) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Pair(de.bioforscher.jstructure.mathematics.Pair) Test(org.junit.Test)

Example 60 with AminoAcid

use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

the class A06_EarlyFoldingFrequencies method handleLine.

/**
 * Updates the amino acid frequency tallies with the protein described by one input line.
 * <p>
 * The line is echoed to standard output and split at {@code ;}. The first field is the entry id
 * (used to locate {@code <entryId>.xml} below {@code Start2FoldConstants.XML_DIRECTORY}), the
 * second the PDB id and the third a bracketed, comma-separated list of experiment ids. For every
 * amino acid of the first chain the slot of its family in {@code aminoAcidFrequencies} is
 * incremented; for every amino acid annotated as early folding the matching slot in
 * {@code earlyFoldingFrequencies} is incremented as well.
 *
 * @param line the semicolon-separated line to process
 * @throws NumberFormatException if an experiment id is not a valid integer
 * @throws java.util.NoSuchElementException if the parsed structure contains no chain
 */
private static void handleLine(String line) {
    System.out.println(line);
    String[] split = line.split(";");
    String entryId = split[0];
    String pdbId = split[1];
    // Integer.valueOf rejects surrounding whitespace and empty strings, so trim every token and
    // drop empty ones - this lets "[1, 2]" and "[]" parse instead of failing
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(split[2].replace("[", "").replace("]", ""))
            .map(String::trim)
            .filter(token -> !token.isEmpty())
            .map(Integer::valueOf)
            .collect(Collectors.toList());
    Structure structure = StructureParser.fromPdbId(pdbId).parse();
    // same exception type as a bare Optional.get(), but the message names the offending entry
    Chain chain = structure.chains()
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException("structure '" + pdbId + "' of entry '" + entryId + "' contains no chain"));
    Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());
    // the ordinal of the amino acid family serves as index into both frequency arrays
    chain.aminoAcids().forEach(aminoAcid -> {
        int ordinal = AminoAcid.Family.resolveOneLetterCode(aminoAcid.getOneLetterCode()).ordinal();
        aminoAcidFrequencies[ordinal]++;
    });
    earlyFoldingResidues.forEach(aminoAcid -> {
        int ordinal = AminoAcid.Family.resolveOneLetterCode(aminoAcid.getOneLetterCode()).ordinal();
        earlyFoldingFrequencies[ordinal]++;
    });
}
Also used : IntStream(java.util.stream.IntStream) Files(java.nio.file.Files) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Structure(de.bioforscher.jstructure.model.structure.Structure)

Aggregations

AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid): 66; Chain (de.bioforscher.jstructure.model.structure.Chain): 40; Collectors (java.util.stream.Collectors): 40; IOException (java.io.IOException): 36; Files (java.nio.file.Files): 35; List (java.util.List): 31; StandardFormat (de.bioforscher.jstructure.StandardFormat): 26; StructureParser (de.bioforscher.jstructure.model.structure.StructureParser): 26; Path (java.nio.file.Path): 25; Structure (de.bioforscher.jstructure.model.structure.Structure): 23; Pattern (java.util.regex.Pattern): 17; Logger (org.slf4j.Logger): 16; LoggerFactory (org.slf4j.LoggerFactory): 16; Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation): 15; UncheckedIOException (java.io.UncheckedIOException): 14; ArrayList (java.util.ArrayList): 14; Stream (java.util.stream.Stream): 14; Start2FoldResidueAnnotation (de.bioforscher.start2fold.model.Start2FoldResidueAnnotation): 13; Optional (java.util.Optional): 13; Pair (de.bioforscher.jstructure.mathematics.Pair): 11