Search in sources :

Example 16 with AminoAcid

Use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

From class SelectionTest, method shouldSelectAminoAcidsAndReturnAsContainer.

/**
 * Selects the amino acids of {@code protein} as filtered groups and verifies that every
 * selected group is an {@link AminoAcid}. The groups are then collected with
 * {@code StructureCollectors.toIsolatedStructure()} and the same check is repeated on the
 * groups exposed by the resulting {@link GroupContainer}.
 */
@Test
public void shouldSelectAminoAcidsAndReturnAsContainer() {
    List<Group> selectedGroups = protein.select()
            .aminoAcids()
            .asFilteredGroups()
            .collect(Collectors.toList());

    // every group of the original selection has to be an amino acid
    for (Group selected : selectedGroups) {
        String message = "group " + selected + " of original selected stream is no amino acid, was " + selected.getClass().getSimpleName();
        Assert.assertTrue(message, selected instanceof AminoAcid);
    }

    // the same must hold for the groups of the container built from the selection
    GroupContainer isolated = selectedGroups.stream().collect(StructureCollectors.toIsolatedStructure());
    isolated.groups().forEach(contained -> {
        String message = "group " + contained + " of container stream is no amino acid, was " + contained.getClass().getSimpleName();
        Assert.assertTrue(message, contained instanceof AminoAcid);
    });
}
Also used : AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) GroupContainer(de.bioforscher.jstructure.model.structure.container.GroupContainer) Test(org.junit.Test)

Example 17 with AminoAcid

Use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

From class A01_ReportGeneralStatistics, method handleEFRLine.

/**
 * Processes one semicolon-separated input line and accumulates its statistics in the
 * enclosing class' collections.
 *
 * <p>Fields read from the line: {@code [0]} entry id, {@code [1]} PDB id, {@code [2]} a
 * bracketed, comma-separated list of experiment ids, {@code [3]} the expected number of early
 * folding residues and {@code [4]} the UniProt id. Functional residue numbers are obtained by
 * handing the whole split line to {@code Start2FoldConstants.extractFunctioanlResidueNumbers}.
 *
 * <p>Side effects: appends to {@code early}, {@code late}, {@code tableLines} and adds to
 * {@code rasaContingencyTable}; when functional residues are annotated it also appends to
 * {@code functional}, {@code nonFunctional}, {@code overlap}, {@code functionalTableLines} and
 * adds to {@code contingencyTable}. A mismatch between the observed and the expected number of
 * early folding residues is reported on {@code System.err}.
 *
 * @param line the semicolon-separated line to process
 * @throws java.util.NoSuchElementException if the parsed structure provides no chain
 * @throws NumberFormatException if an experiment id or the expected residue count is not an integer
 */
private static void handleEFRLine(String line) {
    String[] split = line.split(";");
    String entryId = split[0];
    String pdbId = split[1];
    // strip the literal brackets and tolerate whitespace around the individual ids
    List<Integer> experimentIds = Pattern.compile(",")
            .splitAsStream(split[2].replace("[", "").replace("]", ""))
            .map(String::trim)
            .map(Integer::valueOf)
            .collect(Collectors.toList());
    int numberOfEarlyFoldingResidues = Integer.parseInt(split[3]);

    Structure structure = StructureParser.fromPdbId(pdbId).parse();
    // fail with a message naming the offending entry instead of a bare Optional.get()
    Chain chain = structure.chains()
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException("structure " + pdbId + " of entry " + entryId + " provides no chain"));
    Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());
    // late folding residues are all amino acids not contained in the early folding list
    List<AminoAcid> lateFoldingResidues = chain.aminoAcids()
            .filter(aminoAcid -> !earlyFoldingResidues.contains(aminoAcid))
            .collect(Collectors.toList());
    early.add(earlyFoldingResidues.size());
    late.add((int) (chain.aminoAcids().count() - earlyFoldingResidues.size()));
    if (earlyFoldingResidues.size() != numberOfEarlyFoldingResidues) {
        System.err.println("number of EFR did not match expectation for " + entryId + ": " + earlyFoldingResidues.size() + " vs " + numberOfEarlyFoldingResidues);
    }

    String uniProtId = split[4];
    List<Integer> functionalResidueNumbers = Start2FoldConstants.extractFunctioanlResidueNumbers(split);
    List<AminoAcid> functionalResidues = new ArrayList<>();
    // do nothing if no annotation of functional residues exists
    if (!functionalResidueNumbers.isEmpty()) {
        FunctionalResidueParser.parse(chain, functionalResidueNumbers);
        functionalResidues.addAll(chain.aminoAcids()
                .filter(aminoAcid -> aminoAcid.getFeature(FunctionalResidueAnnotation.class).isFunctional())
                .collect(Collectors.toList()));
    }
    List<AminoAcid> nonFunctionalResidues = chain.aminoAcids()
            .filter(aminoAcid -> !functionalResidues.contains(aminoAcid))
            .collect(Collectors.toList());

    List<AminoAcid> exposedAminoAcids = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isExposed())
            .collect(Collectors.toList());
    List<AminoAcid> buriedAminoAcids = chain.aminoAcids()
            .filter(aminoAcid -> aminoAcid.getFeature(AccessibleSurfaceArea.class).isBuried())
            .collect(Collectors.toList());
    // layout: [0] early & buried, [1] early & exposed, [2] late & buried, [3] late & exposed
    rasaContingencyTable[0] += SetOperations.createIntersectionSet(earlyFoldingResidues, buriedAminoAcids).size();
    rasaContingencyTable[1] += SetOperations.createIntersectionSet(earlyFoldingResidues, exposedAminoAcids).size();
    rasaContingencyTable[2] += SetOperations.createIntersectionSet(lateFoldingResidues, buriedAminoAcids).size();
    rasaContingencyTable[3] += SetOperations.createIntersectionSet(lateFoldingResidues, exposedAminoAcids).size();

    // the chain is only queried for its size from here on, so count its amino acids once
    long residueCount = chain.aminoAcids().count();
    int earlyFunctionalCount = 0;
    if (!functionalResidues.isEmpty()) {
        functional.add(functionalResidues.size());
        nonFunctional.add((int) residueCount - functionalResidues.size());
        earlyFunctionalCount = SetOperations.createIntersectionSet(earlyFoldingResidues, functionalResidues).size();
        overlap.add(earlyFunctionalCount);

        // layout: early & functional, early & non-functional, late & functional, late & non-functional
        int ef = earlyFunctionalCount;
        int en = SetOperations.createIntersectionSet(earlyFoldingResidues, nonFunctionalResidues).size();
        int lf = SetOperations.createIntersectionSet(lateFoldingResidues, functionalResidues).size();
        int ln = SetOperations.createIntersectionSet(lateFoldingResidues, nonFunctionalResidues).size();
        contingencyTable[0] += ef;
        contingencyTable[1] += en;
        contingencyTable[2] += lf;
        contingencyTable[3] += ln;

        double[] test = FishersExactTest.fishersExactTest(ef, en, lf, ln);
        System.out.println("values: " + ef + ", " + en + ", " + lf + ", " + ln);
        System.out.println("test: " + Arrays.toString(test));
        functionalTableLines.add(entryId + " & " + residueCount + " & " + earlyFoldingResidues.size() + " & " + functionalResidues.size() + " & " + ef + " & " + StandardFormat.format(test[0]) + " & " + "? \\\\");
    }

    // entries without functional annotation get "-" placeholders in the functional columns
    tableLines.add(entryId + " & " + pdbId + "\\_A & " + uniProtId + " & " + residueCount + " & " + earlyFoldingResidues.size() + " & " + (functionalResidues.isEmpty() ? "-" : functionalResidues.size()) + " & " + (functionalResidues.isEmpty() ? "-" : earlyFunctionalCount) + " \\\\");
}
Also used : java.util(java.util) Files(java.nio.file.Files) FishersExactTest(edu.northwestern.at.utils.math.statistics.FishersExactTest) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) FunctionalResidueParser(de.bioforscher.start2fold.parser.FunctionalResidueParser) UncheckedIOException(java.io.UncheckedIOException) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea) SetOperations(de.bioforscher.jstructure.mathematics.SetOperations) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) StandardFormat(de.bioforscher.jstructure.StandardFormat) Jsoup(org.jsoup.Jsoup) Pattern(java.util.regex.Pattern) Path(java.nio.file.Path) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) FunctionalResidueAnnotation(de.bioforscher.start2fold.model.FunctionalResidueAnnotation) Structure(de.bioforscher.jstructure.model.structure.Structure) AccessibleSurfaceArea(de.bioforscher.jstructure.feature.asa.AccessibleSurfaceArea)

Example 18 with AminoAcid

Use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

From class A01_WriteEarlyFoldingClassificationArff, method handleLine.

/**
 * Converts one semicolon-separated input line into the data rows of its protein chain.
 *
 * <p>Fields read from the line: {@code [0]} entry id, {@code [1]} PDB id and {@code [2]} a
 * bracketed, comma-separated list of experiment ids. The first chain of the parsed structure
 * is annotated via {@code Start2FoldXmlParser}, raw and smoothed feature vectors are assigned
 * to all of its amino acids, and one comma-separated row is emitted per amino acid that is
 * not a {@link Proline}.
 *
 * @param line the semicolon-separated line to process
 * @return the rows joined by the system line separator, or an empty {@link Optional} when any
 *         exception occurred (the failure is logged as a warning)
 */
private static Optional<String> handleLine(String line) {
    try {
        logger.info("handling {}", line);
        String[] fields = line.split(";");
        String entryId = fields[0];
        String pdbId = fields[1];
        String rawExperimentIds = fields[2].replaceAll("\\[", "").replaceAll("]", "");
        List<Integer> experimentIds = Pattern.compile(",")
                .splitAsStream(rawExperimentIds)
                .map(Integer::valueOf)
                .collect(Collectors.toList());

        Chain chain = StructureParser.fromPdbId(pdbId).parse().getFirstChain();
        Start2FoldXmlParser.parseSpecificExperiment(chain, Start2FoldConstants.XML_DIRECTORY.resolve(entryId + ".xml"), experimentIds);

        List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
                .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
                .collect(Collectors.toList());
        List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

        for (AminoAcid residue : aminoAcids) {
            RawFeatureVector.assignRawFeatureVector(residue);
        }
        // smooth features
        for (AminoAcid residue : aminoAcids) {
            SmoothedFeatureVector.assignSmoothedFeatureVector(aminoAcids, residue);
        }

        String rows = aminoAcids.stream()
                .filter(residue -> !(residue instanceof Proline))
                .map(residue -> composeFeatureRow(residue, earlyFoldingResidues))
                .collect(Collectors.joining(System.lineSeparator()));
        return Optional.of(rows);
    } catch (Exception e) {
        logger.warn("computation for {} failed", line, e);
        return Optional.empty();
    }
}

/**
 * Builds the comma-separated row of one amino acid: its formatted feature values followed by
 * the class label {@code early} or {@code late}.
 *
 * @param aminoAcid the residue to describe; must carry a {@link SmoothedFeatureVector} and a
 *        {@link LoopFraction} feature
 * @param earlyFoldingResidues residues labelled {@code early}; all others are labelled {@code late}
 * @return the row without a trailing line separator
 */
private static String composeFeatureRow(AminoAcid aminoAcid, List<AminoAcid> earlyFoldingResidues) {
    SmoothedFeatureVector smoothed = aminoAcid.getFeature(SmoothedFeatureVector.class);
    StringBuilder row = new StringBuilder();
    row.append(StandardFormat.format(smoothed.getEnergy())).append(",");
    row.append(StandardFormat.format(smoothed.getEgor())).append(",");
    row.append(StandardFormat.format(smoothed.getSecondaryStructureElementSize())).append(",");
    // already smoothed
    row.append(StandardFormat.format(aminoAcid.getFeature(LoopFraction.class).getLoopFraction())).append(",");
    row.append(StandardFormat.format(smoothed.getRasa())).append(",");
    row.append(StandardFormat.format(smoothed.getLocalInteractions())).append(",");
    row.append(StandardFormat.format(smoothed.getLocalHydrogen())).append(",");
    row.append(StandardFormat.format(smoothed.getLocalHydrophobic())).append(",");
    row.append(StandardFormat.format(smoothed.getLocalBackbone())).append(",");
    row.append(StandardFormat.format(smoothed.getNonLocalInteractions())).append(",");
    row.append(StandardFormat.format(smoothed.getNonLocalHydrogen())).append(",");
    row.append(StandardFormat.format(smoothed.getNonLocalHydrophobic())).append(",");
    row.append(StandardFormat.format(smoothed.getNonLocalBackbone())).append(",");
    row.append(StandardFormat.format(smoothed.getBetweenness())).append(",");
    row.append(StandardFormat.format(smoothed.getCloseness())).append(",");
    row.append(StandardFormat.format(smoothed.getClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(smoothed.getHydrogenBetweenness())).append(",");
    row.append(StandardFormat.format(smoothed.getHydrogenCloseness())).append(",");
    row.append(StandardFormat.format(smoothed.getHydrogenClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(smoothed.getHydrophobicBetweenness())).append(",");
    row.append(StandardFormat.format(smoothed.getHydrophobicCloseness())).append(",");
    row.append(StandardFormat.format(smoothed.getHydrophobicClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(smoothed.getConvBetweenness())).append(",");
    row.append(StandardFormat.format(smoothed.getConvCloseness())).append(",");
    row.append(StandardFormat.format(smoothed.getConvClusteringCoefficient())).append(",");
    row.append(StandardFormat.format(smoothed.getDistinctNeighborhoods())).append(",");
    row.append(StandardFormat.format(smoothed.getConvDistinctNeighborhoods())).append(",");
    row.append(earlyFoldingResidues.contains(aminoAcid) ? "early" : "late");
    return row.toString();
}
Also used : RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) Logger(org.slf4j.Logger) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Files(java.nio.file.Files) LoggerFactory(org.slf4j.LoggerFactory) Structure(de.bioforscher.jstructure.model.structure.Structure) IOException(java.io.IOException) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) List(java.util.List) Start2FoldConstants(de.bioforscher.start2fold.Start2FoldConstants) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Start2FoldXmlParser(de.bioforscher.start2fold.parser.Start2FoldXmlParser) Chain(de.bioforscher.jstructure.model.structure.Chain) Optional(java.util.Optional) StandardFormat(de.bioforscher.jstructure.StandardFormat) Pattern(java.util.regex.Pattern) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) SmoothedFeatureVector(de.bioforscher.start2fold.model.vector.SmoothedFeatureVector) Start2FoldResidueAnnotation(de.bioforscher.start2fold.model.Start2FoldResidueAnnotation) Proline(de.bioforscher.jstructure.model.structure.aminoacid.Proline) IOException(java.io.IOException) LoopFraction(de.bioforscher.jstructure.feature.loopfraction.LoopFraction) Structure(de.bioforscher.jstructure.model.structure.Structure) RawFeatureVector(de.bioforscher.start2fold.model.vector.RawFeatureVector)

Example 19 with AminoAcid

Use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

From class StructuralInformationParserServiceTest, method shouldParseStructuralInformationFile.

/**
 * Parses the structural information resource {@code si/STF0045.out} for the first chain of
 * PDB entry {@code 1bdd}, using the early folding residues annotated from experiment 185 of
 * {@code efr/STF0045.xml}, and prints the CSV line of every contact followed by the CSV line
 * of every residue. The test makes no assertions; it passes if nothing throws.
 */
@Test
public void shouldParseStructuralInformationFile() {
    Chain chain = StructureParser.fromPdbId("1bdd").parse().getFirstChain();
    List<AminoAcid> aminoAcids = chain.aminoAcids().collect(Collectors.toList());

    Start2FoldXmlParser.parseSpecificExperiment(chain, TestUtils.getResourceAsInputStream("efr/STF0045.xml"), Stream.of(185).collect(Collectors.toList()));
    List<AminoAcid> earlyFoldingResidues = chain.aminoAcids()
            .filter(residue -> residue.getFeature(Start2FoldResidueAnnotation.class).isEarly())
            .collect(Collectors.toList());

    // contact-level information
    List<ContactStructuralInformation> contacts = StructuralInformationParserService.getInstance()
            .parseContactStructuralInformationFile(TestUtils.getResourceAsInputStream("si/STF0045.out"), chain, earlyFoldingResidues);
    for (ContactStructuralInformation contact : contacts) {
        System.out.println(contact.getCsvLine());
    }
    System.out.println();

    // residue-level information composed from the contacts
    List<ResidueStructuralInformation> residues = StructuralInformationParserService.getInstance()
            .composeResidueStructuralInformation(aminoAcids, earlyFoldingResidues, contacts);
    for (ResidueStructuralInformation residue : residues) {
        System.out.println(residue.getCsvLine());
    }
}
Also used : StructuralInformationParserService(de.bioforscher.jstructure.efr.parser.StructuralInformationParserService) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) Test(org.junit.Test) TestUtils(de.bioforscher.testutil.TestUtils) StructureParser(de.bioforscher.jstructure.model.structure.StructureParser) Collectors(java.util.stream.Collectors) Start2FoldXmlParser(de.bioforscher.jstructure.efr.parser.Start2FoldXmlParser) Pair(de.bioforscher.jstructure.mathematics.Pair) IdentifierFactory(de.bioforscher.jstructure.model.identifier.IdentifierFactory) ResidueGraphCalculations(de.bioforscher.jstructure.graph.ResidueGraphCalculations) List(java.util.List) Stream(java.util.stream.Stream) ResidueGraph(de.bioforscher.jstructure.graph.ResidueGraph) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Chain(de.bioforscher.jstructure.model.structure.Chain) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) StandardFormat(de.bioforscher.jstructure.StandardFormat) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Chain(de.bioforscher.jstructure.model.structure.Chain) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) Start2FoldResidueAnnotation(de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation) ResidueStructuralInformation(de.bioforscher.jstructure.efr.model.si.ResidueStructuralInformation) ContactStructuralInformation(de.bioforscher.jstructure.efr.model.si.ContactStructuralInformation) Test(org.junit.Test)

Example 20 with AminoAcid

Use of de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid in project jstructure by JonStargaryen.

From class EQuantParser, method parseEQuantFile.

/**
 * Reads the file at {@code equantPath} and attaches an {@link EQuantScore} to amino acids of
 * the given chain.
 *
 * <p>Lines starting with {@code "chain"} (the header) are skipped, and only lines starting
 * with the chain's id are processed. Each processed line is split on whitespace; the token at
 * index 1 is read as the residue number and the token at index 4 as the score value.
 *
 * @param chain the chain whose amino acids receive the scores
 * @param equantPath the file to read
 * @throws UncheckedIOException if the file cannot be read
 */
public static void parseEQuantFile(Chain chain, Path equantPath) {
    String chainId = chain.getChainIdentifier().getChainId();
    try (Stream<String> lines = Files.lines(equantPath)) {
        // skip header, keep only the lines of this chain
        lines.filter(line -> !line.startsWith("chain") && line.startsWith(chainId))
                .map(line -> line.split("\\s+"))
                .forEach(columns -> {
                    int residueNumber = Integer.parseInt(columns[1]);
                    double evaluation = Double.parseDouble(columns[4]);
                    AminoAcid target = chain.select().residueNumber(residueNumber).asAminoAcid();
                    target.getFeatureContainer().addFeature(new EQuantScore(evaluation));
                });
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : UncheckedIOException(java.io.UncheckedIOException) Stream(java.util.stream.Stream) Files(java.nio.file.Files) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) EQuantScore(de.bioforscher.jstructure.efr.model.EQuantScore) Chain(de.bioforscher.jstructure.model.structure.Chain) IOException(java.io.IOException) Path(java.nio.file.Path) AminoAcid(de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) EQuantScore(de.bioforscher.jstructure.efr.model.EQuantScore)

Aggregations

AminoAcid (de.bioforscher.jstructure.model.structure.aminoacid.AminoAcid)66 Chain (de.bioforscher.jstructure.model.structure.Chain)40 Collectors (java.util.stream.Collectors)40 IOException (java.io.IOException)36 Files (java.nio.file.Files)35 List (java.util.List)31 StandardFormat (de.bioforscher.jstructure.StandardFormat)26 StructureParser (de.bioforscher.jstructure.model.structure.StructureParser)26 Path (java.nio.file.Path)25 Structure (de.bioforscher.jstructure.model.structure.Structure)23 Pattern (java.util.regex.Pattern)17 Logger (org.slf4j.Logger)16 LoggerFactory (org.slf4j.LoggerFactory)16 Start2FoldResidueAnnotation (de.bioforscher.jstructure.efr.model.Start2FoldResidueAnnotation)15 UncheckedIOException (java.io.UncheckedIOException)14 ArrayList (java.util.ArrayList)14 Stream (java.util.stream.Stream)14 Start2FoldResidueAnnotation (de.bioforscher.start2fold.model.Start2FoldResidueAnnotation)13 Optional (java.util.Optional)13 Pair (de.bioforscher.jstructure.mathematics.Pair)11